%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import datetime
import csv
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import seaborn as sns
import itertools
import networkx as nx
import statsmodels.api as sm
import statsmodels.formula.api as smf
from collections import defaultdict, Counter
from pandas.core import datetools

# Directory that holds the input CSV; edit it to point at the local copy of the data.
path = '/Users/annas/Dropbox/Postdoc/Projects/mitigate/YFA_Mitigate/RoyalSociety_Data/Data/'#INSERT DATA PATH HERE

# Read the header-less CSV in chunks of 100*1024 rows and join the chunks with
# one concat call. Concatenating inside the read loop copies every previously
# read row again on each iteration.
df_reader = pd.read_csv(path+'structured_data_queue.csv', header = None, low_memory = False, chunksize = 100*1024)
df = pd.concat(list(df_reader))

# Names assigned, in order, to the columns of df.
cols = ['queueType','player_id', 'match_id', 'match_datetime', 'match_creation','match_duration','map_id',
'champion_id','team_id','assists','deaths','champ_level','winner','combat_player_score','double_kills',
'gold_earned','gold_spent','inhibitor_kills','killing_sprees','kills','largest_critical_strike','largest_killing_spree',
'largest_multi_kill','magic_damage_dealt','magic_damage_dealt_to_champions','magic_damage_taken','minions_killed',
'neutral_minions_killed','neutral_minions_killed_enemy_jungle','neutral_minions_killed_team_jungle',
'objective_player_score','penta_kills','physical_damage_dealt','physical_damage_dealt_to_champions','physical_damage_taken',
'quadra_kills','total_damage_dealt','total_damage_dealt_to_champions','total_damage_taken','total_heal',
'total_player_score','total_score_rank','total_time_crowd_control_dealt','total_units_healed','triple_kills',
'true_damage_dealt','true_damage_dealt_to_champions','true_damage_taken','unreal_kills','vision_wards_bought_in_game',
'wards_killed','wards_placed']
df.columns = cols
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how='any')
# A single pre-formatted string prints the same text under the Python 2 print
# statement and the Python 3 print function.
print('number of matches: %d' % df['match_id'].nunique())

number of matches: 436554

# KDA: kills plus assists, divided by deaths + 1 (the +1 keeps the ratio finite at zero deaths)
df['KDA'] = (df['assists'] + df['kills'])/(df['deaths']+1)

# select only solo queue matches
df_solo = df[df['queueType']=='RANKED_SOLO_5x5']

# one pre-formatted string prints identically under Python 2 and Python 3
print('number of solo-queue matches: %d' % df_solo['match_id'].nunique())

number of solo-queue matches: 242384

#setting individual players match threshold
threshold = 10 # minimum number of matches per user

# number of solo-queue rows per player, reduced to the players that reach the threshold
players = df_solo['player_id'].value_counts()
players = players[players >= threshold]
# one pre-formatted string prints identically under Python 2 and Python 3
print('number of threshold selected palyers: %d' % len(players))

# ids of the retained players, as a one-column frame
players = pd.DataFrame(players.index, columns = ['id'])

# solo-queue rows that belong to the retained players
df_s10 = df_solo[df_solo['player_id'].isin(players['id'])]

number of threshold selected palyers: 16665

# one pre-formatted string prints identically under Python 2 and Python 3
print('number of solo-queue matches after putting threshold on palyers: %d'
      % df_s10['match_id'].nunique())

number of solo-queue matches after putting threshold on palyers: 242352

# Per-player mean, count and sum over the columns of df_s10. The result has
# two-level column labels of the form (column, statistic), e.g. ('winner', 'mean').
# NOTE(review): the aggregation is requested for every column, including the
# text ones (queueType, match_datetime); only 'winner' and 'KDA' are read
# later in this file — confirm whether the other columns are needed.
player_stats = df_s10.groupby('player_id').agg( ['mean','count','sum'] )

RQ1

import matplotlib.gridspec as gridspec

# Per-player win rate: scatter against the number of matches played (ax1)
# and histogram over players (ax2). The grid has three columns; only the
# first two are filled.
font_size = 16
matches_played = player_stats[('winner','count')]
win_rate = player_stats[('winner','mean')]
data = np.array(win_rate.astype(float))

fig = plt.figure(figsize = (18,5))
gs = gridspec.GridSpec(1, 3)
fig.subplots_adjust(wspace=.25)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

# ax1
ax1.scatter(matches_played, win_rate, marker = 'o', alpha = .25, color = '#30a2da')
ax1.set_xlabel('matches', fontsize=font_size)
ax1.set_ylabel('winner', fontsize=font_size)
ax1.set_ylim(0,1)
# NOTE(review): newer Matplotlib releases spell this keyword `base` — confirm against the installed version
ax1.set_xscale('log', basex=2)
# Reference line at a win rate of 0.5. xmin/xmax of axhline are fractions of
# the axes width (0-1), not data values; the defaults already span the axes.
ax1.axhline(y = .5, linewidth=2, color = 'k', ls = 'dashed')

#ax2
ax2.hist(data, bins = 30, histtype = 'stepfilled', alpha = .75, color = '#30a2da')
ax2.set_xlabel('winner', fontsize=font_size)
ax2.set_ylabel('counts', fontsize=font_size)
ax2.set_xlim(0,1)
# ymin/ymax of axvline are axes fractions as well, so the defaults are used
ax2.axvline(x = .5, linewidth=2, color = 'k', ls = 'dashed')

#plt.savefig('winner_scatterplot.png', dpi=300)

# mean and standard deviation of the per-player win rate
print(np.mean(data))
print(np.std(data))

# correlation between matches played and win rate
cor = np.corrcoef(matches_played, win_rate)
print(cor)

0.505757776495
0.141938450527
[[ 1.          0.00181299]
 [ 0.00181299  1.        ]]

# Per-player mean KDA: scatter against the number of matches played (ax1)
# and histogram over players (ax2). The grid has three columns; only the
# first two are filled.
font_size = 16
matches_played = player_stats[('KDA','count')]
mean_kda = player_stats[('KDA','mean')]
data = np.array(mean_kda.astype(float))

fig = plt.figure(figsize = (18,5))
gs = gridspec.GridSpec(1, 3)
fig.subplots_adjust(wspace=.25)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

# ax1
ax1.scatter(matches_played, mean_kda, marker = 'o', alpha = .25, color = '#30a2da')
ax1.set_xlabel('matches', fontsize=font_size)
ax1.set_ylabel('KDA', fontsize=font_size)
# Reference line at the mean over players. xmin/xmax of axhline are fractions
# of the axes width (0-1), not data values; the defaults already span the axes.
ax1.axhline(y = np.mean(data), linewidth=2, color = 'k', ls = 'dashed')

#ax2
ax2.hist(data, bins = 30, histtype = 'stepfilled', alpha = .75, color = '#30a2da')
ax2.set_xlabel('KDA', fontsize=font_size)
ax2.set_ylabel('counts', fontsize=font_size)
# ymin/ymax of axvline are axes fractions as well, so the defaults are used
ax2.axvline(x = np.mean(data), linewidth=2, color = 'k', ls = 'dashed')
#plt.savefig('KDA_scatterplot.png', dpi=300)

# mean and standard deviation of the per-player mean KDA
print(np.mean(data))
print(np.std(data))

# correlation between matches played and mean KDA
cor = np.corrcoef(matches_played, mean_kda)
print(cor)

2.71424242466
0.828682319451
[[ 1.          0.02039965]
 [ 0.02039965  1.        ]]

RQ2

# Mean, count and sum of 'winner' per player, ordered from the highest mean
# downwards and limited to players with at least `threshold` rows.
winner_by_player = df_s10.groupby('player_id')[['winner']]
playerid_stats = winner_by_player.agg(['mean', 'count', 'sum'])
playerid_stats = playerid_stats.sort_values(by=[('winner', 'mean')], ascending=False)
has_enough_matches = playerid_stats[('winner', 'count')] >= threshold
playerid_stats = playerid_stats[has_enough_matches]

# Pause between the end of one match and the start of the same player's next
# match, pooled over all players in playerid_stats.
# assumes match_duration is in seconds, making the result hours — TODO confirm
inter_arrival_times = []
# group once instead of scanning the whole frame for every player
matches_by_player = df_s10.groupby('player_id', sort=False)
for player_id in playerid_stats.index:
    temp = matches_by_player.get_group(player_id)
    # not named `datetime`, so the imported datetime module stays usable
    match_starts = pd.to_datetime(temp['match_datetime']).values
    # start-to-start gaps in whole seconds (np.floor mirrors the floor-style
    # conversion of the former astype('timedelta64[s]')), expressed in hours
    inter = np.floor(np.diff(match_starts) / np.timedelta64(1, 's')) / 3600.0
    duration = temp['match_duration'].values / 3600.0
    # Plain arrays pair gap k with the duration of the match that precedes it.
    # Two Series with different index labels would only pair that way on
    # pandas versions that do not align ufunc operands.
    inter_arrival_times.extend(inter - duration[:-1])

class Sequence(object):
    """Holder for one player's id and that player's list of sessions."""

    def __init__(self):
        # starts empty; callers append one entry per session
        self.sequences = []
        # set by the caller after construction
        self.player_id = None

# compute original sessions
# A session is a run of consecutive matches of one player. A new session
# starts at every match that begins at least m after the previous match ended.
# assumes match_duration is in seconds, making m a number of hours — TODO confirm
m = 0.25
sequences = []
# group once instead of scanning the whole frame for every player
matches_by_player = df_s10.groupby('player_id', sort=False)
for player_id in playerid_stats.index:
    old_id = 0
    obj = Sequence()
    obj.player_id = player_id
    temp = matches_by_player.get_group(player_id).reset_index(drop=True).drop('queueType', axis=1)

    # pause[k] is the time between the end of match k and the start of match
    # k+1. Plain arrays make the pairing positional on every pandas version.
    # np.floor mirrors the floor-style conversion of the former
    # astype('timedelta64[s]').
    starts = pd.to_datetime(temp['match_datetime']).values
    gap_hours = np.floor(np.diff(starts) / np.timedelta64(1, 's')) / 3600.0
    played_hours = temp['match_duration'].values[:-1] / 3600.0
    pause = gap_hours - played_hours

    # i runs over the positions of the matches that open a new session
    for i in np.flatnonzero(pause >= m) + 1:
        # .loc slices by label and includes both ends; on this 0..n-1 index
        # that selects rows old_id through i-1, which is never empty
        obj.sequences.append(temp.loc[old_id:i-1].copy())
        old_id = i
    # NOTE(review): the matches from old_id to the end are never appended, so
    # the last session of every player is left out — confirm this is intended

    sequences.append(obj)

# compute randomized index sessions
# Same sessions as in `sequences`, with the matches inside each session put in
# random order. The sessions are taken from `sequences` rather than detected a
# second time, because the break detection is identical for both lists.
randomized_sequences = []
for original in sequences:
    obj = Sequence()
    obj.player_id = original.player_id
    # sample(frac=1) returns every row in random order; no seed is set, so
    # each run draws a different order
    obj.sequences = [session.sample(frac=1).reset_index(drop=True)
                     for session in original.sequences]
    randomized_sequences.append(obj)

### code to generate figures
from collections import OrderedDict
def sequence_analysis(sequence_length, sequences, feature, verbose = True):
    """Summarise `feature` at each match position of sessions of one length.

    Only sessions that contain exactly `sequence_length` matches are used.

    Parameters
    ----------
    sequence_length : int
        Number of matches a session must have to be included.
    sequences : iterable
        Objects with a `sequences` attribute that lists one DataFrame per
        session.
    feature : str
        Column of the session DataFrames to summarise.
    verbose : bool
        When true, print the statistics of every position.

    Returns
    -------
    OrderedDict
        Keys are the positions '1' .. str(sequence_length) in numeric order.
        Each value is a dict with `mu` (mean), `sigma` (standard deviation),
        `n` (number of observations) and `ci` (1.96 * sigma / sqrt(n)).
        When no session has the requested length, `n` is 0 and the other
        entries are NaN.
    """
    positions = range(1, sequence_length + 1)
    data = dict((str(position), []) for position in positions)
    for seq in sequences:
        for df in seq.sequences:
            if len(df.index) != sequence_length:
                continue
            # positional access: the index labels of a session are ignored
            values = df[feature].values
            for position in positions:
                data[str(position)].append(values[position - 1])

    # Filled in numeric order. Sorting the string keys afterwards would put
    # '10' before '2' once sequence_length reaches 10.
    stats = OrderedDict()
    for session in positions:
        observed = data[str(session)]
        n = len(observed)
        if n:
            mu = np.mean(observed)
            sigma = np.std(observed)
            ci = 1.96 * sigma / np.sqrt(n)
        else:
            mu = sigma = ci = float('nan')
        stats[str(session)] = dict(mu = mu, sigma = sigma, n = n, ci = ci)

        if verbose:
            # one pre-formatted string per line prints identically under
            # Python 2 and Python 3
            print('Session = %s | Sequence length = %s' % (session, sequence_length))
            print('mu = %s' % mu)
            print('sigma = %s' % sigma)
            print('n obs = %s' % n)
            print('n ci = %s' % ci)
            print('-' * 40)

    return stats

# Win rate by match position for sessions of 1 to 5 matches: original sessions
# (left panel) and the same sessions with shuffled match order (right panel).
# bar_x, bar_y and rand_bar_y collect, per session length, the relative change
# in percent between the first and the last match of the session.
font_size = 16
feature = 'winner'

ls = ['solid','solid','dashed','dashdot','dotted']
sym = ['o','*','s','v','^']
col = ['#30a2da', '#fc4f30', '#e5ae38', '#6d904f', '#8b8b8b']

fig = plt.figure(figsize = (10,5))
fig.subplots_adjust(wspace=.25)
orig = fig.add_subplot(121)
rand = fig.add_subplot(122, sharey = orig)

for panel in (orig, rand):
    panel.set_xlabel('match position', fontsize=font_size)
    panel.set_ylabel('win rate', fontsize=font_size)
    panel.set_xlim(0.5,5.5)
    # Only the tick positions are passed. A second positional argument is
    # read as `minor` by older Matplotlib releases and as tick labels by
    # newer ones.
    panel.set_xticks(np.arange(1,6))

rand_bar_y = []
bar_x = []
bar_y = []

for i in range(1,6):
    stats = sequence_analysis(i, sequences, feature, verbose = True)
    rand_stats = sequence_analysis(i, randomized_sequences, feature, verbose = True)

    x = list(range(1,i+1))
    y = [stats[item]['mu'] for item in stats]
    err = [stats[item]['ci'] for item in stats]
    rand_y = [rand_stats[item]['mu'] for item in stats]
    rand_err = [rand_stats[item]['ci'] for item in stats]

    bar_y.append((y[-1] - y[0]) / float(y[0]) * 100)
    bar_x.append(x[-1])
    rand_bar_y.append( (rand_y[-1] - rand_y[0]) / float(rand_y[0]) * 100 )

    # one marker / line style / colour per session length
    style = len(y) - 1
    orig.errorbar(x, y, yerr = err, fmt = sym[style], linestyle = ls[style], color = col[style],
                lw = 3, markersize = 10, capthick = 3, capsize = 6)
    rand.errorbar(x, rand_y, yerr = rand_err, fmt = sym[style], linestyle = ls[style], color = col[style],
                lw = 3, markersize = 10, capthick = 3, capsize = 6)

rand.legend([str(i) + ' matches' for i in range(1,6)],
           loc = 'lower right', shadow = True, fancybox = True)
orig.set_title('original sessions', fontsize = font_size)
rand.set_title('randomized index sessions', fontsize = font_size)
#plt.savefig('deterioration_winner.png',dpi=300)
plt.show()

Session = 1 | Sequence length = 1
mu = 0.508482718219
sigma = 0.499928038313
n obs = 256050
n ci = 0.00193642720739
----------------------------------------
Session = 1 | Sequence length = 1
mu = 0.508482718219
sigma = 0.499928038313
n obs = 256050
n ci = 0.00193642720739
----------------------------------------
Session = 1 | Sequence length = 2
mu = 0.475597772683
sigma = 0.499404176296
n obs = 45795
n ci = 0.0045740321149
----------------------------------------
Session = 2 | Sequence length = 2
mu = 0.495709138552
sigma = 0.499981588169
n obs = 45795
n ci = 0.00457932061784
----------------------------------------
Session = 1 | Sequence length = 2
mu = 0.485642537395
sigma = 0.499793820758
n obs = 45795
n ci = 0.00457760086015
----------------------------------------
Session = 2 | Sequence length = 2
mu = 0.48566437384
sigma = 0.499794447571
n obs = 45795
n ci = 0.00457760660112
----------------------------------------
Session = 1 | Sequence length = 3
mu = 0.518821695066
sigma = 0.499645618208
n obs = 17958
n ci = 0.00730784236619
----------------------------------------
Session = 2 | Sequence length = 3
mu = 0.513030404277
sigma = 0.499830179725
n obs = 17958
n ci = 0.00731054177238
----------------------------------------
Session = 3 | Sequence length = 3
mu = 0.492649515536
sigma = 0.499945967459
n obs = 17958
n ci = 0.00731223528969
----------------------------------------
Session = 1 | Sequence length = 3
mu = 0.509355162045
sigma = 0.499912473282
n obs = 17958
n ci = 0.00731174540215
----------------------------------------
Session = 2 | Sequence length = 3
mu = 0.51225080744
sigma = 0.499849895186
n obs = 17958
n ci = 0.00731083013171
----------------------------------------
Session = 3 | Sequence length = 3
mu = 0.502895645395
sigma = 0.499991615167
n obs = 17958
n ci = 0.00731290293541
----------------------------------------
Session = 1 | Sequence length = 4
mu = 0.527445687106
sigma = 0.499246165994
n obs = 7779
n ci = 0.0110945303862
----------------------------------------
Session = 2 | Sequence length = 4
mu = 0.545057205296
sigma = 0.497965709915
n obs = 7779
n ci = 0.0110660753677
----------------------------------------
Session = 3 | Sequence length = 4
mu = 0.526545828513
sigma = 0.499294821712
n obs = 7779
n ci = 0.0110956116411
----------------------------------------
Session = 4 | Sequence length = 4
mu = 0.479496079188
sigma = 0.499579412337
n obs = 7779
n ci = 0.0111019359747
----------------------------------------
Session = 1 | Sequence length = 4
mu = 0.516647383983
sigma = 0.49972278776
n obs = 7779
n ci = 0.0111051221444
----------------------------------------
Session = 2 | Sequence length = 4
mu = 0.518704203625
sigma = 0.499650030288
n obs = 7779
n ci = 0.0111035052867
----------------------------------------
Session = 3 | Sequence length = 4
mu = 0.519861164674
sigma = 0.499605378412
n obs = 7779
n ci = 0.0111025130075
----------------------------------------
Session = 4 | Sequence length = 4
mu = 0.523332047821
sigma = 0.499455318867
n obs = 7779
n ci = 0.0110991782995
----------------------------------------
Session = 1 | Sequence length = 5
mu = 0.544776119403
sigma = 0.497991063305
n obs = 3618
n ci = 0.0162271906749
----------------------------------------
Session = 2 | Sequence length = 5
mu = 0.549474847982
sigma = 0.497546218373
n obs = 3618
n ci = 0.0162126952671
----------------------------------------
Session = 3 | Sequence length = 5
mu = 0.559425096739
sigma = 0.496456098641
n obs = 3618
n ci = 0.0161771733832
----------------------------------------
Session = 4 | Sequence length = 5
mu = 0.530403537866
sigma = 0.499074768833
n obs = 3618
n ci = 0.01626250355
----------------------------------------
Session = 5 | Sequence length = 5
mu = 0.477611940299
sigma = 0.499498523304
n obs = 3618
n ci = 0.0162763117187
----------------------------------------
Session = 1 | Sequence length = 5
mu = 0.537866224433
sigma = 0.498564087202
n obs = 3618
n ci = 0.0162458628333
----------------------------------------
Session = 2 | Sequence length = 5
mu = 0.525428413488
sigma = 0.499352977149
n obs = 3618
n ci = 0.0162715690528
----------------------------------------
Session = 3 | Sequence length = 5
mu = 0.524046434494
sigma = 0.49942143425
n obs = 3618
n ci = 0.0162737997483
----------------------------------------
Session = 4 | Sequence length = 5
mu = 0.532061912659
sigma = 0.498970974864
n obs = 3618
n ci = 0.0162591213919
----------------------------------------
Session = 5 | Sequence length = 5
mu = 0.542288557214
sigma = 0.498208468343
n obs = 3618
n ci = 0.0162342748843
----------------------------------------

# KDA by match position for sessions of 1 to 5 matches: original sessions
# (left panel) and the same sessions with shuffled match order (right panel).
# bar_x, bar_y and rand_bar_y collect, per session length, the relative change
# in percent between the first and the last match of the session.
feature = 'KDA'

ls = ['solid','solid','dashed','dashdot','dotted']
sym = ['o','*','s','v','^']
col = ['#30a2da', '#fc4f30', '#e5ae38', '#6d904f', '#8b8b8b']

fig = plt.figure(figsize = (10,5))
fig.subplots_adjust(wspace=.25)
orig = fig.add_subplot(121)
rand = fig.add_subplot(122, sharey = orig)

for panel in (orig, rand):
    panel.set_xlabel('match position', fontsize=font_size)
    panel.set_ylabel('KDA', fontsize=font_size)
    panel.set_xlim(0.5,5.5)
    # Only the tick positions are passed. A second positional argument is
    # read as `minor` by older Matplotlib releases and as tick labels by
    # newer ones.
    panel.set_xticks(np.arange(1,6))

rand_bar_y = []
bar_x = []
bar_y = []

for i in range(1,6):
    stats = sequence_analysis(i, sequences, feature, verbose = False)
    rand_stats = sequence_analysis(i, randomized_sequences, feature, verbose = False)

    x = list(range(1,i+1))
    y = [stats[item]['mu'] for item in stats]
    err = [stats[item]['ci'] for item in stats]
    rand_y = [rand_stats[item]['mu'] for item in stats]
    rand_err = [rand_stats[item]['ci'] for item in stats]

    bar_y.append((y[-1] - y[0]) / float(y[0]) * 100)
    bar_x.append(x[-1])
    rand_bar_y.append( (rand_y[-1] - rand_y[0]) / float(rand_y[0]) * 100 )

    # one marker / line style / colour per session length
    style = len(y) - 1
    orig.errorbar(x, y, yerr = err, fmt = sym[style], linestyle = ls[style], color = col[style],
                lw = 3, markersize = 10, capthick = 3, capsize = 6)
    rand.errorbar(x, rand_y, yerr = rand_err, fmt = sym[style], linestyle = ls[style], color = col[style],
                lw = 3, markersize = 10, capthick = 3, capsize = 6)

rand.legend([str(i) + ' matches' for i in range(1,6)],
           loc = 'upper right', shadow = True, fancybox = True)

orig.set_title('original sessions', fontsize=font_size)
rand.set_title('randomized index sessions', fontsize=font_size)
#plt.savefig('deterioration_KDA.png', dpi=300)
plt.show()

RQ3

# Hard-coded percentage changes per session length (1 to 5) for the two player
# groups: `_w` lists hold win rate, `_kda` lists hold KDA.
# NOTE(review): presumably copied from bar_y / rand_bar_y of earlier runs on
# the high- and low-experience subsets — confirm the provenance of these values
above_rand_bar_y_w =[0.0, 0.098891375631079581, 1.9668985368193823, -0.29231995748073775, 1.5211267605633785]
above_bar_y_w=[0.0, 5.1275210756589455, -3.7903603182030885, -8.2938388625592427, -11.062431544359256]
above_rand_bar_y_kda=[0.0, 0.096883076604551838, 1.4126766902006405, -0.2720597529709291, -0.6356256393628531]
above_bar_y_kda=[0.0, 1.8406555581309387, -2.9021423088618317, -4.5344283604762419, -7.3743866611672066]

below_bar_y_w= [0.0, 8.0568720379146868, 3.3333333333333179, -35.0, -23.07692307692307]
below_rand_bar_y_w= [0.0, 2.3041474654377807, 3.4482758620689724, 13.333333333333334, -21.428571428571423]
below_bar_y_kda=[0.0, 4.040344227742108, 3.3484269606076666, -20.001191957622233, -30.378195392012262]
below_rand_bar_y_kda=[0.0, -2.7673605007987367, -18.395179155755489, 43.152967294420975, -16.984859819786116]

fig = plt.figure(figsize = (10,5))
fig.subplots_adjust(wspace=.25)
above = fig.add_subplot(121)
below = fig.add_subplot(122)

# bar positions: original sessions slightly left, randomized slightly right of each session length
left_x = np.array(bar_x) - .1
right_x = np.array(bar_x) + .1

above.bar(left_x, above_bar_y_w, color = '#30a2da', width = .25, label = 'original sessions')
above.bar(right_x, above_rand_bar_y_w, color = '#fc4f30', hatch = '//', width = .25, label = 'randomized index sessions')

above.set_title('High Experience Players', fontsize = font_size)
above.set_ylabel(r'$\Delta \% $ win rate', fontsize = font_size)
above.set_xlabel('session length', fontsize = font_size)
above.set_xlim(0.5,5.5)
above.set_ylim(-20,10)
# xmin/xmax of axhline are fractions of the axes width (0-1), not data values;
# the defaults already span the axes
above.axhline(y = 0, linewidth=2, color = 'k', ls = 'dashed')
# Only the tick positions are passed. A second positional argument is read as
# `minor` by older Matplotlib releases and as tick labels by newer ones.
above.set_xticks(bar_x)
above.legend(loc = 'lower left')

below.bar(left_x, below_bar_y_w, color = '#30a2da', width = .25, label = 'original sessions')
below.bar(right_x, below_rand_bar_y_w, color = '#fc4f30', hatch = '//', width = .25, label = 'randomized index sessions')

below.set_title('Low Experience Players', fontsize = font_size)
below.set_ylabel(r'$\Delta \% $ win rate', fontsize = font_size)
below.set_xlabel('session length', fontsize = font_size)
below.set_xlim(0.5,5.5)
below.set_ylim(-40,20)
below.axhline(y = 0, linewidth=2, color = 'k', ls = 'dashed')
below.set_xticks(bar_x)

#plt.savefig('deterioration_winner_above95below5.png',dpi=300)

[<matplotlib.axis.XTick at 0x314dfbdd0>,
 <matplotlib.axis.XTick at 0x314e1ea50>,
 <matplotlib.axis.XTick at 0x314edb1d0>,
 <matplotlib.axis.XTick at 0x314edb990>,
 <matplotlib.axis.XTick at 0x314ee40d0>]

# Bar charts of the percentage change in KDA per session length, original vs.
# randomized sessions, for the high-experience (left) and low-experience
# (right) player groups.
font_size = 16
fig = plt.figure(figsize = (10,5))
fig.subplots_adjust(wspace=.25)
above = fig.add_subplot(121)
below = fig.add_subplot(122)

# bar positions: original sessions slightly left, randomized slightly right of each session length
left_x = np.array(bar_x) - .1
right_x = np.array(bar_x) + .1

above.bar(left_x, above_bar_y_kda, color = '#30a2da', width = .25, label = 'original sessions')
above.bar(right_x, above_rand_bar_y_kda, color = '#fc4f30', hatch = '//', width = .25, label = 'randomized index sessions')

above.set_title('High Experience Players', fontsize = font_size)
above.set_ylabel(r'$\Delta \% $ KDA', fontsize = font_size)
above.set_xlabel('session length', fontsize = font_size)
above.set_xlim(0.5,5.5)
above.set_ylim(-10,10)
# xmin/xmax of axhline are fractions of the axes width (0-1), not data values;
# the defaults already span the axes
above.axhline(y = 0, linewidth=2, color = 'k', ls = 'dashed')
# Only the tick positions are passed. A second positional argument is read as
# `minor` by older Matplotlib releases and as tick labels by newer ones.
above.set_xticks(bar_x)
above.legend(loc = 'lower left')

below.bar(left_x, below_bar_y_kda, color = '#30a2da', width = .25, label = 'original sessions')
below.bar(right_x, below_rand_bar_y_kda, color = '#fc4f30', hatch = '//', width = .25, label = 'randomized index sessions')

below.set_title('Low Experience Players', fontsize = font_size)
below.set_ylabel(r'$\Delta \% $ KDA', fontsize = font_size)
below.set_xlabel('session length', fontsize = font_size)
below.set_xlim(0.5,5.5)
below.set_ylim(-35,35)
below.axhline(y = 0, linewidth=2, color = 'k', ls = 'dashed')
below.set_xticks(bar_x)

#plt.savefig('deterioration_KDA_above95below5.png',dpi=300)

[<matplotlib.axis.XTick at 0x3150d7cd0>,
 <matplotlib.axis.XTick at 0x315103610>,
 <matplotlib.axis.XTick at 0x3151aea90>,
 <matplotlib.axis.XTick at 0x3151ba290>,
 <matplotlib.axis.XTick at 0x3151ba990>]

plt.style.use('seaborn-white')
font_size = 22
plt.rc('ytick',labelsize=14)

# compute original sessions
# Restricted to the players whose number of matches is at or above the 95th
# percentile of playerid_stats. A new session starts at every match that
# begins at least m after the previous match ended.
match_counts = playerid_stats[('winner','count')]
player_stats_top = playerid_stats[match_counts >= np.percentile(match_counts, 95)]

sequences_top = []
# group once instead of scanning the whole frame for every player
matches_by_player = df_s10.groupby('player_id', sort=False)
for player_id in player_stats_top.index:
    old_id = 0
    obj = Sequence()
    obj.player_id = player_id
    temp = matches_by_player.get_group(player_id).reset_index(drop=True)

    # pause[k] is the time between the end of match k and the start of match
    # k+1. Plain arrays make the pairing positional on every pandas version.
    # np.floor mirrors the floor-style conversion of the former
    # astype('timedelta64[s]').
    starts = pd.to_datetime(temp['match_datetime']).values
    gap_hours = np.floor(np.diff(starts) / np.timedelta64(1, 's')) / 3600.0
    played_hours = temp['match_duration'].values[:-1] / 3600.0
    pause = gap_hours - played_hours

    # i runs over the positions of the matches that open a new session
    for i in np.flatnonzero(pause >= m) + 1:
        # .loc slices by label and includes both ends; on this 0..n-1 index
        # that selects rows old_id through i-1, which is never empty
        obj.sequences.append(temp.loc[old_id:i-1].copy())
        old_id = i
    # NOTE(review): the matches from old_id to the end are never appended, so
    # the last session of every player is left out — confirm this is intended

    sequences_top.append(obj)

# compute original sessions
# Restricted to the players whose number of matches is at or below the 5th
# percentile of playerid_stats. A new session starts at every match that
# begins at least m after the previous match ended.
match_counts = playerid_stats[('winner','count')]
player_stats_worst = playerid_stats[match_counts <= np.percentile(match_counts, 5)]

sequences_worst = []
# group once instead of scanning the whole frame for every player
matches_by_player = df_s10.groupby('player_id', sort=False)
for player_id in player_stats_worst.index:
    old_id = 0
    obj = Sequence()
    obj.player_id = player_id
    temp = matches_by_player.get_group(player_id).reset_index(drop=True)

    # pause[k] is the time between the end of match k and the start of match
    # k+1. Plain arrays make the pairing positional on every pandas version.
    # np.floor mirrors the floor-style conversion of the former
    # astype('timedelta64[s]').
    starts = pd.to_datetime(temp['match_datetime']).values
    gap_hours = np.floor(np.diff(starts) / np.timedelta64(1, 's')) / 3600.0
    played_hours = temp['match_duration'].values[:-1] / 3600.0
    pause = gap_hours - played_hours

    # i runs over the positions of the matches that open a new session
    for i in np.flatnonzero(pause >= m) + 1:
        # .loc slices by label and includes both ends; on this 0..n-1 index
        # that selects rows old_id through i-1, which is never empty
        obj.sequences.append(temp.loc[old_id:i-1].copy())
        old_id = i
    # NOTE(review): the matches from old_id to the end are never appended, so
    # the last session of every player is left out — confirm this is intended

    sequences_worst.append(obj)

# Mean number of matches per session, one value for every player in
# sequences_top that has at least one session.
top_mean_sessions = []
for player_sessions in sequences_top:
    session_length = [len(seq.index) for seq in player_sessions.sequences if len(seq.index) > 0]
    if session_length:
        top_mean_sessions.append(np.mean(session_length))

# Same statistic for the players in sequences_worst.
worst_mean_sessions = []
for player_sessions in sequences_worst:
    session_length = [len(seq.index) for seq in player_sessions.sequences if len(seq.index) > 0]
    if session_length:
        worst_mean_sessions.append(np.mean(session_length))


# boxplot
# Mann-Whitney U test and box plot comparing the per-player mean session
# length of the two player groups.
from scipy.stats import wilcoxon
from scipy.stats import mannwhitneyu
# The alternative is stated explicitly because the default of mannwhitneyu is
# not the same in every SciPy release.
test = mannwhitneyu(worst_mean_sessions, top_mean_sessions, alternative='two-sided')
# single-argument print calls behave the same under Python 2 and Python 3
print(test)
print(test.pvalue)
if test.pvalue > 0.05:
    print('No significant difference :(')
else:
    print('Significant difference detected :)')

fig = plt.figure(figsize = (7,5))
ax = fig.add_subplot(111)
bp = ax.boxplot([worst_mean_sessions, top_mean_sessions], patch_artist=True, widths = .75)
ax.set_xticklabels(['Low Experience\nPlayers', 'High Experience\nPlayers'], fontsize = 18)
ax.set_ylabel('Average Session Length',fontsize = font_size)
ax.set_yticks([1,2,3,4,5])

for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=2)
    # change fill color (set after `color`, which also touches the face)
    box.set( facecolor = 'white', alpha = .75 )

## change color and linewidth of the whiskers and of the caps
for line in list(bp['whiskers']) + list(bp['caps']):
    line.set(color='#333333', linewidth=2)

## change color and linewidth of the medians
for median in bp['medians']:
    median.set(color='#fc4f30', linewidth=2)

## change the style of fliers and their fill
for flier in bp['fliers']:
    flier.set(marker='s', markerfacecolor='#fc4f30')
fig.tight_layout()
#plt.savefig('average_session_all.png',dpi=300)
plt.show()

MannwhitneyuResult(statistic=40482.5, pvalue=0.0)
0.0
Significant difference detected :)

# Mean of the per-session sums of match_duration, one value for every player
# in sequences_top that has at least one session.
top_mean_duration = []
for player_sessions in sequences_top:
    session_duration = [np.sum(seq.match_duration) for seq in player_sessions.sequences if len(seq.index) > 0]
    if session_duration:
        top_mean_duration.append(np.mean(session_duration))

# Same statistic for the players in sequences_worst.
worst_mean_duration = []
for player_sessions in sequences_worst:
    session_duration = [np.sum(seq.match_duration) for seq in player_sessions.sequences if len(seq.index) > 0]
    if session_duration:
        worst_mean_duration.append(np.mean(session_duration))

# boxplot
# Mann-Whitney U test and box plot (log-scaled y axis) comparing the
# per-player mean session duration of the two player groups.
from scipy.stats import wilcoxon
from scipy.stats import mannwhitneyu
# The alternative is stated explicitly because the default of mannwhitneyu is
# not the same in every SciPy release.
test = mannwhitneyu(worst_mean_duration, top_mean_duration, alternative='two-sided')
# single-argument print calls behave the same under Python 2 and Python 3
print(test)
if test.pvalue > 0.05:
    print('No significant difference :(')
else:
    print('Significant difference detected :)')

fig = plt.figure(figsize = (8,5.3))
ax = fig.add_subplot(111)
bp = ax.boxplot([worst_mean_duration, top_mean_duration], patch_artist=True, widths = .75)
ax.set_xticklabels(['Low Experience\nPlayers', 'High Experience\nPlayers'], fontsize = 18)
ax.set_ylabel('Average Session Duration', fontsize = font_size)
ax.set_yscale('log')

for box in bp['boxes']:
    # change outline color
    box.set( color='black', linewidth=2)
    # change fill color (set after `color`, which also touches the face)
    box.set( facecolor = 'white', alpha = .75 )

## change color and linewidth of the whiskers and of the caps
for line in list(bp['whiskers']) + list(bp['caps']):
    line.set(color='#333333', linewidth=2)

## change color and linewidth of the medians
for median in bp['medians']:
    median.set(color='#fc4f30', linewidth=2)

## change the style of fliers and their fill
for flier in bp['fliers']:
    flier.set(marker='s', markerfacecolor='#fc4f30')
fig.tight_layout()
#plt.savefig('average_duration_all.png',dpi=300)
plt.show()

MannwhitneyuResult(statistic=43238.5, pvalue=0.0)
Significant difference detected :)