-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcalculations.py
82 lines (61 loc) · 3.14 KB
/
calculations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import pandas as pd
from datetime import datetime, timedelta
from utils import load_data, load_medal_times
def calculate_complete_data():
    """Load the raw results and build the complete, ordered results table.

    The pipeline is:

    1. load the raw data and the medal times,
    2. reduce the raw data to the individual records per player and track,
    3. substitute medal-based times for players without a time on a track,
    4. order the rows track by track via
       ``sort_by_track_and_tracks_by_date``.

    Returns:
        The frame produced by the last step.
    """
    raw_data = load_data()
    medals = load_medal_times()
    individual_records = get_individual_records(raw_data)
    data = substitute_missing_times(individual_records, medals)
    # The per-player time totals used to be computed here and thrown away;
    # use get_standings() on the returned frame when they are needed.
    return sort_by_track_and_tracks_by_date(data)
def sort_by_track_and_tracks_by_date(data):
    """Group the rows track by track, ordering the tracks by first date.

    The earliest "Date" of every track is taken from the rows without any
    missing value, reduced to its (ISO year, ISO week) pair, and the tracks
    are ordered by that pair. Every row of ``data`` belonging to one of those
    tracks is returned, with "track_id" as a regular column again.
    """
    complete_rows = data.dropna()
    first_dates = complete_rows.groupby("track_id")["Date"].min()

    def iso_year_and_week(date):
        # isocalendar() yields (year, week, weekday); the weekday is ignored.
        return date.isocalendar()[0:2]

    week_keys = first_dates.apply(iso_year_and_week)
    ordered_track_ids = week_keys.sort_values().index

    rows_by_track = data.set_index("track_id")
    return rows_by_track.loc[ordered_track_ids].reset_index()
def substitute_missing_times(data, medals, remove_unknown=True):
    """Give every player a time on every track by filling gaps with medal times.

    For each track, every player that appears anywhere in ``data`` but has no
    row for that track receives a substitute row. Its time is the smallest
    medal time (out of Author, Gold, Silver, Bronze) that is still greater
    than the greatest time driven on the track; if no medal time is greater,
    the Bronze time is used. Substitute rows carry no date and record the
    medal they came from in "Origin".

    Args:
        data: Frame with at least the columns "track_id", "Track", "Player"
            and "Time". It is not modified.
        medals: Frame indexed by track id with the columns "Author", "Gold",
            "Silver" and "Bronze".
        remove_unknown: If true, rows of tracks missing from ``medals`` are
            dropped; otherwise they are kept, without substitution.

    Returns:
        A new frame sorted by "track_id" with a fresh integer index.
    """
    medal_cols = ["Author", "Gold", "Silver", "Bronze"]
    # unique() keeps first-appearance order, which makes the order of the
    # substitute rows reproducible between runs (a set difference is not).
    all_players = data["Player"].unique()
    # One name per track id. De-duplicating on the id rather than on the name
    # keeps the lookup valid when two tracks share a name.
    track_names = data.drop_duplicates(subset="track_id").set_index("track_id")["Track"]

    unknown_tracks = []
    substitute_rows = []
    for track, track_data in data.groupby("track_id"):
        if track not in medals.index:  # track's medals not in database
            unknown_tracks.append(track)
            continue

        players_with_time = set(track_data["Player"])
        players_without_time = [p for p in all_players if p not in players_with_time]
        if not players_without_time:  # everybody already has a time
            continue

        slowest_time = track_data["Time"].max()
        track_medals = medals.loc[track, medal_cols]
        slower_medals = track_medals.where(track_medals > slowest_time).dropna()
        if len(slower_medals) == 0:  # the slowest time beats no medal time
            substitute_time = medals.loc[track, "Bronze"]
            substitute_origin = "Bronze"
        else:  # this should be the regular case
            substitute_time = slower_medals.min()
            # Equality lookup instead of idxmin(): the medal row may be of
            # object dtype, for which idxmin() is not available.
            substitute_origin = slower_medals[slower_medals == substitute_time].index[0]

        track_name = track_names.loc[track]
        substitute_rows.extend(
            {
                "track_id": track,
                "Track": track_name,
                "Date": None,
                "Player": player,
                "Time": substitute_time,
                "Origin": substitute_origin,
            }
            for player in players_without_time
        )

    if remove_unknown and unknown_tracks:
        data = data.loc[~data["track_id"].isin(unknown_tracks)]

    if substitute_rows:
        substitutes = pd.DataFrame(substitute_rows)
        if "Date" in data.columns and pd.api.types.is_datetime64_any_dtype(data["Date"]):
            # Keep "Date" a datetime column (missing values become NaT)
            # instead of letting the all-None column influence the dtype.
            substitutes["Date"] = substitutes["Date"].astype(data["Date"].dtype)
        # A single concat replaces the per-row DataFrame.append, which was
        # removed in pandas 2.0 and copied the whole frame on every call.
        data = pd.concat([data, substitutes], ignore_index=True)

    return data.sort_values("track_id").reset_index(drop=True)
def get_current_track_data(data):
    """Return a copy of all rows of the track holding the most recent date.

    The most recent non-missing "Date" is located, the "track_id" of the first
    row carrying that date is taken, and every row of that track (including
    rows without a date) is returned as an independent copy.
    """
    latest_date = data["Date"].dropna().max()
    on_latest_date = data["Date"] == latest_date
    current_track = data.loc[on_latest_date, "track_id"].values[0]
    belongs_to_current = data["track_id"] == current_track
    return data[belongs_to_current].copy()
def get_individual_records(data):
    """Return, for every player and track, the row with that player's best time.

    The rows are ordered by "Player" and then "track_id". The original row
    labels are kept as the first column (named "index" for an unnamed index)
    and the result gets a fresh integer index.

    The row lookup is label based, so the index labels of ``data`` are
    assumed to be unique.

    Args:
        data: Frame with at least the columns "Player", "track_id" and "Time".

    Returns:
        A new frame with one row per (player, track) combination in ``data``.
    """
    # One grouped lookup instead of groupby("Player").apply(...): apply() no
    # longer hands the grouping column to the applied function in recent
    # pandas, which would silently lose the "Player" column here.
    fastest_labels = data.groupby(["Player", "track_id"])["Time"].idxmin()
    return data.loc[fastest_labels].reset_index()
def get_track_records(data):
    """Return the row with the smallest "Time" for every track in ``data``.

    The original row labels are moved into a leading column by the final
    ``reset_index()``, and the result gets a fresh integer index.
    """
    fastest_labels = data.groupby("track_id")["Time"].idxmin()
    fastest_rows = data.loc[fastest_labels]
    return fastest_rows.reset_index()
def get_standings(data):
    """Return the summed "Time" per player, largest total first."""
    total_times = data.groupby("Player")["Time"].sum()
    return total_times.sort_values(ascending=False)
def drop_inactive_players(data, threshold=0.5):
    """Remove all rows of players that drove too few tracks themselves.

    A row counts as self-driven when its "Origin" equals "Player". A player is
    inactive when the number of such rows is at most ``threshold`` times the
    number of distinct tracks in ``data``.

    Args:
        data: Frame with at least the columns "Player", "track_id" and
            "Origin".
        threshold: Share of the distinct tracks a player has to exceed with
            self-driven rows to stay in the result.

    Returns:
        The rows of ``data`` that belong to active players.
    """
    track_count = data["track_id"].nunique()
    # Count over all players: filtering to self-driven rows first would leave
    # players with zero such rows out of the count, so they were never dropped.
    driven_tracks = (data["Origin"] == "Player").groupby(data["Player"]).sum()
    inactive = driven_tracks[driven_tracks <= threshold * track_count].index
    return data[~data["Player"].isin(inactive)]