On the 23rd of April 2017, Southampton hosted the ABP Soton Marathon, Half-Marathon and 10k (http://www.abpsouthamptonhalf.co.uk/). I did the 10k, and a number of my Astro-colleagues took part in the various distances. The results were available as a CSV download - so naturally I thought I would take a look. I'm hoping that I'll be able to get some use out of `pandas`, which I've only used in passing before.
First, import the packages I'll be using, and define some functions to help me tidy up the data - there are a few DNF and DNS values as well as some 99:99:99 timestamps, which could trip us up.
# %matplotlib notebook
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.time import Time
def convert_to_ap_Time(df, key):
    """Replace column ``key`` of ``df`` with an astropy ``Time`` built from it.

    The column is first parsed with ``pd.to_datetime``; each parsed value is
    then rendered as a string and the strings are handed to ``Time`` with
    ``format="isot"``. The column name is printed as a progress marker.

    Returns the same ``df`` object, modified in place.
    """
    print(key)
    df[key] = pd.to_datetime(df[key])
    iso_strings = [stamp.astype(str) for stamp in df[key].values]
    df[key] = Time(iso_strings, format="isot")
    return df
def convert_to_Timedelta(x):
    """Convert ``x`` to a pandas timedelta and return it.

    ``x`` may be anything ``pd.to_timedelta`` accepts, e.g. an "HH:MM:SS"
    string or a Series of such strings.

    The previous body called ``pd.to_datetime()`` with no argument, ignored
    ``x`` and returned nothing, so every call raised ``TypeError``.
    """
    return pd.to_timedelta(x)
def convert_times_to_datetime(df):
    """Run both time conversions over every timing column of ``df``.

    For each listed column, ``convert_to_ap_Time`` is applied and then
    ``convert_Time_to_seconds``. Returns the resulting ``df``.
    """
    for key in ("Gun Time", "Chip Time", "TOD", "Beat the Bridge", "Beat the Bridge.1"):
        df = convert_Time_to_seconds(convert_to_ap_Time(df, key), key)
    return df
def convert_Time_to_seconds(df, key):
    """Add a ``"sub" + key`` column holding ``df[key]`` relative to a fixed epoch.

    The epoch is subtracted from the column and the ``.sec`` attribute of each
    resulting element is stored. Returns the same ``df`` object.
    """
    # NOTE(review): the reference epoch is hard-coded; confirm it matches the
    # date that the parsed timestamps in df[key] actually carry.
    epoch = Time("2017-05-04T00:00:00.000", format="isot")
    out_column = "sub" + key
    df[out_column] = df[key] - epoch
    df[out_column] = [delta.sec for delta in df[out_column].values]
    return df
def find_astronomers(
    df,
    astronomers=("Robert FIRTH", "Stephen BROWETT", "Mathew SMITH", "Sadie JONES"),
):
    """Return the rows of ``df`` whose "Name" is one of ``astronomers``.

    Parameters
    ----------
    df : DataFrame with a "Name" column.
    astronomers : iterable of names to keep; defaults to the four 10k runners.

    Returns
    -------
    An independent copy of the matching rows (original index preserved), so
    callers can add columns to it without touching ``df`` and without
    triggering pandas' SettingWithCopyWarning.
    """
    is_astronomer = df["Name"].isin(list(astronomers))
    return df[is_astronomer].copy()
def plot_hist_with_astronomers(df, astro_df, key, line_top=70):
    """Histogram ``df[key]`` and mark each astronomer, the mean and the median.

    Values in ``df[key]`` and ``astro_df[key]`` are divided by 60 before
    plotting and the axis is labelled "Minutes". Everything is drawn on the
    current pyplot axes.

    Parameters
    ----------
    df : DataFrame containing column ``key`` for every runner.
    astro_df : DataFrame with "Name" and ``key`` columns for the astronomers.
    key : column to plot; a leading "sub" is stripped from the axis label.
    line_top : upper end of the vertical marker lines, in histogram counts.
    """
    # (full name, legend label), in the order the lines were originally drawn
    # so each runner keeps the same colour from the default cycle.
    # NOTE(review): the original picked runners by hard-coded row labels
    # (158, 737, 1302, 576); the name mapping is inferred from the old
    # variable names - confirm against the data.
    runners = (
        ("Robert FIRTH", "Rob"),
        ("Mathew SMITH", "Mat"),
        ("Stephen BROWETT", "Steve"),
        ("Sadie JONES", "Sadie"),
    )

    plt.hist(df[key] / 60., bins=100)

    for full_name, nickname in runners:
        times = astro_df.loc[astro_df["Name"] == full_name, key]
        if times.empty:
            # Runner absent from astro_df: skip rather than fail on a lookup.
            continue
        runner_time = times.iloc[0] / 60.
        plt.plot([runner_time, runner_time], [0, line_top], lw=2, label=nickname)

    summary_lines = (
        (df[key].mean() / 60, ":", "Mean"),
        (df[key].median() / 60, "--", "Median"),
    )
    for value, line_style, legend_label in summary_lines:
        plt.plot([value, value], [0, line_top], lw=2, color="Black",
                 ls=line_style, label=legend_label)

    plt.xlabel(key.replace("sub", "") + " Minutes")
    plt.legend()
As mentioned above, the data was released as a pretty straightforward .csv file, so I'll just pull it into a `pandas` dataframe so I can easily interact with it.
# Load the race results, then drop rows whose timing fields hold sentinel
# values instead of times, reporting how many rows each rule removes.
results_path = "/Users/berto/Code/zoidberg/ABPSoton10k/data/Results10k.csv"
df = pd.read_csv(results_path)
origlength = len(df)
print("Original Length = ", origlength)
# df = df.drop(df.index[len(df)-10:])

# (column, sentinel value, message suffix) for each kind of unusable row.
bad_row_rules = (
    ("Gun Time", "DNF", " rows that Gun Time = 'DNF'"),
    ("Gun Time", "QRY", " rows that Gun Time = 'QRY'"),
    ("Beat the Bridge", "99:99:99", " rows that have bad 'Beat the Bridge' time"),
)
newlen = origlength
for column, sentinel, message in bad_row_rules:
    df = df.drop(df.index[df[column] == sentinel])
    print("Dropping ", newlen - len(df), message)
    newlen = len(df)
print("final length = ", newlen)
Now let's take a look at what info we have in the table:
# Preview the first five rows of the cleaned table.
# df.columns
df.head(5)
There are a number of columns that are timestamps, but they are string formatted. We can convert them into timedeltas using a lambda function and the `to_timedelta` method, simply iterating through the keys we're interested in.
I will preserve the data as a timedelta in a new column "$KEYNAME original", and convert the original column to minutes, which are the most natural unit for taking a quick look at this data.
# Parse each timing column once: keep the timedelta under "<key> original"
# and overwrite the column itself with the same duration in minutes.
key_list = ["Gun Time", "Chip Time", "Beat the Bridge", "Beat the Bridge.1"]
for key in key_list:
    timedelta = pd.to_timedelta(df[key].map(lambda stamp: "0 days " + stamp))
    df[key + " original"] = timedelta
    df[key] = timedelta.map(lambda delta: delta.seconds / 60.)
df.head()
`pandas` has a bunch of ways of easily chaining methods so you can make borderline unreadable one-liners. However, they are handy!
# Same chained one-liner, unrolled: timedelta -> minutes -> 25-bin histogram.
chip_minutes = df["Chip Time original"].map(lambda delta: delta.seconds / 60.)
chip_minutes.hist(bins=25)
I was one of four Astronomers who ran the 10k (three more did the Half Marathon) - it'll be interesting to see how we stack up.
So to find them, I can use the handy `isin` method, with a list of names:
# Show the astronomers' rows inline, then keep them via the helper.
astronomers = ("Robert FIRTH", "Stephen BROWETT", "Mathew SMITH", "Sadie JONES")
df.loc[df["Name"].isin(astronomers)]
astro_df = find_astronomers(df)
Now we have them, it's probably worth assigning everyone a colour so we can see them a little easier on the plots. We can just use the new `matplotlib 2.0` defaults for ease.
# astro_df["colour"] = pd.Series(["red", "yellow", "green", "black"], index=astro_df.index)
# Work on an explicit copy: astro_df comes from filtering df, and adding a
# column to such a selection can trigger pandas' SettingWithCopyWarning.
astro_df = astro_df.copy()
# One matplotlib cycle colour ("C1", "C2", ...) per row, so this no longer
# fails when the number of matched runners is not exactly four.
astro_df["colour"] = ["C" + str(n) for n in range(1, len(astro_df) + 1)]
For the run time, we will use the "Chip Time", which is the time logged by the unit on the back of each runner's number when they cross the finish line. Since this only starts counting when the start line is crossed, this is a better measurement than the "Gun Time". However, we will look into differences between the two in a bit.
# Histogram of one timing column with a vertical line per astronomer and
# dotted/dashed lines for the mean and median.
# key = "subGun Time"
key = "Chip Time"
mean_time = df[key].mean()
median_time = df[key].median()

fig, ax1 = plt.subplots(figsize=[10, 6])
ax1.hist(df[key], bins=100)

for runtime, runner in zip(astro_df["Chip Time"], astro_df["Name"]):
    ax1.plot([runtime, runtime], [0, 70], lw=2, label=runner)

for value, line_style, legend_label in ((mean_time, ":", "Mean"), (median_time, "--", "Median")):
    ax1.plot([value, value], [0, 70], lw=2, color="Black", ls=line_style, label=legend_label)

ax1.set_xlabel(key + ", Minutes")
ax1.legend()
fig.tight_layout()
# plot_hist_with_astronomers(df=df, astro_df=astro_df, key="Beat the Bridge")
# Scatter "Beat the Bridge" against "Chip Time", highlight and label the
# astronomers, and print the correlation coefficient of the two columns.
keyx = "Chip Time"
keyy = "Beat the Bridge"
corr_co = np.corrcoef(df[keyx], df[keyy])

fig, ax1 = plt.subplots(figsize=[7, 5])
ax1.scatter(df[keyx], df[keyy])
ax1.scatter(astro_df[keyx], astro_df[keyy], edgecolor="black", color=astro_df["colour"])
ax1.set_xlabel(keyx + ", Minutes")
ax1.set_ylabel(keyy + ", Minutes")
fig.tight_layout()

# Alternate the label offset between runners so neighbouring boxes do not overlap.
for i, (x, y, label) in enumerate(zip(astro_df[keyx], astro_df[keyy], astro_df["Name"])):
    sign = 1 if i % 2 else -1
    ax1.annotate(
        label,
        xy=(x, y),
        xytext=(50 * sign, -30 * sign),
        textcoords='offset points',
        ha='center',
        va='bottom',
        bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
        arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
    )
print(corr_co[1, 0])
We can fit a model to this.
# Fit a straight line to "Beat the Bridge" vs "Chip Time" for all runners...
keyx = "Chip Time"
keyy = "Beat the Bridge"
z = np.polyfit(df[keyx], df[keyy], deg=1)
p = np.poly1d(z)
fit = p(df[keyx])

# ...and redraw the scatter with the fitted line on top.
fig, ax1 = plt.subplots(figsize=[7, 5])
ax1.scatter(df[keyx], df[keyy])
ax1.scatter(astro_df[keyx], astro_df[keyy], edgecolor="black", color=astro_df["colour"])
ax1.set_xlabel(keyx + ", Minutes")
ax1.set_ylabel(keyy + ", Minutes")
fig.tight_layout()

for i, (x, y, label) in enumerate(zip(astro_df[keyx], astro_df[keyy], astro_df["Name"])):
    sign = 1 if i % 2 else -1
    ax1.annotate(
        label,
        xy=(x, y),
        xytext=(50 * sign, -30 * sign),
        textcoords='offset points',
        ha='center',
        va='bottom',
        bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
        arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
    )
ax1.plot(df[keyx], fit, color="black", ls="--")
# Same scatter split by the "Gender" column, with one straight-line fit per group.
gender_groups = df.groupby("Gender")
keyx = "Chip Time"
keyy = "Beat the Bridge"

fig, ax1 = plt.subplots(figsize=[7, 5])
lstyles = ["--", ":"]
for i, (name, group) in enumerate(gender_groups):
    ax1.scatter(group[keyx], group[keyy], label=name)
    z = np.polyfit(group[keyx], group[keyy], 1)
    p = np.poly1d(z)
    fit = p(group[keyx])
    ax1.plot(group[keyx], fit, color="black", ls=lstyles[i], label=name)
# ax1.scatter(df[keyx].map(lambda x : x.seconds/60.), df[keyy].map(lambda x : x.seconds/60.))

ax1.scatter(astro_df[keyx], astro_df[keyy], edgecolor="black", color=astro_df["colour"])
ax1.set_xlabel(keyx + ", Minutes")
ax1.set_ylabel(keyy + ", Minutes")
fig.tight_layout()

for i, (x, y, label) in enumerate(zip(astro_df[keyx], astro_df[keyy], astro_df["Name"])):
    sign = 1 if i % 2 else -1
    ax1.annotate(
        label,
        xy=(x, y),
        xytext=(50 * sign, -30 * sign),
        textcoords='offset points',
        ha='center',
        va='bottom',
        bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
        arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
    )
ax1.legend()
# Residuals of each per-gender straight-line fit, with a zero reference line
# and the residual standard deviation printed per group.
keyx = "Chip Time"
keyy = "Beat the Bridge"

fig, ax1 = plt.subplots(figsize=[7, 5])
lstyles = ["--", ":"]
for i, (name, group) in enumerate(gender_groups):
    z = np.polyfit(group[keyx], group[keyy], 1)
    p = np.poly1d(z)
    fit = p(group[keyx])
    residual = group[keyy] - fit
    ax1.scatter(group[keyx], residual, label=name)
    # ax1.plot(group[keyx], fit, color = "black", ls = lstyles[i], label = name)
    ax1.plot(group[keyx], np.zeros_like(group[keyx]), color="black", ls=lstyles[i], label=name)
    print(name, "stddev", np.std(residual))

ax1.set_xlabel(keyx + ", Minutes")
ax1.set_ylabel(keyy + ", Minutes")
ax1.legend()
# Overlaid per-gender histograms, first of "Chip Time"...
keyx = "Chip Time"
alpha = 0.8
fig, ax1 = plt.subplots(figsize=[7, 5])
chip_bins = np.arange(30.0, 125., 0.75)
for name, group in gender_groups:
    x = group[keyx]
    ax1.hist(x, bins=chip_bins, alpha=alpha, label=name)
ax1.set_xlabel(keyx + " Minutes")
ax1.legend()

# ...then of "Beat the Bridge".
keyy = "Beat the Bridge"
alpha = 0.8
fig, ax1 = plt.subplots(figsize=[7, 5])
bridge_bins = np.arange(6.0, 22.0, 0.5)
for name, group in gender_groups:
    y = group[keyy]
    ax1.hist(y, bins=bridge_bins, alpha=alpha, label=name)
ax1.set_xlabel(keyy + " Minutes")
ax1.legend()
# Scatter "Bib No" against "Chip Time" and print their correlation coefficient.
keyx = "Chip Time"
keyy = "Bib No"
corr_co = np.corrcoef(df[keyx], df[keyy])

fig, ax1 = plt.subplots(figsize=[7, 5])
ax1.scatter(df[keyx], df[keyy])
ax1.scatter(astro_df[keyx], astro_df[keyy], edgecolor="black", color=astro_df["colour"])

for i, (x, y, label) in enumerate(zip(astro_df[keyx], astro_df[keyy], astro_df["Name"])):
    sign = 1 if i % 2 else -1
    ax1.annotate(
        label,
        xy=(x, y),
        xytext=(-20 * sign, 50 * sign),
        textcoords='offset points',
        ha='center',
        va='bottom',
        bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
        arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
    )
ax1.set_xlabel(keyx + " Minutes")
ax1.set_ylabel(keyy)
print(corr_co[1, 0])
# Scatter the "G/Pos" column against "Pos", one colour per gender group.
gender_groups = df.groupby("Gender")
keyx = "Pos"
keyy = "G/Pos"

fig, ax1 = plt.subplots(figsize=[7, 5])
for name, group in gender_groups:
    ax1.scatter(group[keyx], group[keyy], label=name)
# Legend is built before the astronomers are drawn, so they get no entry.
ax1.legend()
ax1.scatter(astro_df[keyx], astro_df[keyy], edgecolor="black", color=astro_df["colour"])

for i, (x, y, namelabel) in enumerate(zip(astro_df[keyx], astro_df[keyy], astro_df["Name"])):
    sign = 1 if i % 2 else -1
    ax1.annotate(
        namelabel,
        xy=(x, y),
        xytext=(-20 * sign, 50 * sign),
        textcoords='offset points',
        ha='center',
        va='bottom',
        bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
        arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
    )
ax1.set_xlabel(keyx)
ax1.set_ylabel(keyy)
# "Chip Time" against "Gun Time" per gender group, with a y = x reference line.
keyx = "Gun Time"
keyy = "Chip Time"

fig, ax1 = plt.subplots(figsize=[7, 5])
for name, group in gender_groups:
    ax1.scatter(group[keyx], group[keyy], label=name)
ax1.scatter(astro_df[keyx], astro_df[keyy], edgecolor="black", color=astro_df["colour"])
ax1.legend()

for i, (x, y, label) in enumerate(zip(astro_df[keyx], astro_df[keyy], astro_df["Name"])):
    sign = 1 if i % 2 else -1
    ax1.annotate(
        label,
        xy=(x, y),
        xytext=(-20 * sign, 50 * sign),
        textcoords='offset points',
        ha='center',
        va='bottom',
        bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
        arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
    )

diagonal = [0, 130]
ax1.plot(diagonal, diagonal, color="black")
ax1.set_xlabel(keyx)
ax1.set_ylabel(keyy)
# Clip the x-range to the data with a 5% margin either side.
x_low = df[keyx].min() * 0.95
x_high = df[keyx].max() * 1.05
ax1.set_xlim([x_low, x_high])
# Ratio of "Gun Time" to "Chip Time" against finishing position, per gender
# group, with the astronomers highlighted and labelled.
keyx = "Pos"
keyy1 = "Gun Time"
keyy2 = "Chip Time"

fig = plt.figure(figsize=[7, 5])
ax1 = fig.add_subplot(111)
for name, group in gender_groups:
    x = group[keyx]
    y = group[keyy1] / group[keyy2]
    ax1.scatter(x, y, label=name)

x = astro_df[keyx]
y = astro_df[keyy1] / astro_df[keyy2]
ax1.scatter(x, y, edgecolor="black", color=astro_df["colour"])
ax1.legend()

astro_ratio = astro_df[keyy1] / astro_df[keyy2]
for i, (x, y, label) in enumerate(zip(astro_df[keyx], astro_ratio, astro_df["Name"])):
    sign = 2 * (i % 2) - 1
    ax1.annotate(
        label,
        xy=(x, y), xytext=(-1 * sign * 20, sign * 50),
        textcoords='offset points', ha='center', va='bottom',
        bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
        arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
# plt.plot([0,130], [0, 130], color = "black")
plt.xlabel(keyx)
# The y-axis shows the ratio; the previous code used `keyy`, a variable left
# over from the preceding cell, which labelled this axis "Chip Time".
plt.ylabel(keyy1 + " / " + keyy2)
# Per-gender histogram of the keyy1 / keyy2 ratio (keys set by the cell above)...
alpha = 0.8
fig, ax1 = plt.subplots(figsize=[7, 5])
ratio_bins = np.arange(1.0, 1.2, 0.005)
for name, group in gender_groups:
    y = group[keyy1] / group[keyy2]
    ax1.hist(y, bins=ratio_bins, alpha=alpha)

# ...and of the keyx column, using matplotlib's default binning.
alpha = 0.8
fig, ax1 = plt.subplots(figsize=[7, 5])
for name, group in gender_groups:
    x = group[keyx]
    ax1.hist(x, alpha=alpha)
from scipy import stats

# One figure per gender group: a Gaussian kernel density estimate of
# (keyx, keyy1 / keyy2) evaluated on a 100x100 grid, with the raw points and
# the astronomers' labels drawn on top.
# NOTE(review): the source indentation was lost; the annotation loop is
# assumed to sit inside the per-group loop - confirm.
for name, group in gender_groups:
    x = group[keyx]
    y = group[keyy1] / group[keyy2]
    xmin, xmax = np.min(x), np.max(x)
    ymin, ymax = np.min(y), np.max(y)

    X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    values = np.vstack([x, y])
    kernel = stats.gaussian_kde(values)
    Z = kernel(positions).T.reshape(X.shape)

    fig, ax = plt.subplots(figsize=[7, 5])
    # ax.imshow(np.rot90(Z))
    ax.pcolormesh(X, Y, Z)
    ax.plot(x, y, 'k.', markersize=2)
    # ax.set_xlim([xmin, xmax])
    # ax.set_ylim([ymin, ymax])

    astro_ratio = astro_df[keyy1] / astro_df[keyy2]
    for i, (x_astro, y_astro, label) in enumerate(zip(astro_df[keyx], astro_ratio, astro_df["Name"])):
        sign = 1 if i % 2 else -1
        ax.annotate(
            label,
            xy=(x_astro, y_astro),
            xytext=(-20 * sign, 50 * sign),
            textcoords='offset points',
            ha='center',
            va='bottom',
            bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
            arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
        )
# "Gun Time" against the difference "Gun Time" - "Chip Time", per gender group.
keyx1 = "Gun Time"
keyx2 = "Chip Time"
keyy = "Gun Time"

fig, ax1 = plt.subplots(figsize=[7, 5])
for name, group in gender_groups:
    delay = group[keyx1] - group[keyx2]
    ax1.scatter(delay, group[keyy], label=name)

astro_delay = astro_df[keyx1] - astro_df[keyx2]
for i, (x, y, label) in enumerate(zip(astro_delay, astro_df[keyy], astro_df["Name"])):
    sign = 1 if i % 2 else -1
    ax1.annotate(
        label,
        xy=(x, y),
        xytext=(-20 * sign, 50 * sign),
        textcoords='offset points',
        ha='center',
        va='bottom',
        bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
        arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
    )
ax1.legend()
ax1.set_ylabel(keyy)
ax1.set_xlabel(keyx1 + " - " + keyx2)
# One figure per gender group: Gaussian kernel density estimate of
# (keyx1 - keyx2, keyy) on a 100x100 grid, raw points and astronomer labels
# on top.
# NOTE(review): the source indentation was lost; the annotation loop and the
# axis labels are assumed to belong to each per-group figure - confirm.
keyx1 = "Gun Time"
keyx2 = "Chip Time"
# keyy = "Gun Time"
# keyy = "G/Pos"
# keyy = "Gen Pos"
keyy = "Pos"

for name, group in gender_groups:
    x = group[keyx1] - group[keyx2]
    y = group[keyy]
    xmin, xmax = np.min(x), np.max(x)
    ymin, ymax = np.min(y), np.max(y)

    X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    values = np.vstack([x, y])
    kernel = stats.gaussian_kde(values)
    Z = kernel(positions).T.reshape(X.shape)

    fig, ax1 = plt.subplots(figsize=[7, 5])
    # ax.imshow(np.rot90(Z))
    ax1.pcolormesh(X, Y, Z)
    ax1.plot(x, y, 'k.', markersize=2)
    # ax.set_xlim([xmin, xmax])
    # ax.set_ylim([ymin, ymax])

    astro_delay = astro_df[keyx1] - astro_df[keyx2]
    for i, (x_astro, y_astro, label) in enumerate(zip(astro_delay, astro_df[keyy], astro_df["Name"])):
        sign = 1 if i % 2 else -1
        ax1.annotate(
            label,
            xy=(x_astro, y_astro),
            xytext=(-20 * sign, 50 * sign),
            textcoords='offset points',
            ha='center',
            va='bottom',
            bbox={"boxstyle": 'round,pad=0.5', "fc": 'yellow', "alpha": 0.5},
            arrowprops={"arrowstyle": '->', "connectionstyle": 'arc3,rad=0'},
        )
    # ax1.legend()
    ax1.set_ylabel(keyy)
    ax1.set_xlabel(keyx1 + " - " + keyx2)
# Per-gender histogram of "Gun Time" - "Chip Time"; `alpha` is the value left
# by an earlier cell.
keyx1 = "Gun Time"
keyx2 = "Chip Time"
# keyy = "Gun Time"
# keyy = "G/Pos"
# keyy = "Gen Pos"
keyy = "Pos"

fig, ax1 = plt.subplots(figsize=[7, 5])
delay_bins = np.arange(0.0, 6.0, 0.1)
for name, group in gender_groups:
    x = group[keyx1] - group[keyx2]
    y = group[keyy]
    ax1.hist(x, bins=delay_bins, alpha=alpha, label=name)
ax1.legend()
ax1.set_ylabel("Number")
ax1.set_xlabel(keyx1 + " - " + keyx2)
# Mean of both timing columns for runners without and with a "Club" entry,
# then a histogram of "Gun Time" for the runners that have one.
keyx = "Chip Time"
keyy = "Beat the Bridge"
labels = ["Non-Club Runner", "Club Runner"]
lstyles = ["--", ":"]

has_club = df["Club"].notna()
for i, indices in enumerate([~has_club, has_club]):
    subset = df[indices]
    print(labels[i])
    print(subset[keyx].mean(), subset[keyy].mean())

df.loc[has_club, "Gun Time"].hist(bins=25)
# Scatter and straight-line fit of keyy against keyx, separately for runners
# without and with a "Club" entry.
keyx = "Chip Time"
keyy = "Beat the Bridge"
labels = ["Non-Club Runner", "Club Runner"]
lstyles = ["--", ":"]

fig, ax1 = plt.subplots(figsize=[7, 5])
has_club = df["Club"].notna()
for i, indices in enumerate([~has_club, has_club]):
    subset = df[indices]
    z = np.polyfit(subset[keyx], subset[keyy], 1)
    p = np.poly1d(z)
    fit = p(subset[keyx])
    ax1.scatter(subset[keyx], subset[keyy], label=labels[i])
    ax1.plot(subset[keyx], fit, ls=lstyles[i], color="Black")
# ax1.scatter(df[df["Club"].isin(df["Club"].dropna())][keyx].map(lambda x : x.seconds/60), df[df["Club"].isin(df["Club"].dropna())][keyy].map(lambda x : x.seconds/60))
ax1.set_xlabel(keyx)
ax1.set_ylabel(keyy)
ax1.legend()
# Residuals of the per-subset straight-line fits, with the residual standard
# deviation printed for each subset.
keyx = "Chip Time"
keyy = "Beat the Bridge"
labels = ["Non-Club Runner", "Club Runner"]
lstyles = ["--", ":"]

fig, ax1 = plt.subplots(figsize=[7, 5])
has_club = df["Club"].notna()
for i, indices in enumerate([~has_club, has_club]):
    subset = df[indices]
    z = np.polyfit(subset[keyx], subset[keyy], 1)
    p = np.poly1d(z)
    fit = p(subset[keyx])
    residual = subset[keyy] - fit
    ax1.scatter(subset[keyx], residual, label=labels[i])
    # ax1.plot(df[indices][keyx], fit, ls = lstyles[i], color = "Black")
    # ax1.scatter(df[df["Club"].isin(df["Club"].dropna())][keyx].map(lambda x : x.seconds/60), df[df["Club"].isin(df["Club"].dropna())][keyy].map(lambda x : x.seconds/60))
    print("stddev = ", np.std(residual))
ax1.set_xlabel(keyx)
ax1.set_ylabel(keyy)
ax1.legend()
# Histogram of the straight-line-fit residuals (keyy minus fit) for runners
# without and with a "Club" entry; prints each subset's residual std-dev.
keyx = "Chip Time"
keyy = "Beat the Bridge"
labels = ["Non-Club Runner", "Club Runner"]
bins = np.arange(-4., 3, 0.2)

fig = plt.figure(figsize=[7, 5])
ax1 = fig.add_subplot(111)
for i, indices in enumerate([~df["Club"].isin(df["Club"].dropna()), df["Club"].isin(df["Club"].dropna())]):
    z = np.polyfit(df[indices][keyx], df[indices][keyy], 1)
    p = np.poly1d(z)
    fit = p(df[indices][keyx])
    ax1.hist(df[indices][keyy] - fit, label=labels[i], bins=bins)
    print("stddev = ", np.std(df[indices][keyy] - fit))
# The x-axis carries the residual and the y-axis a count; the previous labels
# (keyx and keyy) described a scatter plot, not this histogram.
ax1.set_xlabel(keyy + " - Fit")
ax1.set_ylabel("Number")
ax1.legend()
# Side-by-side view: residual scatter on the left, horizontal residual
# histogram on the right. `bins` is the array defined in the previous cell.
keyx = "Chip Time"
keyy = "Beat the Bridge"
labels = ["Non-Club Runner", "Club Runner"]
lstyles = ["--", ":"]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[12, 8])
has_club = df["Club"].notna()
for i, indices in enumerate([~has_club, has_club]):
    subset = df[indices]
    z = np.polyfit(subset[keyx], subset[keyy], 1)
    p = np.poly1d(z)
    fit = p(subset[keyx])
    residual = subset[keyy] - fit
    ax1.scatter(subset[keyx], residual, label=labels[i])
    # ax1.plot(df[indices][keyx].map(lambda x : x.seconds/60), fit, ls = lstyles[i], color = "Black")
    # ax1.scatter(df[df["Club"].isin(df["Club"].dropna())][keyx].map(lambda x : x.seconds/60), df[df["Club"].isin(df["Club"].dropna())][keyy].map(lambda x : x.seconds/60))
    print("stddev = ", np.std(residual))
    ax2.hist(residual, label=labels[i], bins=bins, orientation="horizontal")

# Butt the two panels together and hide the right-hand panel's tick labels.
fig.subplots_adjust(wspace=0)
ax2.set_yticklabels("")
ax1.set_xlabel(keyx)
ax1.set_ylabel(keyy + " - Fit")
ax1.legend()
Doesn't look like it!
# NOTE(review): this cell looks stale. `clubs` is not assigned anywhere in the
# visible source, and the "sub..." columns are only created by
# convert_Time_to_seconds, which is never called at module level here - so
# these lines are expected to fail as written. Confirm before re-enabling.
# for name, group in club_groups:
# print(name, group["Chip Time"].map(lambda x : x.seconds/60).mean(), df_1["Chip Time"].map(lambda x : x.seconds/60).mean())
keyx = "subChip Time"
keyy = "subBeat the Bridge"
# Correlation and scatter of the two columns (divided by 60) for rows whose
# "Club" is in `clubs`.
corr_co = np.corrcoef(df[keyx][df["Club"].isin(clubs)]/60., df[keyy][df["Club"].isin(clubs)]/60.)
plt.scatter(df[keyx][df["Club"].isin(clubs)]/60., df[keyy][df["Club"].isin(clubs)]/60.)
# plt.scatter(df[keyx][df["Club"].isin(np.invert(clubs))]/60., df[keyy][df["Club"].isin(np.invert(clubs))]/60.)
plt.xlabel(keyx.replace("sub", "") + " Minutes")
plt.ylabel(keyy.replace("sub", "") + " Minutes")
# Show club, name and chip time for the same rows.
df[["Club", "Name", "subChip Time"]][df["Club"].isin(clubs)]
# NOTE(review): leftover scratch cell, disabled because it cannot run as
# written: `x.` is an incomplete expression (a SyntaxError that stops the
# whole file from parsing), and `t0` / `t1` only exist as local names inside
# the helper functions above, not at module level. Kept for reference;
# restore or delete once its intent is confirmed.
# t1 - t0
# col = df["Gun Time"] - t0
# x = col[0]
# x.
# col.sec