---
layout: post
title: "Welcome to Jekyll!"
date: 2016-02-12 17:50:00
categories: main
---
Data_Analysis
In [27]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from astropy.time import Time
In [105]:
def convert_to_ap_Time(df, key):
    """Re-encode column ``key`` of ``df`` as astropy ``Time`` values.

    The column is first parsed with ``pd.to_datetime``; every parsed value is
    then rendered as a string and passed to ``Time`` with ``format="isot"``.
    The column is overwritten on the frame that was passed in, and that same
    frame is returned. The column name is printed before any work is done.
    """
    print(key)
    df[key] = pd.to_datetime(df[key])

    # Build the list of ISO strings explicitly before handing it to Time.
    iso_strings = []
    for stamp in df[key].values:
        iso_strings.append(stamp.astype(str))

    df[key] = Time(iso_strings, format="isot")
    return df

def convert_times_to_datetime(df):
    """Run both time conversions over each timing column of ``df``.

    Every listed column is passed through ``convert_to_ap_Time`` and then
    through ``convert_Time_to_seconds``, which adds a ``"sub" + column``
    companion column. The frame returned by the last conversion is returned.
    """
    time_columns = (
        "Gun Time",
        "Chip Time",
        "TOD",
        "Beat the Bridge",
        "Beat the Bridge.1",
    )

    for column in time_columns:
        df = convert_Time_to_seconds(convert_to_ap_Time(df, column), column)
    return df

def convert_Time_to_seconds(df, key, t0=None):
    """Add a ``"sub" + key`` column holding the seconds elapsed since ``t0``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``key`` column holds astropy ``Time`` values.
    key : str
        Name of the column to convert.
    t0 : astropy.time.Time, optional
        Reference instant. When omitted, midnight of the calendar date carried
        by the first value of the column is used.

    Returns
    -------
    pandas.DataFrame
        The frame that was passed in, with ``"sub" + key`` added.

    Notes
    -----
    The reference instant was previously fixed at 2017-05-04T00:00:00, which
    only yields seconds-since-midnight when the values themselves carry that
    date. NOTE(review): the date on the values presumably comes from
    ``pd.to_datetime`` filling in the run date for time-only strings - confirm.
    Deriving the reference from the data removes that dependency.
    """
    times = df[key].values

    if t0 is None and len(times) > 0:
        # "YYYY-MM-DDThh:mm:ss.sss" -> "YYYY-MM-DD"
        day = str(times[0].isot).split("T")[0]
        t0 = Time(day + "T00:00:00.000", format="isot")

    # Time - Time gives a TimeDelta; .sec is its length in seconds.
    df["sub" + key] = [(t - t0).sec for t in times]
    return df

def find_astronomers(df):
    """Return the rows of ``df`` whose "Name" is one of the four astronomers.

    Matching is exact (case-sensitive) and the original row index is kept.
    """
    wanted_names = ("Robert FIRTH", "Stephen BROWETT", "Mathew SMITH", "Sadie JONES")
    is_astronomer = df["Name"].isin(wanted_names)
    return df[is_astronomer]

def plot_hist_with_astronomers(df, astro_df, key):
    """Plot a histogram of ``df[key]`` in minutes and mark selected runners.

    Parameters
    ----------
    df : pandas.DataFrame
        All finishers; ``df[key]`` is divided by 60 before plotting.
    astro_df : pandas.DataFrame
        Rows for the highlighted runners. Must contain a "Name" column and
        the ``key`` column.
    key : str
        Column to plot, e.g. "subChip Time". Every occurrence of "sub" is
        removed from it to build the x-axis label.

    Returns
    -------
    None
        Everything is drawn onto the current pyplot figure.

    Notes
    -----
    Runners are looked up by name rather than by fixed row labels, so the
    function keeps working when the row index of the results changes. A
    runner that is absent from ``astro_df`` is skipped. Marker lines reach
    the tallest histogram bin instead of a fixed height.
    """
    # (name in the results table, legend label), in the order the lines are
    # drawn so each runner keeps the same colour from the colour cycle.
    runners = (
        ("Robert FIRTH", "Rob"),
        ("Mathew SMITH", "Mat"),
        ("Stephen BROWETT", "Steve"),
        ("Sadie JONES", "Sadie"),
    )

    mean_time = df[key].mean()/60
    median_time = df[key].median()/60

    counts, _edges, _patches = plt.hist(df[key]/60., bins = 100)
    line_top = counts.max()

    for full_name, label in runners:
        runner_values = astro_df.loc[astro_df["Name"] == full_name, key]
        if runner_values.empty:
            continue
        runner_time = runner_values.iloc[0]/60.
        plt.plot([runner_time, runner_time], [0, line_top], lw = 2, label = label)

    plt.plot([mean_time, mean_time], [0, line_top], lw = 2, color = "Black", ls = ":", label = "Mean")
    plt.plot([median_time, median_time], [0, line_top], lw = 2, color = "Black", ls = "--", label = "Median")
    plt.xlabel(key.replace("sub", "") + " Minutes")

    plt.legend()
In [95]:
results_path = "/Users/berto/Code/zoidberg/ABPSoton10k/data/Results10k.csv"

# Read the results table, then discard every row without a usable time:
# "DNF" or "QRY" in "Gun Time", or the "99:99:99" placeholder in
# "Beat the Bridge". Surviving rows keep their original index labels.
df = pd.read_csv(results_path)
# df = df.drop(df.index[len(df)-10:])
unusable = df["Gun Time"].isin(["DNF", "QRY"]) | (df["Beat the Bridge"] == "99:99:99")
df = df.drop(df.index[unusable])
In [96]:
# Show the column labels of the filtered results table.
df.columns
Out[96]:
Index(['Pos', 'Bib No', 'Fav', 'Share', 'Print', 'Name', 'Gun Time',
       'Chip Time', 'Category', 'Cat Pos', 'Gender', 'Gen Pos', 'Club', 'Pace',
       'TOD', 'Beat the Bridge', 'G/Pos', 'Beat the Bridge.1', 'G/Pos.1'],
      dtype='object')
In [97]:
# Convert the timing columns and add their "sub..." seconds companions;
# each processed column name is printed by convert_to_ap_Time.
df = convert_times_to_datetime(df)
Gun Time
Chip Time
TOD
Beat the Bridge
Beat the Bridge.1
In [98]:
# Rows of the results table belonging to the four named astronomers.
astro_df = find_astronomers(df)
In [99]:
# Display the astronomers' rows, including the derived "sub..." columns.
astro_df
Out[99]:
Pos Bib No Fav Share Print Name Gun Time Chip Time Category Cat Pos ... TOD Beat the Bridge G/Pos Beat the Bridge.1 G/Pos.1 subGun Time subChip Time subTOD subBeat the Bridge subBeat the Bridge.1
158 159 1662 NaN NaN NaN Robert FIRTH 2017-05-04T00:48:59.600 2017-05-04T00:48:07.600 Sen 89 ... 2017-05-04T11:19:05.600 2017-05-04T00:08:38.900 125 2017-05-04T00:08:38.900 125 2939.6 2887.6 40745.6 518.9 518.9
576 577 1560 NaN NaN NaN Sadie JONES 2017-05-04T00:59:35.600 2017-05-04T00:55:00.400 Sen 123 ... 2017-05-04T11:29:41.600 2017-05-04T00:09:27.200 80 2017-05-04T00:09:27.200 80 3575.6 3300.4 41381.6 567.2 567.2
737 738 1165 NaN NaN NaN Mathew SMITH 2017-05-04T01:01:54.300 2017-05-04T00:56:49.400 Sen 276 ... 2017-05-04T11:32:00.300 2017-05-04T00:09:52.100 316 2017-05-04T00:09:52.100 316 3714.3 3409.4 41520.3 592.1 592.1
1302 1303 1915 NaN NaN NaN Stephen BROWETT 2017-05-04T01:14:55.000 2017-05-04T01:09:49.700 Sen 364 ... 2017-05-04T11:45:01.000 2017-05-04T00:12:50.800 614 2017-05-04T00:12:50.800 614 4495.0 4189.7 42301.0 770.8 770.8

4 rows × 24 columns

In [34]:

In [102]:
# key = "subGun Time"
key = "subChip Time"

# This cell used to repeat the body of plot_hist_with_astronomers line for
# line; calling the helper keeps the two from drifting apart.
plot_hist_with_astronomers(df=df, astro_df=astro_df, key=key)
Out[102]:
<matplotlib.legend.Legend at 0x11f82a358>
In [106]:
# Same histogram, this time for the "Beat the Bridge" times.
plot_hist_with_astronomers(df=df, astro_df=astro_df, key="subBeat the Bridge")

Chip Time vs Bridge Time

In [107]:
keyx = "subChip Time"
keyy = "subBeat the Bridge"

# Both quantities are stored in seconds; plot and correlate them in minutes.
x_minutes = df[keyx] / 60.
y_minutes = df[keyy] / 60.

corr_co = np.corrcoef(x_minutes, y_minutes)

plt.scatter(x_minutes, y_minutes)

plt.xlabel("{} Minutes".format(keyx.replace("sub", "")))
plt.ylabel("{} Minutes".format(keyy.replace("sub", "")))
Out[107]:
<matplotlib.text.Text at 0x11fe25be0>
In [108]:
# Off-diagonal entry of the matrix returned by np.corrcoef for the two series.
print(corr_co[1,0])
0.981240820274

Time vs Bib Number

In [109]:
keyx = "subChip Time"
keyy = "Bib No"

# Chip time is converted to minutes; the bib number is used as stored.
x_minutes = df[keyx] / 60.
bib_numbers = df[keyy]

corr_co = np.corrcoef(x_minutes, bib_numbers)

plt.scatter(x_minutes, bib_numbers)

plt.xlabel("{} Minutes".format(keyx.replace("sub", "")))
plt.ylabel(keyy.replace("sub", ""))
Out[109]:
<matplotlib.text.Text at 0x11fee6e48>
In [110]:
# Off-diagonal entry of the matrix returned by np.corrcoef for the two series.
print(corr_co[1,0])
0.0808167937219
In [38]:
# Scatter of the "Pos" column against the "G/Pos" column; the two
# commented lines are alternative pairings that are currently disabled.
# plt.scatter(df["Pos"], df["subChip Time"])
# plt.scatter(df["subChip Time"], df["subBeat the Bridge"])
plt.scatter(df["Pos"], df["G/Pos"])
Out[38]:
<matplotlib.collections.PathCollection at 0x11a574978>
In [39]:
# print(df.groupby("Gender"))
# Gun time minus chip time (minutes) on x, gun time (minutes) on y.
gun_minus_chip_minutes = (df["subGun Time"] - df["subChip Time"]) / 60.
gun_minutes = df["subGun Time"] / 60.
plt.scatter(gun_minus_chip_minutes, gun_minutes)
Out[39]:
<matplotlib.collections.PathCollection at 0x1152feb00>
In [40]:
# Chip time in minutes against bib number (no axis labels are set here).
plt.scatter(df["subChip Time"]/60., df["Bib No"])
Out[40]:
<matplotlib.collections.PathCollection at 0x11a581470>
In [41]:
# df.
# The cell above was an unfinished attribute lookup that raised SyntaxError and
# stopped a top-to-bottom run. It is kept as a comment, and the stale traceback
# that followed it has been removed.
In [ ]:
# Column labels again, now including the derived "sub..." columns.
df.columns
In [ ]:
fig = plt.figure(figsize=[8, 4])
fig.subplots_adjust(left = 0.09, bottom = 0.13, top = 0.99,
                    right = 0.99, hspace=0, wspace = 0)

ax1 = fig.add_subplot(111)

# pd.read_csv stores missing clubs as NaN values, not as the string "NaN",
# so comparing against "NaN" never matches and the plot came out empty.
# isnull() selects the runners without a club.
no_club = df["Club"].isnull()

ax1.scatter(df.loc[no_club, "subChip Time"]/60., df.loc[no_club, "subBeat the Bridge"]/60., color = "Orange")
# ax1.scatter(df.loc[~no_club, "subChip Time"]/60., df.loc[~no_club, "subBeat the Bridge"]/60., color = "Blue")

ax1.set_xlabel("Chip Time Minutes")
ax1.set_ylabel("Beat the Bridge Minutes")
In [ ]:

In [77]:
# Distinct values of the "Club" column, in order of first appearance.
clubs = df["Club"].unique()
In [82]:
# Keep only real club names. The previous version removed whatever sat at
# position 1 (presumably the missing-value entry - confirm) and removed one
# more club each time the cell was re-run. Filtering on the value itself is
# independent of ordering and safe to run repeatedly.
clubs = [club for club in clubs if pd.notnull(club)]
In [111]:
keyx = "subChip Time"
keyy = "subBeat the Bridge"

# Restrict both series to runners whose club is in `clubs`, in minutes.
in_club = df["Club"].isin(clubs)
club_x_minutes = df[keyx][in_club] / 60.
club_y_minutes = df[keyy][in_club] / 60.

corr_co = np.corrcoef(club_x_minutes, club_y_minutes)

plt.scatter(club_x_minutes, club_y_minutes)
# plt.scatter(df[keyx][df["Club"].isin(np.invert(clubs))]/60., df[keyy][df["Club"].isin(np.invert(clubs))]/60.)

plt.xlabel("{} Minutes".format(keyx.replace("sub", "")))
plt.ylabel("{} Minutes".format(keyy.replace("sub", "")))
Out[111]:
<matplotlib.text.Text at 0x1203097b8>
In [115]:
# Club, name and chip time (seconds) for every runner whose club is in `clubs`.
df.loc[df["Club"].isin(clubs), ["Club", "Name", "subChip Time"]]
Out[115]:
Club Name subChip Time
0 ITCHEN SPITFIRES/ EALING SOUTHALL AND MIDDLESEX James HUGHES 2205.3
3 SOUTHAMPTON AC David BLACKMAN 2237.3
4 LORDSHILL ROADRUNNERS David REYNOLDS 2210.4
7 EASTLEIGH RUNNING CLUB James JOHNSON 2308.8
8 LORDSHILL ROADRUNNERS Alan GRAHAM 2283.8
11 SOUTHAMPTON AC Laura BRENTON 2255.1
12 RYDE HARRIERS Cole PEARCE 2356.5
13 STUBBINGTON GREEN RUNNERS Robert CRANSTONE 2384.0
14 SOUTHAMPTON AC Jordan MCRITCHIE 2388.7
15 LORDSHILL ROADRUNNERS Alice BOWLES 2395.1
17 BASINGSTOKE & MID HANTS AC Jared ROLFE 2401.5
18 FLEET & CROOKHAM AC Helen COZENS 2418.3
19 STUBBINGTON GREEN RUNNERS James LEE 2430.8
22 CITY OF SALISBURY ATHLETIC AND RUNNING CLUB Ian LAWRENCE 2476.9
23 BASILDON Miss Anna WHITFIELD 2395.3
25 ROMSEY ROAD RUNNERS Alex PRINSEP 2502.0
32 EASTLEIGH RUNNING CLUB Mark JONES 2449.9
33 RYDE HARRIERS Sean FURMIDGE 2547.9
37 VICTORY AC Darren KNIGHT 2562.4
40 LORDSHILL ROADRUNNERS Khalid ELKHEREIJI 2531.3
41 RYDE HARRIERS Harry FURMIDGE 2562.1
43 TOTTON RUNNING CLUB Eamonn RIVERS 2555.3
44 HEDGE END RUNNING CLUB Keith SHEPPARD 2587.2
47 HEDGE END RUNNING CLUB Kelly WICKENS 2621.6
49 WINCHESTER & DISTRICT AC Simon HARVEY 2636.7
66 HAVANT AC / PORTSMOUTH TRI Steve SQUIRES 2694.7
67 SOUTHAMPTON TRI CLUB Stuart SMITH 2702.6
70 LORDSHILL ROADRUNNERS Dean JONES 2613.5
73 ITCHEN SPITFIRES RUNNING CLUB Chris DANN 2713.0
78 ANDOVER AC Kay NOYCE 2756.6
... ... ... ...
1419 GOSPORT ROAD RUNNERS Karen HARDING 4544.2
1420 STUBBINGTON GREEN RUNNERS Kate VAUGHAN 4426.6
1421 STUBBINGTON GREEN RUNNERS Catherine CHAMBERLAIN 4428.0
1429 LORDSHILL ROADRUNNERS Danni MILWAIN 4457.6
1432 RUN4FUN Karen MANVELL 4432.0
1433 LORDSHILL ROADRUNNERS Lizi MOORCRAFT 4432.1
1439 GOSPORT ROAD RUNNERS Shona RUST 4470.9
1440 SOLENT RUNNING SISTERS Rebecca BARBER 4461.1
1446 LORDSHILL ROADRUNNERS John MORRISON 4503.0
1459 LORDSHILL ROADRUNNERS Stephanie ROSE 4534.6
1464 ROMSEY ROAD RUNNERS Naomi FARRINGTON 4560.0
1474 SOUTHAMPTON AC Lynda MASSEY 4686.1
1478 REDHILL DISTRICT ROYAL MAIL Douglas HAY 4777.3
1493 HATCH WARREN RUNNERS Anthony NICHOLLS 4649.3
1494 HATCH WARREN RUNNERS Paul FIELDING 4648.9
1506 SWEATSHOP RUNNING COMMUNITY Richard ISAACS 4707.9
1509 SOLENT RUNNING SISTERS Deborah HARVEY 4828.9
1537 BAFFINS FIT CLUB Claire GASSON 4854.9
1538 JCI UK Sylvia NAMAGANDA 4927.0
1549 VICTORY AC Tracy LONG 5033.2
1556 LORDSHILL ROADRUNNERS Carol TOWNSEND 5052.7
1565 RUN4FUN Emma WINNELL 5035.6
1567 WORTHY RUNNERS Susan REEVES 5056.2
1572 LORDSHILL ROADRUNNERS Laura TOMEI 5094.4
1582 SOLENT RUNNING SISTERS Dee ATYEO 5202.4
1584 SOLENT RUNNING SISTERS Hilary RANGER 5223.0
1590 HORLEY HARRIERS Catriona SAWYERS 5371.4
1605 ITCHEN SPITFIRES RUNNING CLUB Matthew DENNIS 5714.8
1610 DAWLISH COASTERS Patricia CLATWORTHY 6397.1
1615 ITCHEN SPITFIRES RUNNING CLUB Russell MEDDINGS 7245.5

271 rows × 3 columns

In [ ]:
# convert_to_ap_Time(df)
# NOTE(review): the commented call above omits the `key` argument that
# convert_to_ap_Time(df, key) requires.
# Reference instant for the scratch subtractions in the following cells. It is
# 2017-04-26, whereas the Time values displayed earlier in this notebook carry
# the date 2017-05-04.
t0 = Time("2017-04-26T00:00:00.000", format="isot")
In [ ]:

In [ ]:
# First stored value of the "Gun Time" column.
t1 = df["Gun Time"].values[0]
In [ ]:
# Display the value picked above.
t1
In [ ]:
# Difference between that single value and the reference instant t0.
t1 - t0
In [ ]:
# Same subtraction applied to the whole "Gun Time" column at once.
col = df["Gun Time"] - t0
In [ ]:
# Entry of `col` with index label 0.
x = col[0]
In [ ]:
# The cell originally stopped at "x.", which is a SyntaxError. The attribute
# used elsewhere in this notebook on such differences is `.sec`.
x.sec
In [ ]:
# `col` is a pandas Series, so `.sec` has to be read from each element
# rather than from the Series itself.
[delta.sec for delta in col.values]
In [ ]: