# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy import stats
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Set style for all visualizations
sns.set_palette('husl')
plt.style.use("seaborn-v0_8")

# Configure pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)

START_YEAR = 1999
END_YEAR = 2019

# Create a list of dataframes
dfs = []
for year in range(START_YEAR, END_YEAR + 1):
    df = pd.read_csv(f'./data/results-{year}.csv')
    df['year'] = year
    dfs.append(df)

# Concatenate all dataframes
results = pd.concat(dfs, ignore_index=True)

# Drop rows with missing values, even though we know there are none
results = results.dropna()

display(results)

# Convert time columns to datetime
time_columns = ['time_full', 'split_5k', 'split_10k', 'split_15k', 'split_20k', 
                'time_half', 'split_25k', 'split_30k', 'split_35k', 'split_40k']

for col in time_columns:
    dt = pd.to_datetime(results[col], format='%H:%M:%S')
    results[f'{col}_minutes'] = (
        dt.dt.hour * 60 +
        dt.dt.minute +
        dt.dt.second / 60
    )

# Remove any rows where the time is under 100 minutes
results = results[results['time_full_minutes'] >= 100]

# Basic information about the dataset
display("Dataset Shape:", results.shape)
display("Columns:", results.columns.tolist())
display("Data Types:", results.dtypes)
display("Missing Values:", results.isnull().sum())
display("Mean:", results['time_full_minutes'].mean())
display("Median:", results['time_full_minutes'].median())

'Dataset Shape:'

(290389, 28)

'Columns:'

['place_overall',
 'first_name',
 'last_name',
 'nationality',
 'club',
 'time_full',
 'gender',
 'split_5k',
 'split_10k',
 'split_15k',
 'split_20k',
 'time_half',
 'split_25k',
 'split_30k',
 'split_35k',
 'split_40k',
 'Race',
 'year',
 'time_full_minutes',
 'split_5k_minutes',
 'split_10k_minutes',
 'split_15k_minutes',
 'split_20k_minutes',
 'time_half_minutes',
 'split_25k_minutes',
 'split_30k_minutes',
 'split_35k_minutes',
 'split_40k_minutes']

'Data Types:'

place_overall          int64
first_name            object
last_name             object
nationality           object
club                  object
                      ...   
time_half_minutes    float64
split_25k_minutes    float64
split_30k_minutes    float64
split_35k_minutes    float64
split_40k_minutes    float64
Length: 28, dtype: object

'Missing Values:'

place_overall        0
first_name           0
last_name            0
nationality          0
club                 0
                    ..
time_half_minutes    0
split_25k_minutes    0
split_30k_minutes    0
split_35k_minutes    0
split_40k_minutes    0
Length: 28, dtype: int64

'Mean:'

np.float64(241.469336476244)

plt.figure(figsize=(12, 6))
sns.histplot(data=results, x='time_full_minutes', bins=50)
plt.title('Distribution of Marathon Finish Times, Berlin (1999-2019)')
plt.xlabel('Finish Time (minutes)')
plt.ylabel('Count')
plt.show()

gender_counts = results['gender'].value_counts()

plt.figure(figsize=(8, 6))
plt.pie(gender_counts, labels=gender_counts.index, autopct='%1.1f%%')
plt.title('Gender Distribution in Marathon Participants, Berlin (1999-2019)')
plt.show()

split_columns = [col for col in results.columns if 'minutes' in col]
correlation_matrix = results[split_columns].corr()

plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
plt.title('Correlation between Marathon Splits, Berlin (1999-2019)')
plt.show()

# Compare finish times between genders
male_times = results[results['gender'] == 'M']['time_full_minutes']
female_times = results[results['gender'] == 'W']['time_full_minutes']

# Calculate basic statistics
male_mean = male_times.mean()
female_mean = female_times.mean()
male_median = male_times.median()
female_median = female_times.median()

# Perform t-test to check if the difference is statistically significant
t_stat, pvalue = stats.ttest_ind(male_times, female_times, equal_var=False)

# Visualize the distributions with a histogram
plt.figure(figsize=(12, 6))
sns.histplot(male_times, color='blue', alpha=0.5, label='Male', kde=True)
sns.histplot(female_times, color='red', alpha=0.5, label='Female', kde=True)
plt.axvline(male_mean, color='blue', linestyle='--', label=f'Male Mean: {male_mean:.2f} min')
plt.axvline(female_mean, color='red', linestyle='--', label=f'Female Mean: {female_mean:.2f} min')
plt.title('Distribution of Marathon Finish Times by Gender, Berlin (1999-2019)')
plt.xlabel('Finish Time (minutes)')
plt.ylabel('Count')
plt.legend()
plt.show()

display("Statistical Analysis 1: Gender Performance Difference")
display(f"Male Mean: {male_mean:.2f} minutes")
display(f"Female Mean: {female_mean:.2f} minutes")
display(f"Difference: {abs(male_mean - female_mean):.2f} minutes")
display(f"Male Median: {male_median:.2f} minutes")
display(f"Female Median: {female_median:.2f} minutes")
display(f"T-statistic: {t_stat:.4f}")
display(f"P-value: {pvalue:.4f}")

'Statistical Analysis 1: Gender Performance Difference'

'Male Mean: 235.74 minutes'

'Female Mean: 263.07 minutes'

'Difference: 27.33 minutes'

'Male Median: 232.02 minutes'

'Female Median: 260.12 minutes'

'T-statistic: -146.8780'

'P-value: 0.0000'

yearly_means = results.groupby('year')['time_full_minutes'].mean()

# Visualize the trend with a line plot
plt.figure(figsize=(12, 6))
yearly_means.plot(kind='line', marker='o')
plt.title('Average Marathon Finish Time by Year, Berlin (1999-2019)')
plt.xlabel('Year')
plt.ylabel('Average Finish Time (minutes)')
plt.grid(True)
plt.show()

# Perform linear regression
slope, intercept, r_value, p_value, std_err = stats.linregress(yearly_means.index, yearly_means.values)

display("Statistical Analysis 2: Year-over-Year Performance Trend")
display(f"Slope: {slope:.4f}")
display(f"R-squared: {r_value**2:.4f}")
display(f"P-value: {p_value:.4f}")

'Statistical Analysis 2: Year-over-Year Performance Trend'

'Slope: 0.5230'

'R-squared: 0.3861'

'P-value: 0.0034'

# Calculate pace (minutes per kilometer) for each split
for i in range(1, len(split_columns)):
    split_distance = 5  # Each split is 5km
    results[f'pace_{i}'] = (results[split_columns[i]] - results[split_columns[i-1]]) / split_distance

# Box plot of pace distribution across splits
plt.figure(figsize=(12, 6))
pace_columns = [col for col in results.columns if 'pace_' in col]
results[pace_columns].boxplot()
plt.title('Pace Distribution Across Marathon 5k Splits, Berlin (1999-2019)')
plt.xlabel('Split Number')
plt.ylabel('Pace (minutes per kilometer)')
plt.show()

# Perform ANOVA test for pace differences across splits
pace_data = [results[col].dropna() for col in pace_columns]
f_stat, p_value = stats.f_oneway(*pace_data)

display("Statistical Analysis 3: Pace Analysis")
display(f"F-statistic: {f_stat:.4f}")
display(f"P-value: {p_value:.4f}")

'Statistical Analysis 3: Pace Analysis'

'F-statistic: 6325841.9236'

'P-value: 0.0000'

df = results.copy()
df['gender_code'] = df['gender'].str.strip().str.upper().str[0]

avg_overall = (
    df
    .groupby('year', as_index=False)['time_full_minutes']
    .mean()
)
avg_male = (
    df[df['gender_code']=='M']
    .groupby('year', as_index=False)['time_full_minutes']
    .mean()
)
avg_female = (
    df[df['gender_code']=='W']
    .groupby('year', as_index=False)['time_full_minutes']
    .mean()
)

display("Average Finish Time per Year", avg_overall)
display("Average Finish Time per Year for Males", avg_male)
display("Average Finish Time per Year for Females", avg_female)

'Average Finish Time per Year'

'Average Finish Time per Year for Males'

'Average Finish Time per Year for Females'

# Fit linear trend and forecast 2020–2050
def fit_and_forecast(df, start=2020, end=2050):
    if len(df) < 2:
        return pd.DataFrame({'year': [], 'pred': []})
    X = df[['year']].values
    y = df['time_full_minutes'].values
    model = LinearRegression().fit(X, y)
    future_years = np.arange(start, end + 1)
    preds = model.predict(future_years.reshape(-1, 1))
    return pd.DataFrame({'year': future_years, 'pred': preds})

fc_overall = fit_and_forecast(avg_overall)
fc_male    = fit_and_forecast(avg_male)
fc_female  = fit_and_forecast(avg_female)

# Plot each series on its own figure
series = [
    ('Overall Average', avg_overall, fc_overall),
    ('Male Average',    avg_male,    fc_male),
    ('Female Average',  avg_female,  fc_female)
]

for title, hist, fc in series:
    plt.figure(figsize=(8, 4))
    plt.plot(hist['year'], hist['time_full_minutes'],
             marker='o', linestyle='-', label='Historical')
    if not fc.empty:
        plt.plot(fc['year'], fc['pred'],
                 marker='x', linestyle='--', label='Forecast')
    plt.title(f"{title} Finish Time")
    plt.xlabel("Year")
    plt.ylabel("Average Finish Time (min)")
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

forecast_df = None
for fc, key in [(fc_overall, 'overall'),
                (fc_male,    'male'),
                (fc_female,  'female')]:
    col = f'pred_{key}'
    df_tmp = fc.rename(columns={'pred': col})
    if forecast_df is None:
        forecast_df = df_tmp
    else:
        forecast_df = forecast_df.merge(df_tmp, on='year', how='outer')

display(forecast_df.head(11))

# Compute 5th-percentile finish time per year
p5_overall = (
    df
    .groupby('year', as_index=False)['time_full_minutes']
    .quantile(0.05)
    .rename(columns={'time_full_minutes':'time_5pct'})
)
p5_male = (
    df[df['gender_code']=='M']
    .groupby('year', as_index=False)['time_full_minutes']
    .quantile(0.05)
    .rename(columns={'time_full_minutes':'time_5pct'})
)
p5_female = (
    df[df['gender_code']=='W']
    .groupby('year', as_index=False)['time_full_minutes']
    .quantile(0.05)
    .rename(columns={'time_full_minutes':'time_5pct'})
)

display("5th Percentile Finish Time per Year", p5_overall)
display("5th Percentile Finish Time per Year for Males", p5_male)
display("5th Percentile Finish Time per Year for Females", p5_female)

'5th Percentile Finish Time per Year'

'5th Percentile Finish Time per Year for Males'

'5th Percentile Finish Time per Year for Females'

# Helper to fit & forecast
def fit_and_forecast(df, label, start=2020, end=2050):
    if len(df) < 2:
        return pd.DataFrame({'year':[], f'pred_{label}':[]})
    X = df[['year']].values
    y = df['time_5pct'].values
    model = LinearRegression().fit(X, y)
    future = np.arange(start, end+1)
    preds = model.predict(future.reshape(-1,1))
    return pd.DataFrame({'year':future, f'pred_{label}':preds})

fc_overall = fit_and_forecast(p5_overall, 'overall')
fc_male    = fit_and_forecast(p5_male,    'male')
fc_female  = fit_and_forecast(p5_female,  'female')

# Plot each on its own figure
series = [
    ('Overall 5th-%ile', p5_overall, fc_overall),
    ('Male 5th-%ile',    p5_male,    fc_male),
    ('Female 5th-%ile',  p5_female,  fc_female),
]

for title, hist, fc in series:
    plt.figure(figsize=(8,4))
    plt.plot(hist['year'], hist['time_5pct'],
             marker='o', linestyle='-', label='Historical')
    if not fc.empty:
        plt.plot(fc['year'], fc[f'pred_{title.split()[0].lower()}'],
                 marker='x', linestyle='--', label='Forecast')
    plt.title(f"{title} Finish Time")
    plt.xlabel("Year")
    plt.ylabel("5th Percentile Finish Time (min)")
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.show()

forecast_df = None
for fc, key in [
    (fc_overall, 'overall'),
    (fc_male,    'male'),
    (fc_female,  'female')
]:
    if forecast_df is None:
        forecast_df = fc.copy()
    else:
        forecast_df = forecast_df.merge(fc, on='year', how='outer')

display(forecast_df.head(11))

SPLIT_COLS = [
    'split_5k_minutes', 'split_10k_minutes', 'split_15k_minutes',
    'split_20k_minutes', 'time_half_minutes',  'split_25k_minutes',
    'split_30k_minutes', 'split_35k_minutes', 'split_40k_minutes'
]

# Compute the 5th-percentile finish time by year & nationality
df_p5 = (
    results
    .groupby(['year','nationality'], as_index=False)['time_full_minutes']
    .quantile(0.05)
    .rename(columns={'time_full_minutes':'time_5pct'})
)

# Compute mean split times by year & nationality
mean_splits = (
    results
    .groupby(['year','nationality'], as_index=False)[SPLIT_COLS]
    .mean(numeric_only=True)
)

# Merge the two dataframes on the year and nationality columns
df = pd.merge(df_p5, mean_splits, on=['year','nationality'], how='inner')

display("5th Percentile Finish Time by Year and Nationality", df_p5)
display("Mean Split Times by Year and Nationality", mean_splits)
display("Merged Dataframe", df)

'5th Percentile Finish Time by Year and Nationality'

'Mean Split Times by Year and Nationality'

'Merged Dataframe'

top20 = (
    df.groupby('nationality')['time_5pct']
      .mean()
      .nsmallest(20)
      .index
)
df = df[df['nationality'].isin(top20)].reset_index(drop=True)

nat_dummies = pd.get_dummies(df['nationality'], prefix='nat', drop_first=True)

# Build feature matrix X and target y
X = pd.concat([ df[['year']], nat_dummies, df[SPLIT_COLS] ], axis=1)
y = df['time_5pct']

# Train/test split (keep test indices for plotting)
X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
    X, y, df.index, test_size=0.2, random_state=42
)

# Hyperparameter tuning via RandomizedSearchCV
param_dist = {
    'n_estimators':    [100, 200, 500],
    'max_depth':       [None, 5, 10, 20],
    'min_samples_leaf':[1, 2, 5],
    'max_features':    ['sqrt', 'log2', 0.5]
}
rf_base = RandomForestRegressor(random_state=42)

search = RandomizedSearchCV(
    rf_base,
    param_dist,
    n_iter=30,
    cv=5,
    scoring='r2',
    n_jobs=-1,
    random_state=42
)
search.fit(X_train, y_train)

display("Best hyperparameters:", search.best_params_)

'Best hyperparameters:'

{'n_estimators': 500,
 'min_samples_leaf': 1,
 'max_features': 0.5,
 'max_depth': 5}

# Fit final model with best params
rf_final = RandomForestRegressor(
    **search.best_params_, 
    random_state=42,
    n_jobs=-1
)
rf_final.fit(X_train, y_train)

# Evaluate on the test set
y_pred = rf_final.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2  = r2_score(y_test, y_pred)

display(f"Final MSE: {mse:.2f}")
display(f"Final R² : {r2:.3f}")

'Final MSE: 113.53'

'Final R² : 0.671'

# Prepare DataFrame for plotting colored by nationality
df_vis = pd.DataFrame({
    'actual':      y_test,
    'predicted':   y_pred,
    'nationality': df.loc[idx_test, 'nationality']
})

# Plot Actual vs Predicted
plt.figure(figsize=(10, 6))
for nat, grp in df_vis.groupby('nationality'):
    plt.scatter(grp['actual'], grp['predicted'], alpha=0.7, label=nat)
mn, mx = df_vis[['actual','predicted']].min().min(), df_vis[['actual','predicted']].max().max()
plt.plot([mn, mx], [mn, mx], 'k--', lw=1)
plt.xlabel("Actual 5th-Percentile Time (min)")
plt.ylabel("Predicted 5th-Percentile Time (min)")
plt.title("RandomForest Predictions by Nationality (Top 20)")
plt.legend(bbox_to_anchor=(1.05,1), loc='upper left')
plt.tight_layout()
plt.show()

# Plot Top 20 Feature Importances
importances = pd.Series(
    rf_final.feature_importances_,
    index=X.columns
).nlargest(20).sort_values(ascending=True)

plt.figure(figsize=(6,8))
importances.plot(kind='barh')
plt.title("Top 20 Feature Importances")
plt.xlabel("Importance")
plt.tight_layout()
plt.show()

	place_overall	first_name	last_name	nationality	club	time_full	gender	split_5k	split_10k	split_15k	split_20k	time_half	split_25k	split_30k	split_35k	split_40k	Race	year
0	1	Josephat	Kiprono	KEN	Kenia	02:06:44	M	00:15:09	00:30:21	00:45:25	01:00:34	01:03:54	01:14:54	01:30:10	01:45:05	02:00:08	Berlin	1999
1	2	Takayuki	Inubushi	JPN	Japan	02:06:57	M	00:15:09	00:30:21	00:45:27	01:00:36	01:03:54	01:14:56	01:30:18	01:45:21	02:00:18	Berlin	1999
2	3	Samson	Kandie	KEN	Kenia	02:08:31	M	00:15:10	00:30:21	00:45:27	01:00:35	01:03:54	01:14:55	01:30:18	01:45:59	02:01:41	Berlin	1999
3	4	Hicham	Chatt	MAR	Marokko	02:09:56	M	00:15:11	00:30:23	00:45:27	01:00:36	01:03:55	01:14:56	01:30:24	01:46:14	02:02:52	Berlin	1999
4	5	Henry	Cherono	KEN	Kenia	02:10:37	M	00:15:10	00:30:21	00:00:00	01:00:35	01:03:55	01:14:55	01:30:18	01:46:14	02:02:58	Berlin	1999
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
678702	30739	Wolfgang	Biedermann	GER	SC . LT SAD ,Spiridon	07:12:30	M	00:39:11	01:26:52	02:15:21	03:09:25	03:22:13	04:06:45	05:04:06	05:56:57	06:49:33	Berlin	2019
678705	13243	Elvira	Ernst	GER	261 Fearless	07:14:44	W	00:46:10	01:34:15	02:23:21	03:12:21	03:22:44	04:02:34	04:56:52	05:50:44	06:48:44	Berlin	2019
678706	30740	Gert	Kleinschmidt	GER	Dessauer SV 97	07:15:28	M	00:49:05	01:42:38	02:38:02	03:27:30	03:37:40	04:16:57	05:09:49	06:01:58	06:54:25	Berlin	2019
678709	30741	Oscar	Martinez	USA	BRIDERUNNERS	07:19:28	M	00:37:26	01:18:14	02:05:06	02:59:24	03:10:51	03:56:15	04:58:07	06:01:29	06:56:38	Berlin	2019
678710	30742	Dietmar	John	GER	Bayerische Laufzeitung-Der Che	07:26:09	M	00:42:11	01:29:17	02:21:20	03:13:21	03:24:52	04:09:27	05:07:35	06:04:09	07:01:16	Berlin	2019

	year	time_full_minutes
0	1999	230.064991
1	2000	240.524645
2	2001	233.795560
3	2002	232.986569
4	2004	235.138728
...	...	...
15	2015	240.178294
16	2016	243.868154
17	2017	244.849287
18	2018	251.155402
19	2019	243.218337

	year	time_full_minutes
0	1999	226.954815
1	2000	236.320251
2	2001	229.583860
3	2002	229.875422
4	2004	230.189993
...	...	...
15	2015	234.356227
16	2016	237.275377
17	2017	237.856427
18	2018	243.476952
19	2019	234.839135

	year	time_full_minutes
0	1999	248.983984
1	2000	261.797333
2	2001	256.749426
3	2002	255.008837
4	2004	256.924887
...	...	...
15	2015	260.288286
16	2016	264.953039
17	2017	264.489884
18	2018	270.961225
19	2019	265.641949

	year	pred_overall	pred_male	pred_female
0	2020	246.700278	239.066187	267.355158
1	2021	247.223248	239.390388	267.846358
2	2022	247.746218	239.714589	268.337559
3	2023	248.269189	240.038790	268.828760
4	2024	248.792159	240.362990	269.319960
...	...	...	...	...
6	2026	249.838100	241.011392	270.302362
7	2027	250.361071	241.335593	270.793562
8	2028	250.884041	241.659793	271.284763
9	2029	251.407012	241.983994	271.775964
10	2030	251.929982	242.308195	272.267164

Spring 2025 Data Science Project: Berlin Marathon Analysis¶

Team Members and Contributions¶

Contributions:¶

1. Introduction¶

2. Data Curation and Preprocessing¶

Data Source¶

Initial Data Overview¶

Data Loading and Preprocessing¶

3. Exploratory Data Analysis¶

Summary Statistics¶

Distribution of Finish Times¶

Distribution of Genders¶

Correlation Between Splits¶

Statistical Analysis 1: Gender Performance Difference¶

Statistical Analysis 2: Year-over-Year Performance Trend¶

Statistical Analysis 3: Pace Analysis¶

4. & 5. Primary Analysis / Visualization & Results¶

Linear Regression Model¶

6. Insights and Conclusions¶

Limitations and Future Work¶

References¶

	year	time_5pct
0	1999	174.600000
1	2000	178.503333
2	2001	176.500000
3	2002	172.883333
4	2004	175.809167
...	...	...
15	2015	174.815833
16	2016	175.596667
17	2017	176.086667
18	2018	176.410000
19	2019	171.783333

	year	time_5pct
0	1999	173.366667
1	2000	177.485000
2	2001	175.033333
3	2002	171.716667
4	2004	173.750000
...	...	...
15	2015	171.958333
16	2016	172.193333
17	2017	172.290833
18	2018	172.530000
19	2019	167.253333

	year	time_5pct
0	1999	198.833333
1	2000	204.840833
2	2001	204.325000
3	2002	196.910000
4	2004	201.631667
...	...	...
15	2015	199.773333
16	2016	201.045833
17	2017	200.101667
18	2018	199.150000
19	2019	197.733333

	year	pred_overall	pred_male	pred_female
0	2020	176.409344	172.740013	201.315839
1	2021	176.341105	172.542768	201.175376
2	2022	176.272866	172.345522	201.034913
3	2023	176.204627	172.148277	200.894450
4	2024	176.136389	171.951031	200.753987
...	...	...	...	...
6	2026	175.999911	171.556540	200.473062
7	2027	175.931672	171.359294	200.332599
8	2028	175.863433	171.162048	200.192136
9	2029	175.795194	170.964803	200.051673
10	2030	175.726955	170.767557	199.911210

	year	nationality	time_5pct
0	1999	ALG	197.746667
1	1999	ARG	181.234167
2	1999	AUS	176.785000
3	1999	AUT	168.150000
4	1999	BEL	144.116667
...	...	...	...
1613	2019	VIE	202.687500
1614	2019	WLS	238.513333
1615	2019	YEM	291.883333
1616	2019	ZAM	153.538333
1617	2019	ZIM	289.350000

	year	nationality	split_5k_minutes	split_10k_minutes	split_15k_minutes	split_20k_minutes	time_half_minutes	split_25k_minutes	split_30k_minutes	split_35k_minutes	split_40k_minutes
0	1999	ALG	27.458333	53.725000	34.233333	103.650000	109.441667	128.716667	155.941667	180.675000	205.825000
1	1999	ARG	0.000000	41.858333	62.775000	83.908333	88.675000	105.308333	128.008333	151.566667	176.675000
2	1999	AUS	22.180952	50.952381	75.569048	99.783333	105.314286	124.195238	150.426190	177.683333	206.145238
3	1999	AUT	16.486777	50.064738	72.923691	98.985399	104.813912	124.610055	151.518113	176.200895	207.681612
4	1999	BEL	17.378836	47.292328	69.284392	93.743386	97.434921	117.550794	142.859524	168.600529	194.963492
...	...	...	...	...	...	...	...	...	...	...	...
1613	2019	VIE	24.350000	48.100000	72.316667	95.541667	100.666667	119.141667	142.841667	166.483333	192.325000
1614	2019	WLS	28.333333	56.900000	85.755556	114.477778	120.655556	143.761111	173.461111	202.966667	233.827778
1615	2019	YEM	31.583333	64.800000	98.333333	132.183333	139.583333	167.016667	202.716667	240.600000	276.733333
1616	2019	ZAM	25.283333	50.983333	77.038889	102.533333	108.111111	128.777778	155.600000	182.977778	212.466667
1617	2019	ZIM	31.766667	62.866667	94.200000	125.516667	132.316667	157.166667	189.900000	226.216667	269.466667