-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Prepare clean datasets for Shiny app
- Loading branch information
Showing
6 changed files
with
335 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
"""Clean raw datasets for the Shiny app."""

# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_clean_datasets.ipynb.

# %% auto 0
__all__ = ['parent_dir', 'log_dir', 'data_dir', 'clean_data_dir', 'df']

# %% ../nbs/04_clean_datasets.ipynb 2
import warnings
# NOTE(review): this silences *all* warnings module-wide, which can hide real
# problems (e.g. pandas deprecation warnings); consider filtering only the
# specific categories that are noisy.
warnings.filterwarnings('ignore')

import json
import logging
import os
import requests

import pandas as pd

# NOTE: Had to install the package with the following command for the import to work.
# python3 -m pip install -e '.[dev]'
from .utils import *
# %% ../nbs/04_clean_datasets.ipynb 4
# Resolve the directory this code lives in, so all data/log paths are
# anchored to the project layout rather than the caller's working directory.
try:
    # __file__ is defined when running as a script/module.
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is undefined inside a Jupyter notebook; fall back to the cwd.
    script_dir = os.getcwd()

parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
log_dir = os.path.join(parent_dir, 'logs')
data_dir = os.path.join(parent_dir, 'data')
# Join path components separately (not 'data/clean') so the path stays
# portable across operating systems.
clean_data_dir = os.path.join(parent_dir, 'data', 'clean')

# exist_ok=True replaces the racy "check then create" pattern and is a no-op
# when the directory already exists.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)
os.makedirs(clean_data_dir, exist_ok=True)

# Append-mode file logging; one log file per pipeline stage.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=os.path.join(log_dir, 'clean_datasets.log'),
    filemode='a',
)
# %% ../nbs/04_clean_datasets.ipynb 7
# The raw file appears to be append-only JSON lines; the last line holds the
# most recent snapshot of matches (TODO confirm against the fetch step).
with open(os.path.join(data_dir, 'matches.txt'), encoding='utf-8') as f:
    matches = json.loads(f.readlines()[-1])['matches']

# Flatten each raw match record into one tidy row.
df = []
for match in matches:
    df.append({
        'start_at': match['start_date'],
        'end_at': match['end_date'],
        # participants[0]/[1] assumed to be home/away respectively —
        # verify against the API's ordering guarantee.
        'home_team': match['participants'][0]['name'],
        'away_team': match['participants'][1]['name'],
        'score': match['winning_margin'],
    })
df = pd.DataFrame(df)
df['start_at'] = pd.to_datetime(df['start_at'])
# BUG FIX: the original converted 'start_at' into the 'end_at' column,
# silently overwriting every real end date with the start date.
df['end_at'] = pd.to_datetime(df['end_at'])
# Stable 1-based surrogate key for the app.
df['match_id'] = df.index + 1
df = df[['match_id', 'start_at', 'end_at', 'home_team', 'away_team', 'score']]

df.to_csv(os.path.join(clean_data_dir, 'matches.csv'), index=False)

print(df.shape)
df.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.