
Commit

Prepare clean datasets for Shiny app
bkowshik committed Sep 15, 2024
1 parent d8215b0 commit 00e8992
Showing 6 changed files with 335 additions and 26 deletions.
29 changes: 6 additions & 23 deletions frontend/app.py
@@ -24,34 +24,17 @@ def value():
     return f"{input.daterange()[0]} to {input.daterange()[1]}"
 
 script_dir = os.path.dirname(os.path.abspath(__file__))
-print(script_dir)
 parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
 log_dir = os.path.join(parent_dir, 'logs')
-data_dir = os.path.join(parent_dir, 'data')
+data_dir = os.path.join(parent_dir, 'data/clean') # Shiny app will use cleaned datasets.
 
 @render.data_frame
 def matches_df():
 
-    with open(os.path.join(data_dir, 'matches.txt'), encoding='utf-8') as f:
-        matches = json.loads(f.readlines()[-1])['matches']
-
-    df = []
-    for match in matches:
-        df.append({
-            'start_at': match['start_date'],
-            'end_at': match['end_date'],
-            'home_team': match['participants'][0]['name'],
-            'away_team': match['participants'][1]['name'],
-            'score': match['winning_margin'],
-        })
-    df = pd.DataFrame(df)
-    df['date'] = pd.to_datetime(df['start_at']).dt.strftime("%Y-%m-%d")
-    df['start_at'] = pd.to_datetime(df['start_at'])
-    df['end_at'] = pd.to_datetime(df['start_at'])
-    df['start_time'] = pd.to_datetime(df['start_at']).dt.strftime("%H:%M")
-    df['end_time'] = pd.to_datetime(df['end_at']).dt.strftime("%H:%M")
-    df['match_id'] = df.index + 1
-    print(df.head())
+    df = pd.read_csv(os.path.join(data_dir, 'matches.csv'), parse_dates=["start_at", "end_at"])
+    df['date'] = df['start_at'].dt.strftime("%Y-%m-%d")
+    df['start_time'] = df['start_at'].dt.strftime("%I:%M %p")
+    df['end_time'] = df['end_at'].dt.strftime("%I:%M %p")
 
     df_render = df.sort_values(by="start_at", ascending=False) \
         .loc[(df['date'] >= str(input.daterange()[0])) & (df['date'] <= str(input.daterange()[1])),
@@ -70,4 +53,4 @@ def matches_df():
         width="100%"
     )
 
-app = App(app_ui, server)
\ No newline at end of file
+app = App(app_ui, server)
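
Aside (not part of the commit): the time columns above switch from 24-hour "%H:%M" to 12-hour "%I:%M %p". A quick pandas check of the two formats, using an illustrative timestamp:

    import pandas as pd

    ts = pd.to_datetime("2024-09-15 19:30:00")  # illustrative kickoff time
    print(ts.strftime("%H:%M"))     # 19:30 (old format)
    print(ts.strftime("%I:%M %p"))  # 07:30 PM (new format)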
3 changes: 2 additions & 1 deletion isl_2024/_modidx.py
@@ -5,7 +5,8 @@
             'doc_host': 'https://bkowshik.github.io',
             'git_url': 'https://github.com/bkowshik/isl-2024',
             'lib_path': 'isl_2024'},
-  'syms': { 'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')},
+  'syms': { 'isl_2024.clean_datasets': {},
+            'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')},
             'isl_2024.scrape_live_stats': { 'isl_2024.scrape_live_stats.fetch_live_stats': ( 'scrape_live_stats.html#fetch_live_stats',
                                             'isl_2024/scrape_live_stats.py')},
             'isl_2024.scrape_matches': {},
69 changes: 69 additions & 0 deletions isl_2024/clean_datasets.py
@@ -0,0 +1,69 @@
"""Clean raw datasets for the Shiny app."""

# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_clean_datasets.ipynb.

# %% auto 0
__all__ = ['parent_dir', 'log_dir', 'data_dir', 'clean_data_dir', 'df']

# %% ../nbs/04_clean_datasets.ipynb 2
import warnings
warnings.filterwarnings('ignore')

import json
import logging
import os
import requests

import pandas as pd

# NOTE: Had to install the package with the following command for the import to work.
# python3 -m pip install -e '.[dev]'
from .utils import *

# %% ../nbs/04_clean_datasets.ipynb 4
try:
    # This will work when running as a script
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # This will work when running in a Jupyter notebook
    script_dir = os.getcwd()

parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
log_dir = os.path.join(parent_dir, 'logs')
data_dir = os.path.join(parent_dir, 'data')
clean_data_dir = os.path.join(parent_dir, 'data/clean')

if not os.path.exists(log_dir):
    os.makedirs(log_dir)

if not os.path.exists(data_dir):
    os.makedirs(data_dir)

if not os.path.exists(clean_data_dir):
    os.makedirs(clean_data_dir)
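
# NOTE: os.makedirs(d, exist_ok=True) would collapse each check above into one call.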

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'clean_datasets.log'), filemode='a')

# %% ../nbs/04_clean_datasets.ipynb 7
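# NOTE: matches.txt appears to be an append-only log where each scrape writes
# one JSON line, so the last line holds the latest snapshot of all matches.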
with open(os.path.join(data_dir, 'matches.txt'), encoding='utf-8') as f:
    matches = json.loads(f.readlines()[-1])['matches']

df = []
for match in matches:
    df.append({
        'start_at': match['start_date'],
        'end_at': match['end_date'],
        'home_team': match['participants'][0]['name'],
        'away_team': match['participants'][1]['name'],
        'score': match['winning_margin'],
    })
df = pd.DataFrame(df)
df['start_at'] = pd.to_datetime(df['start_at'])
df['end_at'] = pd.to_datetime(df['end_at'])
df['match_id'] = df.index + 1  # 1-based id from row order
df = df[['match_id', 'start_at', 'end_at', 'home_team', 'away_team', 'score']]

df.to_csv(os.path.join(clean_data_dir, 'matches.csv'), index=False)

print(df.shape)
df.head()
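
A minimal read-back check (not part of the commit; paths assumed from this diff) to confirm the round trip the Shiny app relies on: to_csv flattens datetimes to strings, and parse_dates restores them on load.

    import os
    import pandas as pd

    clean_data_dir = 'data/clean'  # assumed relative path
    df = pd.read_csv(os.path.join(clean_data_dir, 'matches.csv'),
                     parse_dates=['start_at', 'end_at'])
    print(df.dtypes)  # start_at and end_at come back as datetime64[ns]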
2 changes: 1 addition & 1 deletion isl_2024/scrape_wallstream.py
@@ -29,7 +29,7 @@
 parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
 log_dir = os.path.join(parent_dir, 'logs')
 data_dir = os.path.join(parent_dir, 'data/wallstream')
- 
+
 if not os.path.exists(log_dir):
     os.makedirs(log_dir)

2 changes: 1 addition & 1 deletion nbs/03_scrape_wallstream.ipynb
@@ -67,7 +67,7 @@
"parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))\n",
"log_dir = os.path.join(parent_dir, 'logs')\n",
"data_dir = os.path.join(parent_dir, 'data/wallstream')\n",
" \n",
"\n",
"if not os.path.exists(log_dir):\n",
" os.makedirs(log_dir)\n",
"\n",
