diff --git a/frontend/app.py b/frontend/app.py index 8a0e53a..245a29e 100644 --- a/frontend/app.py +++ b/frontend/app.py @@ -24,34 +24,17 @@ def value(): return f"{input.daterange()[0]} to {input.daterange()[1]}" script_dir = os.path.dirname(os.path.abspath(__file__)) - print(script_dir) parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir)) log_dir = os.path.join(parent_dir, 'logs') - data_dir = os.path.join(parent_dir, 'data') + data_dir = os.path.join(parent_dir, 'data/clean') # Shiny app will use cleaned datasets. @render.data_frame def matches_df(): - with open(os.path.join(data_dir, 'matches.txt'), encoding='utf-8') as f: - matches = json.loads(f.readlines()[-1])['matches'] - - df = [] - for match in matches: - df.append({ - 'start_at': match['start_date'], - 'end_at': match['end_date'], - 'home_team': match['participants'][0]['name'], - 'away_team': match['participants'][1]['name'], - 'score': match['winning_margin'], - }) - df = pd.DataFrame(df) - df['date'] = pd.to_datetime(df['start_at']).dt.strftime("%Y-%m-%d") - df['start_at'] = pd.to_datetime(df['start_at']) - df['end_at'] = pd.to_datetime(df['start_at']) - df['start_time'] = pd.to_datetime(df['start_at']).dt.strftime("%H:%M") - df['end_time'] = pd.to_datetime(df['end_at']).dt.strftime("%H:%M") - df['match_id'] = df.index + 1 - print(df.head()) + df = pd.read_csv(os.path.join(data_dir, 'matches.csv'), parse_dates=["start_at", "end_at"]) + df['date'] = df['start_at'].dt.strftime("%Y-%m-%d") + df['start_time'] = df['start_at'].dt.strftime("%I:%M %p") + df['end_time'] = df['end_at'].dt.strftime("%I:%M %p") df_render = df.sort_values(by="start_at", ascending=False) \ .loc[(df['date'] >= str(input.daterange()[0])) & (df['date'] <= str(input.daterange()[1])), @@ -70,4 +53,4 @@ def matches_df(): width="100%" ) -app = App(app_ui, server) \ No newline at end of file +app = App(app_ui, server) diff --git a/isl_2024/_modidx.py b/isl_2024/_modidx.py index d3422f3..c4bd0b1 100644 --- 
a/isl_2024/_modidx.py +++ b/isl_2024/_modidx.py @@ -5,7 +5,8 @@ 'doc_host': 'https://bkowshik.github.io', 'git_url': 'https://github.com/bkowshik/isl-2024', 'lib_path': 'isl_2024'}, - 'syms': { 'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')}, + 'syms': { 'isl_2024.clean_datasets': {}, + 'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')}, 'isl_2024.scrape_live_stats': { 'isl_2024.scrape_live_stats.fetch_live_stats': ( 'scrape_live_stats.html#fetch_live_stats', 'isl_2024/scrape_live_stats.py')}, 'isl_2024.scrape_matches': {}, diff --git a/isl_2024/clean_datasets.py b/isl_2024/clean_datasets.py new file mode 100644 index 0000000..d63eb51 --- /dev/null +++ b/isl_2024/clean_datasets.py @@ -0,0 +1,69 @@ +"""Clean raw datasets for the Shiny app.""" + +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/04_clean_datasets.ipynb. + +# %% auto 0 +__all__ = ['parent_dir', 'log_dir', 'data_dir', 'clean_data_dir', 'df'] + +# %% ../nbs/04_clean_datasets.ipynb 2 +import warnings +warnings.filterwarnings('ignore') + +import json +import logging +import os +import requests + +import pandas as pd + +# NOTE: Had to install the package with the following command for the import to work. 
+# python3 -m pip install -e '.[dev]' +from .utils import * + +# %% ../nbs/04_clean_datasets.ipynb 4 +try: + # This will work when running as a script + script_dir = os.path.dirname(os.path.abspath(__file__)) +except NameError: + # This will work when running in a Jupyter notebook + script_dir = os.getcwd() + +parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir)) +log_dir = os.path.join(parent_dir, 'logs') +data_dir = os.path.join(parent_dir, 'data') +clean_data_dir = os.path.join(parent_dir, 'data/clean') + +if not os.path.exists(log_dir): + os.makedirs(log_dir) + +if not os.path.exists(data_dir): + os.makedirs(data_dir) + +if not os.path.exists(clean_data_dir): + os.makedirs(clean_data_dir) + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'clean_datasets.log'), filemode='a') + +# %% ../nbs/04_clean_datasets.ipynb 7 +with open(os.path.join(data_dir, 'matches.txt'), encoding='utf-8') as f: + matches = json.loads(f.readlines()[-1])['matches'] + +df = [] +for match in matches: + df.append({ + 'start_at': match['start_date'], + 'end_at': match['end_date'], + 'home_team': match['participants'][0]['name'], + 'away_team': match['participants'][1]['name'], + 'score': match['winning_margin'], + }) +df = pd.DataFrame(df) +df['start_at'] = pd.to_datetime(df['start_at']) +df['end_at'] = pd.to_datetime(df['end_at']) +df['match_id'] = df.index + 1 +df = df[['match_id', 'start_at', 'end_at', 'home_team', 'away_team', 'score']] + +df.to_csv(os.path.join(clean_data_dir, 'matches.csv'), index=False) + +print(df.shape) +df.head() diff --git a/isl_2024/scrape_wallstream.py b/isl_2024/scrape_wallstream.py index 0d0432a..cf839fa 100644 --- a/isl_2024/scrape_wallstream.py +++ b/isl_2024/scrape_wallstream.py @@ -29,7 +29,7 @@ parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir)) log_dir = os.path.join(parent_dir, 'logs') data_dir = os.path.join(parent_dir, 'data/wallstream') - + if not
os.path.exists(log_dir): os.makedirs(log_dir) diff --git a/nbs/03_scrape_wallstream.ipynb b/nbs/03_scrape_wallstream.ipynb index fc01e59..b28a582 100644 --- a/nbs/03_scrape_wallstream.ipynb +++ b/nbs/03_scrape_wallstream.ipynb @@ -67,7 +67,7 @@ "parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))\n", "log_dir = os.path.join(parent_dir, 'logs')\n", "data_dir = os.path.join(parent_dir, 'data/wallstream')\n", - " \n", + "\n", "if not os.path.exists(log_dir):\n", " os.makedirs(log_dir)\n", "\n", diff --git a/nbs/04_clean_datasets.ipynb b/nbs/04_clean_datasets.ipynb new file mode 100644 index 0000000..3369c67 --- /dev/null +++ b/nbs/04_clean_datasets.ipynb @@ -0,0 +1,256 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| default_exp clean_datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clean datasets\n", + "\n", + "> Clean raw datasets for the Shiny app." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import json\n", + "import logging\n", + "import os\n", + "import requests\n", + "\n", + "import pandas as pd\n", + "\n", + "# NOTE: Had to install the package with the following command for the import to work.\n", + "# python3 -m pip install -e '.[dev]'\n", + "from isl_2024.utils import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "try:\n", + " # This will work when running as a script\n", + " script_dir = os.path.dirname(os.path.abspath(__file__))\n", + "except NameError:\n", + " # This will work when running in a Jupyter notebook\n", + " script_dir = os.getcwd()\n", + "\n", + "parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))\n", + "log_dir = os.path.join(parent_dir, 'logs')\n", + "data_dir = os.path.join(parent_dir, 'data')\n", + "clean_data_dir = os.path.join(parent_dir, 'data/clean')\n", + "\n", + "if not os.path.exists(log_dir):\n", + " os.makedirs(log_dir)\n", + "\n", + "if not os.path.exists(data_dir):\n", + " os.makedirs(data_dir)\n", + "\n", + "if not os.path.exists(clean_data_dir):\n", + " os.makedirs(clean_data_dir)\n", + "\n", + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'clean_datasets.log'), filemode='a')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matches" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "(84, 6)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " | match_id | \n", + "start_at | \n", + "end_at | \n", + "home_team | \n", + "away_team | \n", + "score | \n", + "
---|---|---|---|---|---|---|
0 | \n", + "1 | \n", + "2024-09-13 19:30:00+05:30 | \n", + "2024-09-13 19:30:00+05:30 | \n", + "Mohun Bagan Super Giant | \n", + "Mumbai City FC | \n", + "2 - 2 | \n", + "
1 | \n", + "2 | \n", + "2024-09-14 17:00:00+05:30 | \n", + "2024-09-14 17:00:00+05:30 | \n", + "Odisha FC | \n", + "Chennaiyin FC | \n", + "2 - 3 | \n", + "
2 | \n", + "3 | \n", + "2024-09-14 19:30:00+05:30 | \n", + "2024-09-14 19:30:00+05:30 | \n", + "Bengaluru FC | \n", + "East Bengal FC | \n", + "1 - 0 | \n", + "
3 | \n", + "4 | \n", + "2024-09-15 19:30:00+05:30 | \n", + "2024-09-15 19:30:00+05:30 | \n", + "Kerala Blasters FC | \n", + "Punjab FC | \n", + "\n", + " |
4 | \n", + "5 | \n", + "2024-09-16 19:30:00+05:30 | \n", + "2024-09-16 19:30:00+05:30 | \n", + "Mohammedan SC | \n", + "NorthEast United FC | \n", + "\n", + " |