From 2f93fdd4b1062019da98019cb15318f23a073faf Mon Sep 17 00:00:00 2001 From: Bhargav Kowshik Date: Sat, 14 Sep 2024 18:24:16 +0530 Subject: [PATCH] Scrape match commentary --- isl_2024/_modidx.py | 3 +- isl_2024/scrape_live_stats.py | 12 +- isl_2024/scrape_wallstream.py | 53 +++++++ ...stats.ipynb => 02_scrape_live_stats.ipynb} | 2 +- nbs/03_scrape_wallstream.ipynb | 135 ++++++++++++++++++ 5 files changed, 197 insertions(+), 8 deletions(-) create mode 100644 isl_2024/scrape_wallstream.py rename nbs/{01_scrape_live_stats.ipynb => 02_scrape_live_stats.ipynb} (99%) create mode 100644 nbs/03_scrape_wallstream.ipynb diff --git a/isl_2024/_modidx.py b/isl_2024/_modidx.py index b222107..afd3e9b 100644 --- a/isl_2024/_modidx.py +++ b/isl_2024/_modidx.py @@ -7,4 +7,5 @@ 'lib_path': 'isl_2024'}, 'syms': { 'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')}, 'isl_2024.scrape_live_stats': {}, - 'isl_2024.scrape_matches': {}}} + 'isl_2024.scrape_matches': {}, + 'isl_2024.scrape_wallstream': {}}} diff --git a/isl_2024/scrape_live_stats.py b/isl_2024/scrape_live_stats.py index a51926c..5c75d89 100644 --- a/isl_2024/scrape_live_stats.py +++ b/isl_2024/scrape_live_stats.py @@ -1,11 +1,11 @@ """Live stats of a match.""" -# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_scrape_live_stats.ipynb. +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_scrape_live_stats.ipynb. # %% auto 0 __all__ = ['parent_dir', 'log_dir', 'data_dir', 'match_id', 'url', 'headers', 'response'] -# %% ../nbs/01_scrape_live_stats.ipynb 2 +# %% ../nbs/02_scrape_live_stats.ipynb 2 import warnings warnings.filterwarnings('ignore') @@ -14,7 +14,7 @@ import os import requests -# %% ../nbs/01_scrape_live_stats.ipynb 4 +# %% ../nbs/02_scrape_live_stats.ipynb 4 try: # This will work when running as a script script_dir = os.path.dirname(os.path.abspath(__file__)) @@ -34,8 +34,8 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_live_stats.log'), filemode='a') -# %% ../nbs/01_scrape_live_stats.ipynb 5 -match_id = 66794 +# %% ../nbs/02_scrape_live_stats.ipynb 5 +match_id = 66795 url = f'https://www.indiansuperleague.com/football/live/india_sl_stats/json/{match_id}.json' headers = { 'accept': '*/*', @@ -44,7 +44,7 @@ } response = requests.get(url, headers=headers) -# %% ../nbs/01_scrape_live_stats.ipynb 6 +# %% ../nbs/02_scrape_live_stats.ipynb 6 if response.status_code == 200: logging.info('API request successful. Content length: {}'.format(len(response.content))) with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f: diff --git a/isl_2024/scrape_wallstream.py b/isl_2024/scrape_wallstream.py new file mode 100644 index 0000000..6be426d --- /dev/null +++ b/isl_2024/scrape_wallstream.py @@ -0,0 +1,53 @@ +"""Match commentary.""" + +# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/03_scrape_wallstream.ipynb. + +# %% auto 0 +__all__ = ['parent_dir', 'log_dir', 'data_dir', 'match_id', 'url', 'headers', 'response'] + +# %% ../nbs/03_scrape_wallstream.ipynb 2 +import warnings +warnings.filterwarnings('ignore') + +import json +import logging +import os +import requests + +# %% ../nbs/03_scrape_wallstream.ipynb 4 +try: + # This will work when running as a script + script_dir = os.path.dirname(os.path.abspath(__file__)) +except NameError: + # This will work when running in a Jupyter notebook + script_dir = os.getcwd() + +parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir)) +log_dir = os.path.join(parent_dir, 'logs') +data_dir = os.path.join(parent_dir, 'data/wallstream') + +if not os.path.exists(log_dir): + os.makedirs(log_dir) + +if not os.path.exists(data_dir): + os.makedirs(data_dir) + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_wallstream.log'), filemode='a') + +# %% ../nbs/03_scrape_wallstream.ipynb 5 +match_id = 66795 +url = f"https://www.indiansuperleague.com/functions/wallstream/?sport_id=2&client_id=5KEUfrMT/+2lgecJyh42zA==&match_id={match_id}" +headers = { + 'accept': '*/*', + 'referer': 'https://www.indiansuperleague.com/', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36' +} +response = requests.get(url, headers=headers) + +# %% ../nbs/03_scrape_wallstream.ipynb 6 +if response.status_code == 200: + logging.info('API request successful. Content length: {}'.format(len(response.content))) + with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f: + f.write(response.text + "\n") +else: + logging.error('API request failed. Status code: {}'.format(response.status_code)) diff --git a/nbs/01_scrape_live_stats.ipynb b/nbs/02_scrape_live_stats.ipynb similarity index 99% rename from nbs/01_scrape_live_stats.ipynb rename to nbs/02_scrape_live_stats.ipynb index af3a263..852c170 100644 --- a/nbs/01_scrape_live_stats.ipynb +++ b/nbs/02_scrape_live_stats.ipynb @@ -80,7 +80,7 @@ "outputs": [], "source": [ "#| export\n", - "match_id = 66794\n", + "match_id = 66795\n", "url = f'https://www.indiansuperleague.com/football/live/india_sl_stats/json/{match_id}.json'\n", "headers = {\n", " 'accept': '*/*',\n", diff --git a/nbs/03_scrape_wallstream.ipynb b/nbs/03_scrape_wallstream.ipynb new file mode 100644 index 0000000..7d654e0 --- /dev/null +++ b/nbs/03_scrape_wallstream.ipynb @@ -0,0 +1,135 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| default_exp scrape_wallstream" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Scrape wallstream\n", + "\n", + "> Match commentary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import json\n", + "import logging\n", + "import os\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "from nbdev.showdoc import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "try:\n", + " # This will work when running as a script\n", + " script_dir = os.path.dirname(os.path.abspath(__file__))\n", + "except NameError:\n", + " # This will work when running in a Jupyter notebook\n", + " script_dir = os.getcwd()\n", + "\n", + "parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))\n", + "log_dir = os.path.join(parent_dir, 'logs')\n", + "data_dir = os.path.join(parent_dir, 'data/wallstream')\n", + " \n", + "if not os.path.exists(log_dir):\n", + " os.makedirs(log_dir)\n", + "\n", + "if not os.path.exists(data_dir):\n", + " os.makedirs(data_dir)\n", + "\n", + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_wallstream.log'), filemode='a')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "match_id = 66795\n", + "url = f\"https://www.indiansuperleague.com/functions/wallstream/?sport_id=2&client_id=5KEUfrMT/+2lgecJyh42zA==&match_id={match_id}\"\n", + "headers = {\n", + " 'accept': '*/*',\n", + " 'referer': 'https://www.indiansuperleague.com/',\n", + " 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'\n", + "}\n", + "response = requests.get(url, headers=headers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "if response.status_code == 200:\n", + " logging.info('API request successful. Content length: {}'.format(len(response.content)))\n", + " with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f:\n", + " f.write(response.text + \"\\n\")\n", + "else:\n", + " logging.error('API request failed. Status code: {}'.format(response.status_code))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "import nbdev; nbdev.nbdev_export()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}