Scrape match commentary

bkowshik · Sep 14, 2024 · 2f93fdd
1 parent 2db98b6
commit 2f93fdd
Show file tree

Hide file tree

Showing 5 changed files with 197 additions and 8 deletions.
diff --git a/isl_2024/_modidx.py b/isl_2024/_modidx.py
@@ -7,4 +7,5 @@
                 'lib_path': 'isl_2024'},
   'syms': { 'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')},
             'isl_2024.scrape_live_stats': {},
-            'isl_2024.scrape_matches': {}}}
+            'isl_2024.scrape_matches': {},
+            'isl_2024.scrape_wallstream': {}}}
diff --git a/isl_2024/scrape_live_stats.py b/isl_2024/scrape_live_stats.py
@@ -1,11 +1,11 @@
 """Live stats of a match."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_scrape_live_stats.ipynb.
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_scrape_live_stats.ipynb.
 
 # %% auto 0
 __all__ = ['parent_dir', 'log_dir', 'data_dir', 'match_id', 'url', 'headers', 'response']
 
-# %% ../nbs/01_scrape_live_stats.ipynb 2
+# %% ../nbs/02_scrape_live_stats.ipynb 2
 import warnings
 warnings.filterwarnings('ignore')
 
@@ -14,7 +14,7 @@
 import os
 import requests
 
-# %% ../nbs/01_scrape_live_stats.ipynb 4
+# %% ../nbs/02_scrape_live_stats.ipynb 4
 try:
     # This will work when running as a script
     script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -34,8 +34,8 @@
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_live_stats.log'), filemode='a')
 
-# %% ../nbs/01_scrape_live_stats.ipynb 5
-match_id = 66794
+# %% ../nbs/02_scrape_live_stats.ipynb 5
+match_id = 66795
 url = f'https://www.indiansuperleague.com/football/live/india_sl_stats/json/{match_id}.json'
 headers = {
     'accept': '*/*',
@@ -44,7 +44,7 @@
 }
 response = requests.get(url, headers=headers)
 
-# %% ../nbs/01_scrape_live_stats.ipynb 6
+# %% ../nbs/02_scrape_live_stats.ipynb 6
 if response.status_code == 200:
     logging.info('API request successful. Content length: {}'.format(len(response.content)))
     with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f:

diff --git a/isl_2024/scrape_wallstream.py b/isl_2024/scrape_wallstream.py
@@ -0,0 +1,53 @@
+"""Match commentary."""
+
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/03_scrape_wallstream.ipynb.
+
+# %% auto 0
+__all__ = ['parent_dir', 'log_dir', 'data_dir', 'match_id', 'url', 'headers', 'response']
+
+# %% ../nbs/03_scrape_wallstream.ipynb 2
+import warnings
+warnings.filterwarnings('ignore')
+
+import json
+import logging
+import os
+import requests
+
+# %% ../nbs/03_scrape_wallstream.ipynb 4
+try:
+    # This will work when running as a script
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+except NameError:
+    # This will work when running in a Jupyter notebook
+    script_dir = os.getcwd()
+
+parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
+log_dir = os.path.join(parent_dir, 'logs')
+data_dir = os.path.join(parent_dir, 'data/wallstream')
+
+if not os.path.exists(log_dir):
+    os.makedirs(log_dir)
+
+if not os.path.exists(data_dir):
+    os.makedirs(data_dir)
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_wallstream.log'), filemode='a')
+
+# %% ../nbs/03_scrape_wallstream.ipynb 5
+match_id = 66795
+url = f"https://www.indiansuperleague.com/functions/wallstream/?sport_id=2&client_id=5KEUfrMT/+2lgecJyh42zA==&match_id={match_id}"
+headers = {
+    'accept': '*/*',
+    'referer': 'https://www.indiansuperleague.com/',
+    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'
+}
+response = requests.get(url, headers=headers)
+
+# %% ../nbs/03_scrape_wallstream.ipynb 6
+if response.status_code == 200:
+    logging.info('API request successful. Content length: {}'.format(len(response.content)))
+    with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f:
+        f.write(response.text + "\n")
+else:
+    logging.error('API request failed. Status code: {}'.format(response.status_code))
diff --git a/nbs/01_scrape_live_stats.ipynb b/nbs/02_scrape_live_stats.ipynb
@@ -80,7 +80,7 @@
    "outputs": [],
    "source": [
     "#| export\n",
-    "match_id = 66794\n",
+    "match_id = 66795\n",
     "url = f'https://www.indiansuperleague.com/football/live/india_sl_stats/json/{match_id}.json'\n",
     "headers = {\n",
     "    'accept': '*/*',\n",

diff --git a/nbs/03_scrape_wallstream.ipynb b/nbs/03_scrape_wallstream.ipynb
@@ -0,0 +1,135 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "#| default_exp scrape_wallstream"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Scrape wallstream\n",
+    "\n",
+    "> Match commentary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "import json\n",
+    "import logging\n",
+    "import os\n",
+    "import requests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "from nbdev.showdoc import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "try:\n",
+    "    # This will work when running as a script\n",
+    "    script_dir = os.path.dirname(os.path.abspath(__file__))\n",
+    "except NameError:\n",
+    "    # This will work when running in a Jupyter notebook\n",
+    "    script_dir = os.getcwd()\n",
+    "\n",
+    "parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))\n",
+    "log_dir = os.path.join(parent_dir, 'logs')\n",
+    "data_dir = os.path.join(parent_dir, 'data/wallstream')\n",
+    "    \n",
+    "if not os.path.exists(log_dir):\n",
+    "    os.makedirs(log_dir)\n",
+    "\n",
+    "if not os.path.exists(data_dir):\n",
+    "    os.makedirs(data_dir)\n",
+    "\n",
+    "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_wallstream.log'), filemode='a')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "match_id = 66795\n",
+    "url = f\"https://www.indiansuperleague.com/functions/wallstream/?sport_id=2&client_id=5KEUfrMT/+2lgecJyh42zA==&match_id={match_id}\"\n",
+    "headers = {\n",
+    "    'accept': '*/*',\n",
+    "    'referer': 'https://www.indiansuperleague.com/',\n",
+    "    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'\n",
+    "}\n",
+    "response = requests.get(url, headers=headers)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "if response.status_code == 200:\n",
+    "    logging.info('API request successful. Content length: {}'.format(len(response.content)))\n",
+    "    with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f:\n",
+    "        f.write(response.text + \"\\n\")\n",
+    "else:\n",
+    "    logging.error('API request failed. Status code: {}'.format(response.status_code))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "import nbdev; nbdev.nbdev_export()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}