Skip to content

Commit

Permalink
Scrape match commentary
Browse files Browse the repository at this point in the history
  • Loading branch information
bkowshik committed Sep 14, 2024
1 parent 2db98b6 commit 2f93fdd
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 8 deletions.
3 changes: 2 additions & 1 deletion isl_2024/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
'lib_path': 'isl_2024'},
'syms': { 'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')},
'isl_2024.scrape_live_stats': {},
'isl_2024.scrape_matches': {}}}
'isl_2024.scrape_matches': {},
'isl_2024.scrape_wallstream': {}}}
12 changes: 6 additions & 6 deletions isl_2024/scrape_live_stats.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Live stats of a match."""

# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_scrape_live_stats.ipynb.
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_scrape_live_stats.ipynb.

# %% auto 0
__all__ = ['parent_dir', 'log_dir', 'data_dir', 'match_id', 'url', 'headers', 'response']

# %% ../nbs/01_scrape_live_stats.ipynb 2
# %% ../nbs/02_scrape_live_stats.ipynb 2
import warnings
warnings.filterwarnings('ignore')

Expand All @@ -14,7 +14,7 @@
import os
import requests

# %% ../nbs/01_scrape_live_stats.ipynb 4
# %% ../nbs/02_scrape_live_stats.ipynb 4
try:
# This will work when running as a script
script_dir = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -34,8 +34,8 @@

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_live_stats.log'), filemode='a')

# %% ../nbs/01_scrape_live_stats.ipynb 5
match_id = 66794
# %% ../nbs/02_scrape_live_stats.ipynb 5
match_id = 66795
url = f'https://www.indiansuperleague.com/football/live/india_sl_stats/json/{match_id}.json'
headers = {
'accept': '*/*',
Expand All @@ -44,7 +44,7 @@
}
response = requests.get(url, headers=headers)

# %% ../nbs/01_scrape_live_stats.ipynb 6
# %% ../nbs/02_scrape_live_stats.ipynb 6
if response.status_code == 200:
logging.info('API request successful. Content length: {}'.format(len(response.content)))
with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f:
Expand Down
53 changes: 53 additions & 0 deletions isl_2024/scrape_wallstream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Match commentary."""

# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/03_scrape_wallstream.ipynb.

# %% auto 0
__all__ = ['parent_dir', 'log_dir', 'data_dir', 'match_id', 'url', 'headers', 'response']

# %% ../nbs/03_scrape_wallstream.ipynb 2
import warnings
warnings.filterwarnings('ignore')

import json
import logging
import os
import requests

# %% ../nbs/03_scrape_wallstream.ipynb 4
# NOTE(review): this module is generated from ../nbs/03_scrape_wallstream.ipynb;
# mirror this change in the notebook or the next export will overwrite it.

# Resolve the directory this code is running from.
try:
    # Works when running as a script, where __file__ is defined.
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is not defined in a Jupyter notebook; use the working directory.
    script_dir = os.getcwd()

# Logs and scraped data live one level above the script directory.
parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
log_dir = os.path.join(parent_dir, 'logs')
data_dir = os.path.join(parent_dir, 'data/wallstream')

# exist_ok=True makes directory creation idempotent and removes the
# check-then-create race when two scraper runs start at the same time.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)

# Append to a single log file so successive runs accumulate history.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=os.path.join(log_dir, 'scrape_wallstream.log'),
    filemode='a',
)

# %% ../nbs/03_scrape_wallstream.ipynb 5
# NOTE(review): this module is generated from ../nbs/03_scrape_wallstream.ipynb;
# mirror this change in the notebook or the next export will overwrite it.

# Match whose commentary ("wallstream") feed is fetched.
match_id = 66795
url = f"https://www.indiansuperleague.com/functions/wallstream/?sport_id=2&client_id=5KEUfrMT/+2lgecJyh42zA==&match_id={match_id}"
# Browser-like headers sent with the request.
headers = {
    'accept': '*/*',
    'referer': 'https://www.indiansuperleague.com/',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'
}
try:
    # requests applies no timeout by default, so without one a stalled
    # connection would block the scraper indefinitely.
    response = requests.get(url, headers=headers, timeout=30)
except requests.RequestException:
    # Record the transport failure (with traceback) before propagating it,
    # so the failure is visible in the log as well as on stderr.
    logging.exception('API request failed before a response was received.')
    raise

# %% ../nbs/03_scrape_wallstream.ipynb 6
# NOTE(review): this module is generated from ../nbs/03_scrape_wallstream.ipynb;
# mirror this change in the notebook or the next export will overwrite it.

# Persist the payload only on HTTP 200; any other status is logged as an error.
if response.status_code == 200:
    logging.info('API request successful. Content length: %s', len(response.content))
    # Each run appends one line to <match_id>.txt. The encoding is explicit so
    # non-ASCII text cannot fail on platforms whose locale encoding is not UTF-8.
    with open(os.path.join(data_dir, f'{match_id}.txt'), 'a', encoding='utf-8') as f:
        f.write(response.text + "\n")
else:
    logging.error('API request failed. Status code: %s', response.status_code)
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"outputs": [],
"source": [
"#| export\n",
"match_id = 66794\n",
"match_id = 66795\n",
"url = f'https://www.indiansuperleague.com/football/live/india_sl_stats/json/{match_id}.json'\n",
"headers = {\n",
" 'accept': '*/*',\n",
Expand Down
135 changes: 135 additions & 0 deletions nbs/03_scrape_wallstream.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"#| default_exp scrape_wallstream"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrape wallstream\n",
"\n",
"> Match commentary."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"import json\n",
"import logging\n",
"import os\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"from nbdev.showdoc import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"try:\n",
" # This will work when running as a script\n",
" script_dir = os.path.dirname(os.path.abspath(__file__))\n",
"except NameError:\n",
" # This will work when running in a Jupyter notebook\n",
" script_dir = os.getcwd()\n",
"\n",
"parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))\n",
"log_dir = os.path.join(parent_dir, 'logs')\n",
"data_dir = os.path.join(parent_dir, 'data/wallstream')\n",
" \n",
"if not os.path.exists(log_dir):\n",
" os.makedirs(log_dir)\n",
"\n",
"if not os.path.exists(data_dir):\n",
" os.makedirs(data_dir)\n",
"\n",
"logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_wallstream.log'), filemode='a')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"match_id = 66795\n",
"url = f\"https://www.indiansuperleague.com/functions/wallstream/?sport_id=2&client_id=5KEUfrMT/+2lgecJyh42zA==&match_id={match_id}\"\n",
"headers = {\n",
" 'accept': '*/*',\n",
" 'referer': 'https://www.indiansuperleague.com/',\n",
" 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'\n",
"}\n",
"response = requests.get(url, headers=headers)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"if response.status_code == 200:\n",
" logging.info('API request successful. Content length: {}'.format(len(response.content)))\n",
" with open(os.path.join(data_dir, f'{match_id}.txt'), 'a') as f:\n",
" f.write(response.text + \"\\n\")\n",
"else:\n",
" logging.error('API request failed. Status code: {}'.format(response.status_code))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"import nbdev; nbdev.nbdev_export()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

0 comments on commit 2f93fdd

Please sign in to comment.