Skip to content

Commit

Permalink
First scraper to download match summary
Browse files Browse the repository at this point in the history
  • Loading branch information
bkowshik committed Sep 14, 2024
1 parent 9239ad5 commit 1bceee8
Show file tree
Hide file tree
Showing 8 changed files with 236 additions and 215 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,4 @@ cython_debug/

# Added by Bhargav.
_proc/
data/
59 changes: 11 additions & 48 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
# isl-2024
# Indian Super League, 2024


<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

This file will become your README and also the index of your
documentation.
## Installation

## Developer Guide
Install the latest version from the GitHub
[repository](https://github.com/bkowshik/isl-2024):

If you are new to using `nbdev` here are some useful pointers to get you
started.
``` bash
$ pip install git+https://github.com/bkowshik/isl-2024.git
```

### Install isl_2024 in Development mode
## Development

``` sh
``` bash
# make sure isl_2024 package is installed in development mode
$ pip install -e .

Expand All @@ -24,44 +25,6 @@ $ pip install -e .
$ nbdev_prepare
```

## Usage

### Installation

Install latest from the GitHub
[repository](https://github.com/bkowshik/isl-2024):

``` sh
$ pip install git+https://github.com/bkowshik/isl-2024.git
```

or from [conda](https://anaconda.org/bkowshik/isl-2024)

``` sh
$ conda install -c bkowshik isl_2024
```

or from [pypi](https://pypi.org/project/isl-2024/)

``` sh
$ pip install isl_2024
```

### Documentation

Documentation can be found hosted on this GitHub
[repository](https://github.com/bkowshik/isl-2024)’s
[pages](https://bkowshik.github.io/isl-2024/). Additionally you can find
package manager specific guidelines on
[conda](https://anaconda.org/bkowshik/isl-2024) and
[pypi](https://pypi.org/project/isl-2024/) respectively.

## How to use

Fill me in please! Don’t forget code examples:

``` python
1+1
```
## Scrapers

2
1. Scrape matches.
2 changes: 1 addition & 1 deletion isl_2024/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
'doc_host': 'https://bkowshik.github.io',
'git_url': 'https://github.com/bkowshik/isl-2024',
'lib_path': 'isl_2024'},
'syms': {'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')}}}
'syms': {'isl_2024.core': {'isl_2024.core.foo': ('core.html#foo', 'isl_2024/core.py')}, 'isl_2024.scrape_matches': {}}}
52 changes: 52 additions & 0 deletions isl_2024/scrape_matches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Scrape data about matches."""

# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_scrape_matches.ipynb.

# %% auto 0
__all__ = ['parent_dir', 'log_dir', 'data_dir', 'url', 'headers', 'response']

# %% ../nbs/01_scrape_matches.ipynb 2
import warnings
warnings.filterwarnings('ignore')

import json
import logging
import os
import requests

# %% ../nbs/01_scrape_matches.ipynb 4
# Work out the directory this code lives in. ``__file__`` exists when the
# module is run or imported as a script; inside a Jupyter notebook it does
# not, so the current working directory is used instead.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()

# Logs and scraped data live in sibling folders one level above script_dir.
parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))
log_dir = os.path.join(parent_dir, 'logs')
data_dir = os.path.join(parent_dir, 'data')

# exist_ok=True creates each folder only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)

# Append timestamped INFO-and-above records to logs/scrape_matches.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=os.path.join(log_dir, 'scrape_matches.log'),
    filemode='a',
)

# %% ../nbs/01_scrape_matches.ipynb 5
# Endpoint on indiansuperleague.com requested by this scraper; the query
# string is sent exactly as written.
url = 'https://www.indiansuperleague.com/default.aspx?methodtype=3&client=3747164737&sport=2&league=india_sl_stats&timezone=0530&language=&tournament=india_sl_stats_2024'
# Headers sent with the request: accept, referer and a desktop-Chrome user-agent.
headers = {
    'accept': '*/*',
    'referer': 'https://www.indiansuperleague.com/',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'
}
try:
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server; 30 seconds bounds how long the script can hang.
    response = requests.get(url, headers=headers, timeout=30)
except requests.RequestException:
    # Record the failure (with traceback) in the log, then re-raise so the
    # script still stops here exactly as it did before this handler existed.
    logging.exception('API request failed before a response was received.')
    raise

# %% ../nbs/01_scrape_matches.ipynb 6
# On HTTP 200, append the raw response body as one line to
# data/scrape_matches.txt; otherwise log the status code that came back.
if response.status_code == 200:
    logging.info('API request successful. Content length: %s', len(response.content))
    # Explicit UTF-8 so the write does not depend on the platform's default
    # encoding, which can fail on non-ASCII text in the response.
    with open(os.path.join(data_dir, 'scrape_matches.txt'), 'a', encoding='utf-8') as f:
        f.write(response.text + "\n")
else:
    logging.error('API request failed. Status code: %s', response.status_code)
61 changes: 0 additions & 61 deletions nbs/00_core.ipynb

This file was deleted.

134 changes: 134 additions & 0 deletions nbs/01_scrape_matches.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"#| default_exp scrape_matches"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Scrape matches\n",
"\n",
"> Scrape data about matches."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"import json\n",
"import logging\n",
"import os\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"from nbdev.showdoc import *"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"try:\n",
" # This will work when running as a script\n",
" script_dir = os.path.dirname(os.path.abspath(__file__))\n",
"except NameError:\n",
" # This will work when running in a Jupyter notebook\n",
" script_dir = os.getcwd()\n",
"\n",
"parent_dir = os.path.abspath(os.path.join(script_dir, os.pardir))\n",
"log_dir = os.path.join(parent_dir, 'logs')\n",
"data_dir = os.path.join(parent_dir, 'data')\n",
" \n",
"if not os.path.exists(log_dir):\n",
" os.makedirs(log_dir)\n",
"\n",
"if not os.path.exists(data_dir):\n",
" os.makedirs(data_dir)\n",
"\n",
"logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filename=os.path.join(log_dir, 'scrape_matches.log'), filemode='a')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"url = 'https://www.indiansuperleague.com/default.aspx?methodtype=3&client=3747164737&sport=2&league=india_sl_stats&timezone=0530&language=&tournament=india_sl_stats_2024'\n",
"headers = {\n",
" 'accept': '*/*',\n",
" 'referer': 'https://www.indiansuperleague.com/',\n",
" 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36'\n",
"}\n",
"response = requests.get(url, headers=headers)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| export\n",
"if response.status_code == 200:\n",
" logging.info('API request successful. Content length: {}'.format(len(response.content)))\n",
" with open(os.path.join(data_dir, 'scrape_matches.txt'), 'a') as f:\n",
" f.write(response.text + \"\\n\")\n",
"else:\n",
" logging.error('API request failed. Status code: {}'.format(response.status_code))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"import nbdev; nbdev.nbdev_export()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading

0 comments on commit 1bceee8

Please sign in to comment.