-
Notifications
You must be signed in to change notification settings - Fork 0
/
jobrapido.py
46 lines (34 loc) · 1.36 KB
/
jobrapido.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import asyncio
import json
import re
import aiohttp
from bs4 import BeautifulSoup
from classes import Job
async def fetch_page(session, url: str) -> str:
    """Issue a GET for ``url`` via ``session`` and return the body as text.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with an awaitable ``text()`` method. The
            caller in this file passes an ``aiohttp.ClientSession``.
        url: Address to request.

    Returns:
        The value produced by awaiting ``response.text()``.
    """
    request = session.get(url)
    async with request as reply:
        # Read the body while the response context is still open.
        body = await reply.text()
    return body
# Matches any HTML tag (non-greedy); used to strip markup from advert titles.
# Compiled once at import time instead of on every loop iteration.
_HTML_TAG_RE = re.compile(r'<.*?>')


async def get_jobrapido_page(search: str, page: int) -> list:
    """Scrape one page of search results from br.jobrapido.com.

    Args:
        search: Search term. It is percent-encoded before being placed in
            the URL path, so reserved characters such as ``#``, ``?`` or
            ``/`` cannot truncate or alter the request URL.
        page: Page number, sent as the ``p`` query parameter.

    Returns:
        A list of ``Job`` objects, one for every matched result ``<div>``
        that carries a ``data-advert`` attribute. Empty when nothing matches.

    Raises:
        json.JSONDecodeError: If a ``data-advert`` payload is not valid JSON.
        KeyError: If a payload lacks ``title``, ``company``, ``location``
            or ``openAdvertUrl``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # safe="" also encodes "/", which would otherwise add a path segment.
    encoded_search = quote(search, safe="")
    url = f"https://br.jobrapido.com/Vagas-de-Emprego-para-{encoded_search}?p={page}"

    async with aiohttp.ClientSession() as session:
        html_body = await fetch_page(session, url)

    soup = BeautifulSoup(html_body, "html.parser")
    possible_jobs = soup.find_all("div", {"class": "result-item js-result-item"})

    jobs = []
    for job_element in possible_jobs:
        jobs_str = job_element.attrs.get('data-advert')
        if not jobs_str:
            # A result card without the JSON payload has nothing to parse;
            # skip it instead of aborting the whole page with a KeyError.
            continue
        job_attributes = json.loads(jobs_str)
        job = Job(
            title=_HTML_TAG_RE.sub('', job_attributes['title']),  # exclude tags
            company=job_attributes['company'],
            location=job_attributes['location'],
            link=job_attributes['openAdvertUrl'],
        )
        jobs.append(job)
    return jobs
async def get_jobrapido(search: str, max_pages: int = 19) -> list:
    """Collect jobs for ``search`` from several result pages concurrently.

    Args:
        search: Search term forwarded unchanged to ``get_jobrapido_page``.
        max_pages: Number of result pages to request; pages ``1`` through
            ``max_pages`` inclusive are fetched. The default of 19 matches
            the previously hard-coded ``range(1, 20)``. A value below 1
            requests nothing and yields an empty list.

    Returns:
        A flat list with the jobs of every page, in page order
        (``asyncio.gather`` returns results in the order of its awaitables).

    Raises:
        Exception: Whatever a page request raises; ``asyncio.gather`` is used
            with its default settings, so the first failure propagates.
    """
    page_requests = [
        get_jobrapido_page(search, page) for page in range(1, max_pages + 1)
    ]
    results = await asyncio.gather(*page_requests)
    # Flatten the per-page lists into a single list of jobs.
    return [job for page_jobs in results for job in page_jobs]