-
Notifications
You must be signed in to change notification settings - Fork 1
/
stackoverflow.py
42 lines (31 loc) · 983 Bytes
/
stackoverflow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import requests
from bs4 import BeautifulSoup
import json
# Scrape the first listing page of Law Stack Exchange questions and print the
# title, view count and vote count of each one as a single JSON document.

# Without a timeout, requests will wait forever on an unresponsive server.
res = requests.get("https://law.stackexchange.com/questions", timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# producing an empty result.
res.raise_for_status()
print(res.text)  # debug dump of the raw HTML
soup = BeautifulSoup(res.text, "html.parser")
print(soup)  # debug dump of the parsed document

questions_data = {
    "questions": []
}

# NOTE(review): these CSS selectors depend on the site's current markup;
# if the result list comes back empty, verify the class names first.
questions = soup.select(".question-summary")

# get data for all questions
for que in questions:
    title_tag = que.select_one('.question-hyperlink')
    votes_tag = que.select_one('.vote-count-post')
    views_tag = que.select_one('.views')

    # select_one() returns None when nothing matches, and the view count is
    # read from the element's "title" attribute. Skip summaries missing any
    # of these pieces rather than crashing and losing the rest of the page.
    if title_tag is None or votes_tag is None or views_tag is None:
        continue
    if 'title' not in views_tag.attrs:
        continue

    questions_data['questions'].append({
        "questions": title_tag.getText(),
        "views": views_tag.attrs['title'],
        "vote_count": votes_tag.getText()
    })

json_data = json.dumps(questions_data)
print(json_data)
# copy output json from terminal and validate using the following site
# https://jsonlint.com/
# see README.txt for sample scraped data