-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsteam_store_scrapper.py
91 lines (68 loc) · 2.43 KB
/
steam_store_scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import requests
from bs4 import BeautifulSoup
import sqlite3
# Open the SQLite database file that receives the scraped prices
connection = sqlite3.connect('steam_prices.db')
# Cursor used by the INSERT statement issued in data() further down
c = connection.cursor()
# Maximum number of result pages in the Steam store search; passed to
# steam_scrapper() as its page limit.
# NOTE(review): hard-coded value, presumably observed when the script was
# written -- confirm it still matches the live store.
PAGE_NUM = 2653
def _row_price(row):
    """Return the price text of one search-result row.

    Falls back to "no price found" when the row has no price element or the
    element is empty, so every row always yields exactly one price entry.
    """
    name_block = row.find("div", "responsive_search_name_combined")
    if name_block is None:
        return "no price found"
    price_tag = name_block.find("div", "col search_price responsive_secondrow")
    if price_tag is None:
        return "no price found"
    return price_tag.text.strip() or "no price found"


def _parse_search_page(html):
    """Return (titles, links, prices) for one search page, or None.

    None is returned when the page has no "search_resultsRows" container.
    A result row without an href raises KeyError.
    """
    soup = BeautifulSoup(html, 'html.parser')
    search_results = soup.find(id="search_resultsRows")
    if search_results is None:
        return None
    rows = search_results.find_all("a", "search_result_row ds_collapse_flag")
    titles = [tag.text.strip() for tag in search_results.find_all("span", "title")]
    links = [row['href'] for row in rows]
    # One price per row (placeholder included) keeps prices aligned with links.
    prices = [_row_price(row) for row in rows]
    return titles, links, prices


def steam_scrapper(page_num, timeout=30):
    """Scrape titles, links and prices from the Steam store search pages.

    Pages 1 through ``page_num`` (inclusive) are requested one after another.
    A page that cannot be fetched or parsed is reported on stdout and skipped
    as a whole, so it never contributes a partial set of entries.

    Args:
        page_num: Number of the last search page to visit. Values below 1
            visit no page at all.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        ``[game_titles, game_links, game_prices]``. Links and prices are
        taken from the same result rows and always have equal length.
        Titles come from a separate query over the page.
        NOTE(review): titles are assumed to appear once per result row --
        confirm against the live markup.
    """
    game_titles = []
    game_links = []
    game_prices = []
    # Link to visit every page
    url_page = "https://store.steampowered.com/search/?page="

    # range() includes the last page and also terminates for page_num < 1,
    # where the former `while page != page_num` loop never stopped.
    for page in range(1, page_num + 1):
        try:
            r = requests.get(url_page + str(page), timeout=timeout)
            parsed = _parse_search_page(r.text)
        except (requests.RequestException, KeyError):
            parsed = None
        if parsed is None:
            print("something went wrong when scrapping")
            continue
        titles, links, prices = parsed
        game_titles.extend(titles)
        game_links.extend(links)
        game_prices.extend(prices)
    return [game_titles, game_links, game_prices]
def data_entry(items):
    """Insert every scraped game into the database through ``data``.

    Args:
        items: ``[game_titles, game_links, game_prices]``, three parallel
            lists indexed by game.

    Rows are inserted from the last index down to index 0. An index that is
    missing from one of the lists, or whose insert fails in SQLite, is
    reported on stdout and skipped so the remaining rows are still stored.
    """
    # Start at the last valid index and include index 0: the former countdown
    # began one past the end of the list and stopped before the first item.
    for index in range(len(items[0]) - 1, -1, -1):
        try:
            data(items[0][index], items[1][index], items[2][index])
        except (IndexError, sqlite3.Error):
            print("something went wrong when getting the data from lists at " + str(index))
def data(game_name, game_link, game_price):
    """Store one game in the ``prices`` table and commit immediately.

    Args:
        game_name: Value for the ``gameName`` column.
        game_link: Value for the ``link`` column.
        game_price: Value for the ``price`` column.
    """
    insert_sql = "INSERT INTO prices (gameName, price, link) VALUES (?,?,?)"
    # Parameter order follows the column list in the statement, not the
    # order of this function's arguments.
    row = (game_name, game_price, game_link)
    c.execute(insert_sql, row)
    connection.commit()
# Scrape the store and persist the results. The finally clause releases the
# cursor and the connection even when scraping or insertion raises.
try:
    items = steam_scrapper(PAGE_NUM)
    data_entry(items)
finally:
    c.close()
    connection.close()