-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape.py
43 lines (39 loc) · 1.67 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import time
import urllib.request
import json
from bs4 import BeautifulSoup
from csv import writer
from datetime import datetime
def append_list_as_row(file_name, list_of_elem):
    """Append one row to a CSV file, creating the file if it does not exist.

    Args:
        file_name: Path of the CSV file to append to.
        list_of_elem: Iterable of field values; written as a single row by
            ``csv.writer`` (which quotes fields where needed).
    """
    # 'a' instead of 'a+': the file is only ever written, never read back.
    # newline='' lets the csv module control line endings itself, and an
    # explicit encoding keeps the output independent of the machine's locale.
    with open(file_name, 'a', newline='', encoding='utf-8') as write_obj:
        writer(write_obj).writerow(list_of_elem)
# Public occupancy widget that embeds the gym counts in an inline <script>.
OCCUPANCY_URL = 'https://portal.rockgympro.com/portal/public/bc4d3be86f2f8564a4e5e4f9151f6bf6/occupancy?&iframeid=occupancyCounter&fId=1837'
GYM_KEY = 'CNP'  # the Climb Nittany entry; the page also lists other gyms
OUTPUT_CSV = 'data.csv'
RECORD_COUNT = 11 * 6  # 66 samples, i.e. 11 hours at a 10 minute frequency
POLL_INTERVAL_SECONDS = 10 * 60
REQUEST_TIMEOUT_SECONDS = 30
# Spelled-out form of "%D"; "%D" is a platform extension and is not
# accepted by strftime everywhere.
TIMESTAMP_FORMAT = '%m/%d/%y %H:%M:%S'


def parse_occupancy_count(script_text, gym_key=GYM_KEY):
    """Return the ``count`` value of one gym from the page's inline data.

    The page stores its data as a JavaScript object literal with
    single-quoted keys, e.g. ``'CNP' : { 'count' : 23, ... }``. The object
    that follows ``'<gym_key>' : `` is cut out, its quotes are switched to
    double quotes, and the result is parsed as JSON.

    Args:
        script_text: Text containing the data literal.
        gym_key: Key of the gym entry to read.

    Returns:
        Whatever value the entry stores under ``"count"``.

    Raises:
        ValueError: If the gym entry is missing, is not a brace-delimited
            object, is not valid JSON after quote conversion, or has no
            ``"count"`` field.
    """
    marker = f"'{gym_key}' : "
    marker_pos = script_text.find(marker)
    if marker_pos == -1:
        raise ValueError(f'no {gym_key!r} entry found in page data')

    # Slice exactly one {...} object so the result does not depend on which
    # other gym entries follow this one.
    # NOTE(review): assumes the entry holds no nested objects and no '}' or
    # apostrophes inside its string values - confirm against the live page.
    open_pos = script_text.find('{', marker_pos + len(marker))
    close_pos = script_text.find('}', open_pos)
    if open_pos == -1 or close_pos == -1:
        raise ValueError(f'{gym_key!r} entry is not a complete object')

    # The page uses '' instead of "" for strings; JSON requires "".
    entry = json.loads(script_text[open_pos:close_pos + 1].replace("'", '"'))
    try:
        return entry['count']
    except KeyError:
        raise ValueError(f'{gym_key!r} entry has no "count" field') from None


def fetch_occupancy_count(url=OCCUPANCY_URL, gym_key=GYM_KEY,
                          timeout=REQUEST_TIMEOUT_SECONDS):
    """Download the occupancy page and return the current count for a gym.

    Raises:
        OSError: If the request fails or times out (``URLError`` is a
            subclass of ``OSError``).
        ValueError: If no script on the page holds a usable gym entry.
    """
    # The context manager closes the connection; the timeout keeps a stalled
    # server from hanging the whole run.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        page = response.read()

    soup = BeautifulSoup(page, 'html.parser')
    marker = f"'{gym_key}' : "
    # Look the script up by content rather than by a fixed position so the
    # page can add or remove other <script> tags without breaking the scrape.
    for script in soup.find_all('script'):
        script_text = str(script)
        if marker in script_text:
            return parse_occupancy_count(script_text, gym_key)
    raise ValueError(f'no <script> on the page contains a {gym_key!r} entry')


def main():
    """Log the gym occupancy to ``data.csv`` every 10 minutes for 11 hours.

    Each successful sample appends ``[timestamp, weekday, count]`` to the CSV
    and prints a one-line summary. A failed sample is reported and skipped,
    but still uses up its slot so the total run time stays the same.
    """
    for sample in range(RECORD_COUNT):
        try:
            count = fetch_occupancy_count()
        except (OSError, ValueError) as err:
            # One bad request or page change must not end an 11 hour run.
            print(f'skipping sample, could not read occupancy: {err!r}')
        else:
            now = datetime.now()
            timestamp = now.strftime(TIMESTAMP_FORMAT)
            append_list_as_row(OUTPUT_CSV, [timestamp, now.weekday(), count])
            print(f'{count} people at gym on {timestamp}')

        # No need to wait once the final sample has been taken.
        if sample < RECORD_COUNT - 1:
            time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == '__main__':
    main()