-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathadd_age.py
55 lines (50 loc) · 1.54 KB
/
add_age.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import csv
import sys
import json
import util
import pprint
import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from urllib.request import urlopen
from util import make_list
from util import print_table
import make_main
# Minimum number of <table> elements a fetched page must contain before
# get_age stops reloading it.
INDEX_MIN = 2

# HKJC horse search URL (brand-number search); the horse's brand number is
# appended directly to the end of this string.
link_horseinfo = (
    "https://racing.hkjc.com/racing/information/chinese/Horse/HorseSearch.aspx"
    "?HorseName=&SearchType=BrandNumber&BrandNumber="
)

# Shared pretty-printer instance for ad-hoc debugging output.
pp = pprint.PrettyPrinter()
def get_age(url, horse_id):
    """Look up a horse's age on its horse-information web page.

    Opens ``url + horse_id`` in a fresh Chrome session, waits a fixed three
    seconds, and parses the rendered page. The page is reloaded (each time
    in a new browser session) until it contains at least ``INDEX_MIN``
    ``<table>`` elements. The age is then read from the first row of the
    fifth table, which must be labelled "出生地 / 馬齡".

    Args:
        url: URL prefix to which the brand number is appended.
        horse_id: Brand number of the horse, as a string.

    Returns:
        The text after the first "/" in the third cell of that row, with
        leading spaces removed, or "-" when the page does not have the
        expected layout.
    """
    tables = []
    while len(tables) < INDEX_MIN:
        driver = webdriver.Chrome()
        try:
            driver.get(url + horse_id)
            # Fixed pause so the page can finish rendering before its
            # source is read.
            time.sleep(3)
            soup = BeautifulSoup(driver.page_source, 'lxml')
            tables = soup.find_all('table')
        finally:
            # Always close the browser, even if loading or parsing fails,
            # so retries do not pile up orphaned Chrome processes.
            driver.quit()

    if len(tables) <= 4:
        return "-"

    # make_list is assumed to turn a <table> into a list of rows of cell
    # text -- TODO confirm against util.make_list. Parse the table once.
    rows = make_list(tables[4])
    if rows[0][0] != "出生地 / 馬齡":
        return "-"

    # The cell is expected to read "<origin> / <age>"; fall back to "-" like
    # the other unexpected-layout cases when there is no "/" separator.
    parts = rows[0][2].split("/")
    if len(parts) < 2:
        return "-"
    return parts[1].lstrip(' ')
# Read the tab-separated table named on the command line, fill in the empty
# column-23 (age) cells via get_age, and append every processed row to the
# output CSV.
if len(sys.argv) < 2:
    sys.exit("usage: add_age.py <input-file>")
filename = sys.argv[1]
table_main = []

# Inclusive range of row numbers to process. Row 0 is always skipped
# (presumably the header row -- verify against the input file).
first_row, last_row = 0, 2147483647

# newline='' lets the csv module do its own newline handling, as its
# documentation requires for file objects.
with open(filename, newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter='\t', quotechar='|')
    for cnt, row in enumerate(spamreader):
        if cnt == 0:
            continue
        if cnt < first_row or cnt > last_row:
            continue
        # Column 9 holds text ending in "(<brand number>)"; keep what lies
        # between the first "(" and the final character.
        horse_id = row[9].split('(')[1][:-1]
        if row[23] == "":
            row[23] = get_age(link_horseinfo, horse_id)
        # Written row by row, so rows already processed are on disk if a
        # later lookup fails.
        util.write_table_append([row], "output/Data Base (2018-2019) new.csv")