-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathgetHtml.py
47 lines (40 loc) · 1.3 KB
/
getHtml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# use urllib to get resources
import socket
import urllib.request
class GetHtml:
    '''
    Small helper that downloads the raw body of a URL with urllib.

    Typical use: create an instance, call set() to choose the URL,
    optional request headers and the attempt limit, then call get().
    '''

    def __init__(self):
        self._url = 'http://www.google.com'
        # Initialise everything get() reads, so that calling get()
        # before set() does not fail with AttributeError.
        self._header = None
        self._retryTimes = 10

    def set(self, url, header=None, retryTimes=10):
        '''
        Configure the next request.

        url: url to get
        header: optional dict of request headers,
                as {'Accept':'application/json'}
        retryTimes: maximum number of attempts get() makes before giving up
        '''
        self._url = url
        self._header = header
        self._retryTimes = retryTimes

    def get(self):
        '''
        Fetch the configured url and return the response body as bytes.

        The request is attempted at most retryTimes times, each attempt
        using a 5 second timeout. Every failed attempt (HTTP error, URL
        error or socket timeout) prints a short message and is retried.
        Returns None when all attempts fail, or when retryTimes is less
        than 1 and no attempt is made.
        '''
        req = urllib.request.Request(self._url)
        if self._header:
            for key, value in self._header.items():
                req.add_header(key, value)

        for attempt in range(1, self._retryTimes + 1):
            try:
                # The with-block closes the response even if read() fails.
                with urllib.request.urlopen(req, timeout=5) as response:
                    return response.read()
            # HTTPError is a subclass of URLError, so it must be caught first.
            except urllib.error.HTTPError:
                print('HTTPError Retry', attempt, 'times')
            except urllib.error.URLError:
                print('URLError Retry', attempt, 'times')
            except socket.timeout:
                print('SOCKETError Retry', attempt, 'times')
        return None