-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcrawlertools.py
44 lines (42 loc) · 1.34 KB
/
crawlertools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import requests
import os
import hashlib
def get_image(url, path='', overwrite=0, hash=False): # overwrite 0 - skip, 1 - rename, 2 - overwrite
try:
s = requests.get(url)
except Exception as e:
print("Couldn't get image "+url)
print(e)
return
try:
filename = s.headers.get('Content-Disposition').replace('filename=', '', 1)
except:
filename = url.rsplit('/')[-1]
try:
filename, format = filename.rsplit('.')
format = '.'+format
except:
format = ''
try:
if path and not os.path.exists(path):
os.makedirs(path)
except:
pass
if hash:
filename = hashlib.md5(s.content).hexdigest()
if overwrite == 0:
if os.path.exists(os.path.join(path, filename+format)):
print(filename+format+' exists, skipped.')
return
elif overwrite == 1:
original_filename = filename
num = 2
while os.path.exists(os.path.join(path, filename+format)):
filename = original_filename+' ('+str(num)+')'
num += 1
try:
with open(os.path.join(path, filename+format), 'wb') as f:
f.write(s.content)
except Exception as e:
print("Couldn't get "+os.path.join(path, filename+format)+" : "+str(e))
print('Saved '+url+' as '+filename+format)