-
Notifications
You must be signed in to change notification settings - Fork 23
/
handlers.py
54 lines (41 loc) · 1.72 KB
/
handlers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import copy
import json
import logging
from smart_open import smart_open
logger = logging.getLogger(__name__)
class JSONWriter:
    """Write pages as newline-delimited JSON, one object per line."""

    def __init__(self, name):
        """Open ``name`` for writing through ``smart_open``."""
        self._file = smart_open(name, 'w')

    def page(self, page, content):
        """Append one JSON line made of ``content`` plus a ``"url"`` key.

        ``page`` becomes the value of the ``"url"`` key. When ``page`` is
        ``None`` or the empty string nothing is written and the skip is
        logged at INFO level. ``content`` is shallow-copied first, so the
        caller's mapping does not gain the ``"url"`` key.
        """
        if page is None or page == "":
            logger.info("Skipping page %s, page attribute is missing", page)
            return

        record = copy.copy(content)
        record["url"] = page
        line = json.dumps(record) + "\n"
        self._file.write(line)

    def finish(self):
        """Close the underlying output file."""
        self._file.close()
class CSVWriter:
    """Write pages as CSV rows in which every value is double-quoted.

    Each row holds the page URL followed by the values of ``_FIELDS`` taken
    from the page's content, in that order.

    Known limitations (kept from the original implementation):
      * Double-quote characters are removed from values, not escaped.
      * Commas and line breaks inside values are written unchanged. They
        sit inside a quoted value, but a consumer that splits naively on
        commas or newlines will still misread such rows.
    """

    # For CSV, write only these content fields, in only this order.
    _FIELDS = ('d:Title', 'd:Description', 'priority', 'topic')

    def __init__(self, name):
        """Open ``name`` for writing through ``smart_open``."""
        self._file = smart_open(name, 'w')

    @staticmethod
    def _clean(value):
        """Return ``value`` as text with every double quote removed.

        Byte strings are decoded as UTF-8 first so that the quote removal
        always operates on text; calling ``replace`` with ``str`` arguments
        on ``bytes`` raises ``TypeError`` on Python 3.
        """
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        return value.replace('"', '')

    def page(self, page, content):
        """Append one row for ``page`` built from ``content``.

        When ``page`` is ``None`` or the empty string nothing is written and
        the skip is logged at INFO level.

        Raises:
            KeyError: if ``content`` lacks one of ``_FIELDS``. The row is
                assembled completely before anything is written, so no
                partial row reaches the file in that case.
        """
        if page is None or page == "":
            logger.info("Skipping page %s, page attribute is missing", page)
            return

        values = [page]
        values.extend(content[field] for field in self._FIELDS)
        row = ",".join('"%s"' % self._clean(value) for value in values) + "\n"
        if not isinstance(row, str):
            # Only reachable on Python 2, where the cleaned row is ``unicode``:
            # encode to UTF-8 before writing, as the original code did.
            row = row.encode("utf-8")
        self._file.write(row)

    def finish(self):
        """Close the underlying output file."""
        self._file.close()