-
Notifications
You must be signed in to change notification settings - Fork 8
/
fix_tags.py
71 lines (57 loc) · 1.63 KB
/
fix_tags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pyes
from couchbase import Couchbase
import urllib2
import json
# Re-number every tag in the "tags" bucket: walk the rows returned by the
# get_tag_by_id view, assign each tag a sequential ``tid`` (1-based, in view
# order), delete tags whose ``count`` is zero, and store the number of view
# rows under the ``tcount`` key.
tb = Couchbase.connect("tags")

# urllib2 responses are not context managers, so close the connection
# explicitly once the body has been read.
response = urllib2.urlopen('http://localhost:8092/tags/_design/dev_qa/_view/get_tag_by_id')
try:
    doc = json.loads(response.read())
finally:
    response.close()

for i, row in enumerate(doc['rows'], 1):
    tag = tb.get(row['id']).value
    tag['tid'] = i
    # Single-argument parenthesized print gives the same output under the
    # Python 2 print statement.
    print(tag)
    if tag['count'] == 0:
        # Unused tag: remove it directly rather than writing it back first
        # and deleting it immediately afterwards.
        tb.delete(row['id'])
    else:
        tb.set(row['id'], tag)

# NOTE(review): the previous revision also kept a running count of the rows
# that survived deletion but never used it; ``tcount`` has always been the
# total number of view rows (i.e. the highest tid handed out), including the
# deleted ones. Confirm that is what readers of ``tcount`` expect.
print(len(doc['rows']))
tb.set('tcount', len(doc['rows']))
# Connect to the local Elasticsearch node and make sure the "tags" index
# exists with the mapping used for tag documents.
es_conn = pyes.ES('http://localhost:9200/')

# Field mapping for documents of type "tags-type":
#   tag - analyzed string (the tag text)
#   tid - not-analyzed integer (the tag id)
tags_mapping = {
    'tag': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'yes',
        'type': 'string',
        "term_vector": "with_positions_offsets"
    },
    'tid': {
        'boost': 1.0,
        'index': 'not_analyzed',
        'store': 'yes',
        'type': 'integer',
        "term_vector": "with_positions_offsets"
    }
}

# Initialize indices for different buckets.
# The two steps are independent: a missing "questions" index must not prevent
# the "tags" index from being created, and an already existing "tags" index
# is not an error. Other failures (e.g. the node being unreachable) are no
# longer swallowed silently.
# NOTE(review): deleting "questions" from a script that only rebuilds tags
# looks like a copy-paste leftover (possibly "tags" was intended) - confirm
# before relying on it. The index name is kept as it was.
es_conn.indices.delete_index_if_exists("questions")
es_conn.indices.create_index_if_missing("tags")

es_conn.indices.put_mapping("tags-type", {'properties': tags_mapping}, ["tags"])
# Push every tag document listed by the get_tag_by_id view into the "tags"
# Elasticsearch index, using the tag's ``tid`` as the document id.

# urllib2 responses are not context managers, so close the connection
# explicitly once the body has been read.
response = urllib2.urlopen('http://localhost:8092/tags/_design/dev_qa/_view/get_tag_by_id')
try:
    rows = json.loads(response.read())['rows']
finally:
    response.close()

# Document keys of all tags, in view order.
tids_list = [str(row['id']) for row in rows]

# NOTE(review): this re-queries the view after tag documents were rewritten
# and zero-count tags were deleted earlier in the script. If the view result
# still lists a deleted key, the bulk fetch below will presumably fail on it -
# confirm, and consider querying with stale=false or fetching in quiet mode.
if tids_list:
    # Fetch all tag documents in one round trip; skipped entirely when the
    # view is empty so no lookup result is read without having been set.
    val_res = tb.get_multi(tids_list)
    tags = [val_res[tid].value for tid in tids_list]
else:
    tags = []

for tag in tags:
    es_conn.index({'tag':tag['tag'], 'tid':tag['tid'], 'position':tag['tid']}, 'tags', 'tags-type', tag['tid'])

# Make the newly indexed tags visible to searches.
es_conn.indices.refresh('tags')