forked from nihaljn/news-search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrie.py
148 lines (114 loc) · 4.99 KB
/
trie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
class Node:
def __init__(self):
self.prefixes = 0
self.words = 0
# self.children = [None for i in range (128)]
self.children = {}
def add(self, suffix, currentIndex):
'''Adds suffix[currenIndex : ] to self'''
if currentIndex == len(suffix):
self.words = self.words + 1
self.prefixes = self.prefixes + 1
else:
self.prefixes = self.prefixes + 1
firstChar = suffix[currentIndex]
# if self.children[ord(firstChar)] == None:
# newNode = Node()
# self.children[ord(firstChar)] = newNode
if ord(firstChar) not in self.children:
newNode = Node()
self.children[ord(firstChar)] = newNode
currentIndex += 1
self.children[ord(firstChar)].add(suffix, currentIndex)
def count_words(self, suffix, currentIndex):
'''Returns the count of words ending with suffix[currentIndex]'''
if currentIndex == len(suffix):
return self.words
else:
firstChar = suffix[currentIndex]
# if self.children[ord(firstChar)] == None:
# return 0
if ord(firstChar) not in self.children:
return 0
else:
currentIndex += 1
return self.children[ord(firstChar)].count_words(suffix, currentIndex)
def count_prefixes(self, suffix, currentIndex):
'''Returns the count of prefixes ending with suffix[currentIndex]'''
if currentIndex == len(suffix):
return self.prefixes
else:
firstChar = suffix[currentIndex]
# if self.children[ord(firstChar)] == None:
# return 0
if ord(firstChar) not in self.children:
return 0
else:
currentIndex += 1
return self.children[ord(firstChar)].count_prefixes(suffix, currentIndex)
class CollectionNode(Node):
def __init__(self):
super(CollectionNode,self).__init__()
self.documents = set()
self.titles = set()
def add_title(self, suffix, currentIndex, docID):
'''Adds suffix[currenIndex : ] to self and appends docID to self.titles
at the end of suffix'''
if currentIndex == len(suffix):
self.words = self.words + 1
self.prefixes = self.prefixes + 1
self.titles.add(docID)
else:
self.prefixes = self.prefixes + 1
firstChar = suffix[currentIndex]
# if self.children[ord(firstChar)] == None:
# newNode = CollectionNode()
# self.children[ord(firstChar)] = newNode
if ord(firstChar) not in self.children:
newNode = CollectionNode()
self.children[ord(firstChar)] = newNode
currentIndex += 1
self.children[ord(firstChar)].add_title(suffix, currentIndex, docID)
def add_document(self, suffix, currentIndex, docID):
'''Adds docID to self.documents at the end of suffix'''
if currentIndex == len(suffix):
self.words = self.words + 1
self.prefixes = self.prefixes + 1
self.documents.add(docID)
else:
self.prefixes = self.prefixes + 1
firstChar = suffix[currentIndex]
# if self.children[ord(firstChar)] == None:
# newNode = CollectionNode()
# self.children[ord(firstChar)] = newNode
if ord(firstChar) not in self.children:
newNode = CollectionNode()
self.children[ord(firstChar)] = newNode
currentIndex += 1
self.children[ord(firstChar)].add_document(suffix, currentIndex, docID)
def get_doc_list(self, suffix, currentIndex):
'''Returns list of documents containing suffix'''
if currentIndex == len(suffix):
return self.documents
else:
firstChar = suffix[currentIndex]
# if self.children[ord(firstChar)] == None:
# return []
if ord(firstChar) not in self.children:
return []
else:
currentIndex += 1
return self.children[ord(firstChar)].get_doc_list(suffix, currentIndex)
def get_title_list(self, suffix, currentIndex):
'''Returns list of titles that contain suffix'''
if currentIndex == len(suffix):
return self.titles
else:
firstChar = suffix[currentIndex]
# if self.children[ord(firstChar)] == None:
# return []
if ord(firstChar) not in self.children:
return []
else:
currentIndex += 1
return self.children[ord(firstChar)].get_title_list(suffix, currentIndex)