-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsuffix-trie.cpp
101 lines (96 loc) · 2.96 KB
/
suffix-trie.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#include <cstring>
#include <iostream>
#include <string>
const int N = 1000000, // maximum possible number of nodes in suffix tree
INF = 1000000000; // infinity constant
std::string a =
"aaaaa"; // input string for which the suffix tree is being built
int t[N][26], // array of transitions (state, letter)
l[N], // left...
r[N], // ...and right boundaries of the substring of a which correspond to
// incoming edge
p[N], // parent of the node
s[N], // suffix link
tv, // the node of the current suffix (if we're mid-edge, the lower node of
// the edge)
tp, // position in the string which corresponds to the position on the edge
// (between l[tv] and r[tv], inclusive)
ts, // the number of nodes
la; // the current character in the string
void ukkadd(int c) { // add character s to the tree
suff:; // we'll return here after each transition to the suffix (and will add
// character again)
if (r[tv] < tp) { // check whether we're still within the boundaries of the
// current edge
// if we're not, find the next edge. If it doesn't exist, create a leaf and
// add it to the tree
if (t[tv][c] == -1) {
t[tv][c] = ts;
l[ts] = la;
p[ts++] = tv;
tv = s[tv];
tp = r[tv] + 1;
goto suff;
}
tv = t[tv][c];
tp = l[tv];
} // otherwise just proceed to the next edge
if (tp == -1 || c == a[tp] - 'a')
tp++; // if the letter on the edge equal c, go down that edge
else {
// otherwise split the edge in two with middle in node ts
l[ts] = l[tv];
r[ts] = tp - 1;
p[ts] = p[tv];
t[ts][a[tp] - 'a'] = tv;
// add leaf ts+1. It corresponds to transition through c.
t[ts][c] = ts + 1;
l[ts + 1] = la;
p[ts + 1] = ts;
// update info for the current node - remember to mark ts as parent of tv
l[tv] = tp;
p[tv] = ts;
t[p[ts]][a[l[ts]] - 'a'] = ts;
ts += 2;
// prepare for descent
// tp will mark where are we in the current suffix
tv = s[p[ts - 2]];
tp = l[ts - 2];
// while the current suffix is not over, descend
while (tp <= r[ts - 2]) {
tv = t[tv][a[tp] - 'a'];
tp += r[tv] - l[tv] + 1;
}
// if we're in a node, add a suffix link to it, otherwise add the link to ts
// (we'll create ts on next iteration).
if (tp == r[ts - 2] + 1)
s[ts - 2] = tv;
else
s[ts - 2] = ts;
// add tp to the new edge and return to add letter to suffix
tp = r[tv] - (tp - r[ts - 2]) + 2;
goto suff;
}
}
void build() {
ts = 2;
tv = 0;
tp = 0;
std::fill(r, r + N, (int)a.size() - 1);
// initialize data for the root of the tree
s[0] = 1;
l[0] = -1;
r[0] = -1;
l[1] = -1;
r[1] = -1;
memset(t, -1, sizeof t);
std::fill(t[1], t[1] + 26, 0);
// add the text to the tree, letter by letter
for (la = 0; la < (int)a.size(); ++la)
ukkadd(a[la] - 'a');
}
int main(int argc, char const *argv[]) {
/* code */
build();
return 0;
}