forked from williamfiset/Algorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
/
LongestCommonPrefixArray.java
105 lines (93 loc) · 3.14 KB
/
LongestCommonPrefixArray.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/**
* This file shows you how to use a suffix array to construct the Longest Common Prefix (LCP) array
* using the kasai algorithm.
*
* <p>Time complexity: O(nlogn) for suffix array construct and O(n) for longest common prefix array
* construction, so O(nlogn) overall
*
* @author William Fiset, [email protected]
*/
package com.williamfiset.algorithms.strings;
import java.util.*;
// Example usage
public class LongestCommonPrefixArray {
public static void main(String[] args) {
String str = "abcbbccaabcaabc";
SuffixArray sa = new SuffixArray(str);
sa.display();
System.out.println("The LCP array is: " + Arrays.toString(sa.lcp));
}
public static class SuffixArray {
// ALPHABET_SZ is the default alphabet size, this may need to be much
// larger if you're using the LCS method with multiple sentinels
int ALPHABET_SZ = 256, N;
int[] T, lcp, sa, sa2, rank, tmp, c;
public SuffixArray(String str) {
this(toIntArray(str));
}
private static int[] toIntArray(String s) {
int[] text = new int[s.length()];
for (int i = 0; i < s.length(); i++) text[i] = s.charAt(i);
return text;
}
// Designated constructor
public SuffixArray(int[] text) {
T = text;
N = text.length;
sa = new int[N];
sa2 = new int[N];
rank = new int[N];
c = new int[Math.max(ALPHABET_SZ, N)];
construct();
kasai();
}
private void construct() {
int i, p, r;
for (i = 0; i < N; ++i) c[rank[i] = T[i]]++;
for (i = 1; i < ALPHABET_SZ; ++i) c[i] += c[i - 1];
for (i = N - 1; i >= 0; --i) sa[--c[T[i]]] = i;
for (p = 1; p < N; p <<= 1) {
for (r = 0, i = N - p; i < N; ++i) sa2[r++] = i;
for (i = 0; i < N; ++i) if (sa[i] >= p) sa2[r++] = sa[i] - p;
Arrays.fill(c, 0, ALPHABET_SZ, 0);
for (i = 0; i < N; ++i) c[rank[i]]++;
for (i = 1; i < ALPHABET_SZ; ++i) c[i] += c[i - 1];
for (i = N - 1; i >= 0; --i) sa[--c[rank[sa2[i]]]] = sa2[i];
for (sa2[sa[0]] = r = 0, i = 1; i < N; ++i) {
if (!(rank[sa[i - 1]] == rank[sa[i]]
&& sa[i - 1] + p < N
&& sa[i] + p < N
&& rank[sa[i - 1] + p] == rank[sa[i] + p])) r++;
sa2[sa[i]] = r;
}
tmp = rank;
rank = sa2;
sa2 = tmp;
if (r == N - 1) break;
ALPHABET_SZ = r + 1;
}
}
// Use Kasai algorithm to build LCP array
private void kasai() {
lcp = new int[N];
int[] inv = new int[N];
for (int i = 0; i < N; i++) inv[sa[i]] = i;
for (int i = 0, len = 0; i < N; i++) {
if (inv[i] > 0) {
int k = sa[inv[i] - 1];
while ((i + len < N) && (k + len < N) && T[i + len] == T[k + len]) len++;
lcp[inv[i] - 1] = len;
if (len > 0) len--;
}
}
}
public void display() {
System.out.printf("-----i-----SA-----LCP---Suffix\n");
for (int i = 0; i < N; i++) {
int suffixLen = N - sa[i];
String suffix = new String(T, sa[i], suffixLen);
System.out.printf("% 7d % 7d % 7d %s\n", i, sa[i], lcp[i], suffix);
}
}
}
}