-
Notifications
You must be signed in to change notification settings - Fork 522
/
SuffixArray.java
126 lines (115 loc) · 5.02 KB
/
SuffixArray.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package strings;
import java.util.*;
import java.util.stream.IntStream;
// https://en.wikipedia.org/wiki/Suffix_array
public class SuffixArray {
// build suffix array in O(n*log(n))
public static int[] suffixArray(CharSequence S) {
int n = S.length();
// Stable sort of characters.
// Same characters are sorted by their position in descending order.
// E.g. last character which represents suffix of length 1 should be ordered first among same characters.
int[] sa = IntStream.range(0, n)
.mapToObj(i -> n - 1 - i)
.sorted(Comparator.comparingInt(S::charAt))
.mapToInt(Integer::intValue)
.toArray();
int[] classes = S.chars().toArray();
// sa[i] - suffix on i'th position after sorting by first len characters
// classes[i] - equivalence class of the i'th suffix after sorting by first len characters
for (int len = 1; len < n; len *= 2) {
// Calculate classes for suffixes of length len * 2
int[] c = classes.clone();
for (int i = 0; i < n; i++) {
// Condition sa[i - 1] + len < n emulates 0-symbol at the end of the string.
// A separate class is created for each suffix followed by emulated 0-symbol.
classes[sa[i]] = i > 0 && c[sa[i - 1]] == c[sa[i]] && sa[i - 1] + len < n
&& c[sa[i - 1] + len / 2] == c[sa[i] + len / 2]
? classes[sa[i - 1]]
: i;
}
// Suffixes are already sorted by first len characters
// Now sort suffixes by first len * 2 characters
int[] cnt = IntStream.range(0, n).toArray();
int[] s = sa.clone();
for (int i = 0; i < n; i++) {
// s[i] - order of suffixes sorted by first len characters
// (s[i] - len) - order of suffixes sorted only by second len characters
int s1 = s[i] - len;
// sort only suffixes of length > len, others are already sorted
if (s1 >= 0)
sa[cnt[classes[s1]]++] = s1;
}
}
return sa;
}
// sort rotations of a string in O(n*log(n))
public static int[] rotationArray(CharSequence S) {
int n = S.length();
int[] sa = IntStream.range(0, n)
.boxed()
.sorted(Comparator.comparingInt(S::charAt))
.mapToInt(Integer::intValue)
.toArray();
int[] classes = S.chars().toArray();
for (int len = 1; len < n; len *= 2) {
int[] c = classes.clone();
for (int i = 0; i < n; i++)
classes[sa[i]] =
i > 0 && c[sa[i - 1]] == c[sa[i]] && c[(sa[i - 1] + len / 2) % n] == c[(sa[i] + len / 2) % n]
? classes[sa[i - 1]]
: i;
int[] cnt = IntStream.range(0, n).toArray();
int[] s = sa.clone();
for (int i = 0; i < n; i++) {
int s1 = (s[i] - len + n) % n;
sa[cnt[classes[s1]]++] = s1;
}
}
return sa;
}
// longest common prefixes array in O(n)
public static int[] lcp(int[] sa, CharSequence s) {
int n = sa.length;
int[] rank = new int[n];
for (int i = 0; i < n; i++) rank[sa[i]] = i;
int[] lcp = new int[n - 1];
for (int i = 0, h = 0; i < n; i++) {
if (rank[i] < n - 1) {
for (int j = sa[rank[i] + 1]; Math.max(i, j) + h < s.length() && s.charAt(i + h) == s.charAt(j + h);
++h)
;
lcp[rank[i]] = h;
if (h > 0)
--h;
}
}
return lcp;
}
// Usage example
public static void main(String[] args) {
String s1 = "abcab";
int[] sa1 = suffixArray(s1);
// print suffixes in lexicographic order
for (int p : sa1) System.out.println(s1.substring(p));
System.out.println("lcp = " + Arrays.toString(lcp(sa1, s1)));
// random test
Random rnd = new Random(1);
for (int step = 0; step < 100000; step++) {
int n = rnd.nextInt(100) + 1;
StringBuilder s = rnd.ints(n, 0, 10).collect(
StringBuilder::new, (sb, i) -> sb.append((char) ('a' + i)), StringBuilder::append);
int[] sa = suffixArray(s);
int[] ra = rotationArray(s.toString() + '\0');
int[] lcp = lcp(sa, s);
for (int i = 0; i + 1 < n; i++) {
String a = s.substring(sa[i]);
String b = s.substring(sa[i + 1]);
if (a.compareTo(b) >= 0 || !a.substring(0, lcp[i]).equals(b.substring(0, lcp[i]))
|| (a + " ").charAt(lcp[i]) == (b + " ").charAt(lcp[i]) || sa[i] != ra[i + 1])
throw new RuntimeException();
}
}
System.out.println("Test passed");
}
}