-
Notifications
You must be signed in to change notification settings - Fork 522
/
AhoCorasick.java
65 lines (57 loc) · 1.82 KB
/
AhoCorasick.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package strings;
// https://en.wikipedia.org/wiki/Aho–Corasick_algorithm
public class AhoCorasick {
final int ALPHABET_SIZE = 26;
final int MAX_STATES = 200_000;
int[][] transitions = new int[MAX_STATES][ALPHABET_SIZE];
int[] sufflink = new int[MAX_STATES];
int[] escape = new int[MAX_STATES];
int states = 1;
public int addString(String s) {
int v = 0;
for (char c : s.toCharArray()) {
c -= 'a';
if (transitions[v][c] == 0) {
transitions[v][c] = states++;
}
v = transitions[v][c];
}
escape[v] = v;
return v;
}
public void buildLinks() {
int[] q = new int[MAX_STATES];
for (int s = 0, t = 1; s < t;) {
int v = q[s++];
int u = sufflink[v];
if (escape[v] == 0) {
escape[v] = escape[u];
}
for (int c = 0; c < ALPHABET_SIZE; c++) {
if (transitions[v][c] != 0) {
q[t++] = transitions[v][c];
sufflink[transitions[v][c]] = v != 0 ? transitions[u][c] : 0;
} else {
transitions[v][c] = transitions[u][c];
}
}
}
}
// Usage example
public static void main(String[] args) {
AhoCorasick ahoCorasick = new AhoCorasick();
ahoCorasick.addString("a");
ahoCorasick.addString("aa");
ahoCorasick.addString("abaaa");
ahoCorasick.buildLinks();
int[][] t = ahoCorasick.transitions;
int[] e = ahoCorasick.escape;
String s = "abaa";
int state = 0;
for (int i = 0; i < s.length(); i++) {
state = t[state][s.charAt(i) - 'a'];
if (e[state] != 0)
System.out.println(i);
}
}
}