1 module dkh..string;
2 
3 /// Suffix Arrayのデータを保持する構造体
4 struct SA(T) {
5     T[] s; /// 元の文字列
6     ///sa, rsa, lcp
7     int[] sa, rsa, lcp;
8     this(in T[] s) {
9         size_t n = s.length;
10         this.s = s.dup;
11         sa = new int[](n+1);
12         rsa = new int[](n+1);
13         lcp = new int[](n);
14     }
15 }
16 
17 int[] sais(T)(in T[] _s, int B = 200) {
18     import std.conv, std.algorithm, std.range;
19     int n = _s.length.to!int;
20     int[] sa = new int[](n+1);
21     if (n == 0) return sa;
22 
23     auto s = _s.map!"a+1".array ~ T(0); B++; // add 0 to last
24     // ls
25     bool[] ls = new bool[](n+1);
26     ls[n] = true;
27     foreach_reverse (i; 0..n) {
28         ls[i] = (s[i] == s[i+1]) ? ls[i+1] : (s[i] < s[i+1]);
29     }
30     // sum(l[0], s[0], l[1], s[1], ...)
31     int[] sumL = new int[](B+1), sumS = new int[](B+1);        
32     s.each!((i, c) => !ls[i] ? sumS[c]++ : sumL[c+1]++);
33     foreach (i; 0..B) {
34         sumL[i+1] += sumS[i];
35         sumS[i+1] += sumL[i+1];
36     }
37 
38     void induce(in int[] lms) {
39         sa[] = -1;
40         auto buf0 = sumS.dup;
41         foreach (d; lms) {
42             sa[buf0[s[d]]++] = d;
43         }
44         auto buf1 = sumL.dup;
45         foreach (v; sa) {
46             if (v >= 1 && !ls[v-1]) {
47                 sa[buf1[s[v-1]]++] = v-1;
48             }
49         }
50         auto buf2 = sumL.dup;
51         foreach_reverse (v; sa) {
52             if (v >= 1 && ls[v-1]) {
53                 sa[--buf2[s[v-1]+1]] = v-1;
54             }
55         }
56     }
57     
58     int[] lms = iota(1, n+1).filter!(i => !ls[i-1] && ls[i]).array;
59     int[] lmsMap = new int[](n+1);
60     lmsMap[] = -1; lms.each!((i, v) => lmsMap[v] = i.to!int);
61 
62     induce(lms);
63     
64     if (lms.length >= 2) {
65         int m = lms.length.to!int - 1;
66         // sort lms
67         auto lms2 = sa.filter!(v => lmsMap[v] != -1).array;
68         int recN = 1;
69         int[] recS = new int[](m);
70         recS[lmsMap[lms2[1]]] = 1;
71         foreach (i; 2..m+1) {
72             int l = lms2[i-1], r = lms2[i];
73             int nl = lms[lmsMap[l]+1], nr = lms[lmsMap[r]+1];
74             if (cmp(s[l..nl+1], s[r..nr+1])) recN++;
75             recS[lmsMap[lms2[i]]] = recN;
76         }
77         //re induce
78         induce(lms.indexed(sais!int(recS, recN)).array);
79     }
80 
81     return sa;
82 }
83 
84 
85 /// SAを返す. 内部でSA-ISを使用しており, 各文字は[0, B)に収まっている必要がある
86 SA!T suffixArray(T)(in T[] _s, int B = 200) {
87     import std.conv, std.algorithm;
88     int n = _s.length.to!int;
89     auto saInfo = SA!T(_s);
90     if (n == 0) return saInfo;
91     
92     with (saInfo) {
93         sa = sais(_s, B);
94         //rsa
95         sa.each!((i, v) => rsa[v] = i.to!int);
96         //lcp
97         int h = 0;
98         foreach (i; 0..n) {
99             int j = sa[rsa[i]-1];
100             if (h > 0) h--;
101             for (; j+h < n && i+h < n; h++) {
102                 if (s[j+h] != s[i+h]) break;
103             }
104             lcp[rsa[i]-1] = h;
105         }
106     }
107     return saInfo;
108 }
109 
110 ///
111 unittest {
112     import std.algorithm : equal, map;
113     string s = "abracadabra";
114     auto saInfo = s.suffixArray;
115     assert(equal(saInfo.sa.map!(i => s[i..$]), [
116         "",
117         "a",
118         "abra",
119         "abracadabra",
120         "acadabra",
121         "adabra",
122         "bra",
123         "bracadabra",
124         "cadabra",
125         "dabra",
126         "ra",
127         "racadabra",
128     ]));
129 }
130 
131 unittest {
132     import dkh.ascii;
133     import std.algorithm, std.conv, std.stdio;
134     import std.random;
135     import std.typecons;
136 
137     SA!T naive(T)(in T[] s) {
138         int n = s.length.to!int;
139         auto res = SA!T(s);
140         res.s = s.dup;
141         alias P = Tuple!(string, int);
142         P[] buf = new P[](n+1);
143         foreach (i; 0..n+1) {
144             buf[i] = P(s[i..$].idup, i);
145         }
146         sort(buf);
147         foreach (i, v; buf) {
148             res.sa[i] = v[1];
149         }
150         foreach (i; 0..n+1) {
151             res.rsa[res.sa[i]] = i;
152         }
153         foreach (i; 0..n) {
154             res.lcp[i] = commonPrefix(buf[i][0], buf[i+1][0]).length.to!int;
155         }
156         return res;
157     }
158 
159     void f() {
160         int n = uniform(1, 100);
161         ASCIIString s;
162         foreach (j; 0..n) {
163             s ~= uniform('a', 'z');
164         }
165         auto l = naive(s);
166         auto r = suffixArray(s);
167         assert(equal(l.s, r.s));
168         assert(equal(l.sa, r.sa));
169         assert(equal(l.rsa, r.rsa));
170         assert(equal(l.lcp, r.lcp));
171     }
172     import dkh.stopwatch;
173     auto ti = benchmark!f(1000);
174     writeln("SAIS Random1000: ", ti[0].toMsecs);
175 }