1 module dkh..string; 2 3 /// Suffix Arrayのデータを保持する構造体 4 struct SA(T) { 5 T[] s; /// 元の文字列 6 ///sa, rsa, lcp 7 int[] sa, rsa, lcp; 8 this(in T[] s) { 9 size_t n = s.length; 10 this.s = s.dup; 11 sa = new int[](n+1); 12 rsa = new int[](n+1); 13 lcp = new int[](n); 14 } 15 } 16 17 int[] sais(T)(in T[] _s, int B = 200) { 18 import std.conv, std.algorithm, std.range; 19 int n = _s.length.to!int; 20 int[] sa = new int[](n+1); 21 if (n == 0) return sa; 22 23 auto s = _s.map!"a+1".array ~ T(0); B++; // add 0 to last 24 // ls 25 bool[] ls = new bool[](n+1); 26 ls[n] = true; 27 foreach_reverse (i; 0..n) { 28 ls[i] = (s[i] == s[i+1]) ? ls[i+1] : (s[i] < s[i+1]); 29 } 30 // sum(l[0], s[0], l[1], s[1], ...) 31 int[] sumL = new int[](B+1), sumS = new int[](B+1); 32 s.each!((i, c) => !ls[i] ? sumS[c]++ : sumL[c+1]++); 33 foreach (i; 0..B) { 34 sumL[i+1] += sumS[i]; 35 sumS[i+1] += sumL[i+1]; 36 } 37 38 void induce(in int[] lms) { 39 sa[] = -1; 40 auto buf0 = sumS.dup; 41 foreach (d; lms) { 42 sa[buf0[s[d]]++] = d; 43 } 44 auto buf1 = sumL.dup; 45 foreach (v; sa) { 46 if (v >= 1 && !ls[v-1]) { 47 sa[buf1[s[v-1]]++] = v-1; 48 } 49 } 50 auto buf2 = sumL.dup; 51 foreach_reverse (v; sa) { 52 if (v >= 1 && ls[v-1]) { 53 sa[--buf2[s[v-1]+1]] = v-1; 54 } 55 } 56 } 57 58 int[] lms = iota(1, n+1).filter!(i => !ls[i-1] && ls[i]).array; 59 int[] lmsMap = new int[](n+1); 60 lmsMap[] = -1; lms.each!((i, v) => lmsMap[v] = i.to!int); 61 62 induce(lms); 63 64 if (lms.length >= 2) { 65 int m = lms.length.to!int - 1; 66 // sort lms 67 auto lms2 = sa.filter!(v => lmsMap[v] != -1).array; 68 int recN = 1; 69 int[] recS = new int[](m); 70 recS[lmsMap[lms2[1]]] = 1; 71 foreach (i; 2..m+1) { 72 int l = lms2[i-1], r = lms2[i]; 73 int nl = lms[lmsMap[l]+1], nr = lms[lmsMap[r]+1]; 74 if (cmp(s[l..nl+1], s[r..nr+1])) recN++; 75 recS[lmsMap[lms2[i]]] = recN; 76 } 77 //re induce 78 induce(lms.indexed(sais!int(recS, recN)).array); 79 } 80 81 return sa; 82 } 83 84 85 /// SAを返す. 内部でSA-ISを使用しており, 各文字は[0, B)に収まっている必要がある 86 SA!T suffixArray(T)(in T[] _s, int B = 200) { 87 import std.conv, std.algorithm; 88 int n = _s.length.to!int; 89 auto saInfo = SA!T(_s); 90 if (n == 0) return saInfo; 91 92 with (saInfo) { 93 sa = sais(_s, B); 94 //rsa 95 sa.each!((i, v) => rsa[v] = i.to!int); 96 //lcp 97 int h = 0; 98 foreach (i; 0..n) { 99 int j = sa[rsa[i]-1]; 100 if (h > 0) h--; 101 for (; j+h < n && i+h < n; h++) { 102 if (s[j+h] != s[i+h]) break; 103 } 104 lcp[rsa[i]-1] = h; 105 } 106 } 107 return saInfo; 108 } 109 110 /// 111 unittest { 112 import std.algorithm : equal, map; 113 string s = "abracadabra"; 114 auto saInfo = s.suffixArray; 115 assert(equal(saInfo.sa.map!(i => s[i..$]), [ 116 "", 117 "a", 118 "abra", 119 "abracadabra", 120 "acadabra", 121 "adabra", 122 "bra", 123 "bracadabra", 124 "cadabra", 125 "dabra", 126 "ra", 127 "racadabra", 128 ])); 129 } 130 131 unittest { 132 import dkh.ascii; 133 import std.algorithm, std.conv, std.stdio; 134 import std.random; 135 import std.typecons; 136 137 SA!T naive(T)(in T[] s) { 138 int n = s.length.to!int; 139 auto res = SA!T(s); 140 res.s = s.dup; 141 alias P = Tuple!(string, int); 142 P[] buf = new P[](n+1); 143 foreach (i; 0..n+1) { 144 buf[i] = P(s[i..$].idup, i); 145 } 146 sort(buf); 147 foreach (i, v; buf) { 148 res.sa[i] = v[1]; 149 } 150 foreach (i; 0..n+1) { 151 res.rsa[res.sa[i]] = i; 152 } 153 foreach (i; 0..n) { 154 res.lcp[i] = commonPrefix(buf[i][0], buf[i+1][0]).length.to!int; 155 } 156 return res; 157 } 158 159 void f() { 160 int n = uniform(1, 100); 161 ASCIIString s; 162 foreach (j; 0..n) { 163 s ~= uniform('a', 'z'); 164 } 165 auto l = naive(s); 166 auto r = suffixArray(s); 167 assert(equal(l.s, r.s)); 168 assert(equal(l.sa, r.sa)); 169 assert(equal(l.rsa, r.rsa)); 170 assert(equal(l.lcp, r.lcp)); 171 } 172 import dkh.stopwatch; 173 auto ti = benchmark!f(1000); 174 writeln("SAIS Random1000: ", ti[0].toMsecs); 175 }