1 module dkh..string; 2 3 /// Suffix Array Informations 4 struct SA(T) { 5 T[] s; /// base string 6 int[] sa; /// suffix array 7 int[] rsa; /// reverse sa, rsa[sa[i]] == i 8 int[] lcp; /// lcp 9 this(in T[] s) { 10 size_t n = s.length; 11 this.s = s.dup; 12 sa = new int[](n+1); 13 rsa = new int[](n+1); 14 lcp = new int[](n); 15 } 16 } 17 18 int[] sais(T)(in T[] _s, int B = 200) { 19 import std.conv, std.algorithm, std.range; 20 int n = _s.length.to!int; 21 int[] sa = new int[](n+1); 22 if (n == 0) return sa; 23 24 auto s = _s.map!"a+1".array ~ T(0); B++; // add 0 to last 25 // ls 26 bool[] ls = new bool[](n+1); 27 ls[n] = true; 28 foreach_reverse (i; 0..n) { 29 ls[i] = (s[i] == s[i+1]) ? ls[i+1] : (s[i] < s[i+1]); 30 } 31 // sum(l[0], s[0], l[1], s[1], ...) 32 int[] sumL = new int[](B+1), sumS = new int[](B+1); 33 s.each!((i, c) => !ls[i] ? sumS[c]++ : sumL[c+1]++); 34 foreach (i; 0..B) { 35 sumL[i+1] += sumS[i]; 36 sumS[i+1] += sumL[i+1]; 37 } 38 39 void induce(in int[] lms) { 40 sa[] = -1; 41 auto buf0 = sumS.dup; 42 foreach (d; lms) { 43 sa[buf0[s[d]]++] = d; 44 } 45 auto buf1 = sumL.dup; 46 foreach (v; sa) { 47 if (v >= 1 && !ls[v-1]) { 48 sa[buf1[s[v-1]]++] = v-1; 49 } 50 } 51 auto buf2 = sumL.dup; 52 foreach_reverse (v; sa) { 53 if (v >= 1 && ls[v-1]) { 54 sa[--buf2[s[v-1]+1]] = v-1; 55 } 56 } 57 } 58 59 int[] lms = iota(1, n+1).filter!(i => !ls[i-1] && ls[i]).array; 60 int[] lmsMap = new int[](n+1); 61 lmsMap[] = -1; lms.each!((i, v) => lmsMap[v] = i.to!int); 62 63 induce(lms); 64 65 if (lms.length >= 2) { 66 int m = lms.length.to!int - 1; 67 // sort lms 68 auto lms2 = sa.filter!(v => lmsMap[v] != -1).array; 69 int recN = 1; 70 int[] recS = new int[](m); 71 recS[lmsMap[lms2[1]]] = 1; 72 foreach (i; 2..m+1) { 73 int l = lms2[i-1], r = lms2[i]; 74 int nl = lms[lmsMap[l]+1], nr = lms[lmsMap[r]+1]; 75 if (cmp(s[l..nl+1], s[r..nr+1])) recN++; 76 recS[lmsMap[lms2[i]]] = recN; 77 } 78 //re induce 79 induce(lms.indexed(sais!int(recS, recN)).array); 80 } 81 82 return sa; 83 } 84 85 86 /// return SA!T. each character must be inside [T(0), T(B)). 87 SA!T suffixArray(T)(in T[] _s, int B = 200) { 88 import std.conv, std.algorithm; 89 int n = _s.length.to!int; 90 auto saInfo = SA!T(_s); 91 if (n == 0) return saInfo; 92 93 with (saInfo) { 94 sa = sais(_s, B); 95 //rsa 96 sa.each!((i, v) => rsa[v] = i.to!int); 97 //lcp 98 int h = 0; 99 foreach (i; 0..n) { 100 int j = sa[rsa[i]-1]; 101 if (h > 0) h--; 102 for (; j+h < n && i+h < n; h++) { 103 if (s[j+h] != s[i+h]) break; 104 } 105 lcp[rsa[i]-1] = h; 106 } 107 } 108 return saInfo; 109 } 110 111 /// 112 unittest { 113 import std.algorithm : equal, map; 114 string s = "abracadabra"; 115 auto saInfo = s.suffixArray; 116 assert(equal(saInfo.sa.map!(i => s[i..$]), [ 117 "", 118 "a", 119 "abra", 120 "abracadabra", 121 "acadabra", 122 "adabra", 123 "bra", 124 "bracadabra", 125 "cadabra", 126 "dabra", 127 "ra", 128 "racadabra", 129 ])); 130 } 131 132 unittest { 133 import dkh.ascii; 134 import std.algorithm, std.conv, std.stdio; 135 import std.random; 136 import std.typecons; 137 138 SA!T naive(T)(in T[] s) { 139 int n = s.length.to!int; 140 auto res = SA!T(s); 141 res.s = s.dup; 142 alias P = Tuple!(string, int); 143 P[] buf = new P[](n+1); 144 foreach (i; 0..n+1) { 145 buf[i] = P(s[i..$].idup, i); 146 } 147 sort(buf); 148 foreach (i, v; buf) { 149 res.sa[i] = v[1]; 150 } 151 foreach (i; 0..n+1) { 152 res.rsa[res.sa[i]] = i; 153 } 154 foreach (i; 0..n) { 155 res.lcp[i] = commonPrefix(buf[i][0], buf[i+1][0]).length.to!int; 156 } 157 return res; 158 } 159 160 void f() { 161 int n = uniform(1, 100); 162 ASCIIString s; 163 foreach (j; 0..n) { 164 s ~= uniform('a', 'z'); 165 } 166 auto l = naive(s); 167 auto r = suffixArray(s); 168 assert(equal(l.s, r.s)); 169 assert(equal(l.sa, r.sa)); 170 assert(equal(l.rsa, r.rsa)); 171 assert(equal(l.lcp, r.lcp)); 172 } 173 import dkh.stopwatch; 174 auto ti = benchmark!f(1000); 175 writeln("SAIS Random1000: ", ti[0].toMsecs); 176 }