// Origin: github.com/d-tsuji/suffixarray, file suffixarray.go
// (package suffixarray). The code below uses only language built-ins and
// needs no imports.

// Manber is a suffix array over Text, built by prefix doubling: suffixes are
// first bucketed by their first byte and then repeatedly re-sorted inside
// groups of equal rank while the compared prefix length doubles.
//
// Create one with New, call Build once, then query it with LookupAll.
type Manber struct {
	// N is the length of the input string in bytes.
	N int

	// Text is the input text (ASCII only).
	Text string

	// Index[i] is the start offset of the i-th suffix in sorted order for
	// 0 <= i < N. Index[N] is a sentinel equal to N.
	Index []int

	// Rank[i] is the current rank of the suffix starting at offset i.
	// Rank[N] is a sentinel (-1) that is smaller than every real rank.
	Rank []int

	// newrank is scratch space used while re-ranking one group, so that
	// Rank is not modified while it is still being read for comparisons.
	newrank []int

	// offset is the distance, in bytes, at which less compares ranks during
	// the current doubling round.
	offset int
}

// New creates a new Manber for s. The returned value is not searchable until
// Build has been called.
func New(s string) *Manber {
	n := len(s)
	m := &Manber{
		N:       n,
		Text:    s,
		Index:   make([]int, n+1),
		Rank:    make([]int, n+1),
		newrank: make([]int, n+1),
	}

	// Sentinels: the empty suffix at offset n gets rank -1, which differs
	// from every real rank. doit relies on this to close the last group.
	m.Index[n] = n
	m.Rank[n] = -1

	return m
}

// Build builds a SuffixArray.
// Building time is O(N (logN)^2) where N is the
// size of the input string data.
func (m *Manber) Build() {
	m.msd()
	m.doit()
}

// LookupAll returns the start offsets of every occurrence of p in Text.
//
// The offsets are reported in suffix-array order (the order of the suffixes
// they start), not in ascending position order. The result is empty but
// non-nil when p does not occur. Build must have been called first.
func (m *Manber) LookupAll(p string) []int {
	// prefixAt yields the suffix at sorted position i, cut to at most
	// len(p) bytes, so that it can be compared directly against p.
	prefixAt := func(i int) string {
		start := m.Index[i]
		return m.Text[start:min(start+len(p), m.N)]
	}

	// lastWhere returns the greatest sorted position whose truncated suffix
	// satisfies ok, or -1 when none does. ok must be monotone over the
	// sorted suffixes (true for a prefix of positions, false afterwards).
	//
	// The lower bound starts at -1 rather than 0: position 0 has to be
	// tested like any other, otherwise the smallest suffix is silently
	// assumed to satisfy ok and can never be part of the answer.
	lastWhere := func(ok func(prefix string) bool) int {
		lo, hi := -1, m.N
		for hi-lo > 1 {
			mid := lo + (hi-lo)/2
			if ok(prefixAt(mid)) {
				lo = mid
			} else {
				hi = mid
			}
		}
		return lo
	}

	// Last position strictly below p, and last position not above p. The
	// matches are exactly the positions in (below, upTo].
	below := lastWhere(func(s string) bool { return s < p })
	upTo := lastWhere(func(s string) bool { return s <= p })

	result := make([]int, 0, upTo-below)
	for i := below + 1; i <= upTo; i++ {
		result = append(result, m.Index[i])
	}
	return result
}

// msd performs the initial pass: a counting sort of all suffixes by their
// first byte. Afterwards Index[0:N] lists the offsets grouped by first byte
// and Rank[i] is the first sorted position of the bucket offset i falls in.
func (m *Manber) msd() {
	const R int = 256

	// calculate frequencies
	freq := make([]int, R)
	for i := 0; i < m.N; i++ {
		freq[m.Text[i]]++
	}

	// calculate cumulative frequencies: cumm[c] is where bucket c starts
	cumm := make([]int, R)
	for i := 1; i < R; i++ {
		cumm[i] = cumm[i-1] + freq[i-1]
	}

	// compute ranks (must happen before cumm is advanced below)
	for i := 0; i < m.N; i++ {
		m.Rank[i] = cumm[m.Text[i]]
	}

	// sort by first char, advancing each bucket's write cursor
	for i := 0; i < m.N; i++ {
		c := m.Text[i]
		m.Index[cumm[c]] = i
		cumm[c]++
	}
}

// doit runs the doubling rounds. In each round it scans Index for maximal
// runs of suffixes with equal rank, sorts every such run by the rank found
// offset bytes further on, and then splits the run into finer ranks.
func (m *Manber) doit() {
	for m.offset = 1; m.offset < m.N; m.offset += m.offset {
		count := 0 // number of consecutive equal-rank pairs seen so far
		// i runs up to N inclusive: the sentinel at Index[N] has rank -1,
		// so it always terminates the final run.
		for i := 1; i <= m.N; i++ {
			if m.Rank[m.Index[i]] == m.Rank[m.Index[i-1]] {
				count++
				continue
			}
			if count == 0 {
				continue
			}

			// The run that just ended occupies positions left..right.
			left, right := i-1-count, i-1
			m.quicksort(left, right)

			// Fix up ranks: the first element keeps the run's rank; each
			// time a strictly greater key starts, the rank jumps to that
			// element's position relative to the run start.
			base := m.Rank[m.Index[left]]
			r := base
			for j := left + 1; j <= right; j++ {
				if m.less(m.Index[j-1], m.Index[j]) {
					r = base + j - left
				}
				m.newrank[m.Index[j]] = r
			}

			// Copy back only now: updating Rank inside the loop above
			// would change the values less is still comparing.
			for j := left + 1; j <= right; j++ {
				m.Rank[m.Index[j]] = m.newrank[m.Index[j]]
			}

			count = 0
		}
	}
}

// -----------------------------------------
// Helper functions for comparing suffixes.
// -----------------------------------------

// quicksort sorts Index[lo..hi] (inclusive) in place using the ordering
// defined by less.
func (m *Manber) quicksort(lo, hi int) {
	if hi <= lo {
		return
	}
	i := m.partition(lo, hi)
	m.quicksort(lo, i-1)
	m.quicksort(i+1, hi)
}

// partition rearranges Index[lo..hi] around the pivot Index[hi] and returns
// the pivot's final position: nothing to its left is greater than it and
// nothing to its right is smaller.
func (m *Manber) partition(lo, hi int) int {
	i, j, v := lo-1, hi, m.Index[hi]
	for {
		// find item on left to swap; the pivot at hi stops the scan
		i++
		for m.less(m.Index[i], v) {
			if i == hi {
				break
			}
			i++
		}

		// find item on right to swap; never run below lo
		j--
		for m.less(v, m.Index[j]) {
			if j == lo {
				break
			}
			j--
		}

		// check if pointers cross
		if i >= j {
			break
		}
		m.exch(i, j)
	}

	// swap with partition element
	m.exch(i, hi)

	return i
}

// exch swaps the entries at sorted positions i and j of Index.
func (m *Manber) exch(i, j int) {
	m.Index[i], m.Index[j] = m.Index[j], m.Index[i]
}

// less reports whether the suffix at offset v orders before the suffix at
// offset w, judged only by the ranks found offset bytes further on. It is
// meant for suffixes that already share a rank.
// NOTE(review): v+offset and w+offset are assumed to stay within Rank
// (at most N) for the suffixes doit passes in; there is no bounds guard here.
func (m *Manber) less(v, w int) bool {
	return m.Rank[v+m.offset] < m.Rank[w+m.offset]
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}