github.com/vench/word_index@v0.3.1/index.go (about) 1 package word_index 2 3 import ( 4 "sort" 5 "strings" 6 "sync" 7 ) 8 9 const ( 10 tagAny = `*` 11 tagAnyRune = '*' 12 emptyFind = -1 13 ) 14 15 // 16 type Index interface { 17 Find(string) int 18 FindOff(string, int) int 19 FindAll(string) []int 20 FindAt(int, string) bool 21 Add(...string) 22 DocumentAt(int) (string, bool) 23 } 24 25 // 26 type variant struct { 27 query string 28 variants []string 29 } 30 31 // 32 type indexItem struct { 33 words []string 34 document string 35 } 36 37 func (i *indexItem) findInterpolation(query string, variants []string) bool { 38 39 if len(query) == 0 { 40 return false 41 } 42 43 var ( 44 mid int 45 low = 0 46 high = len(i.words) - 1 47 ) 48 49 for i.words[low][0] < query[0] && i.words[high][0] > query[0] { 50 mid = low + (int(query[0]-i.words[low][0])*(high-low))/int(i.words[high][0]-i.words[low][0]) 51 52 if i.words[mid] < query { 53 low = mid + 1 54 } else if i.words[mid] > query { 55 high = mid - 1 56 } else { 57 return true 58 } 59 } 60 61 if i.words[low][0] == query[0] { 62 for n := low; n < len(i.words); n++ { 63 if i.words[n] == query { 64 return true 65 } else if query[len(query)-1:] == tagAny { 66 for n := 0; n < len(query); n++ { 67 r := query[n] 68 if r == tagAnyRune { 69 return true 70 } else if len(i.words[low]) <= n || i.words[low][n] != r { 71 break 72 } 73 } 74 } else if len(variants) > 0 { 75 for _, variant := range variants { 76 if i.words[n] == variant { 77 return true 78 } 79 } 80 } 81 } 82 } 83 84 if i.words[high][0] == query[0] { 85 for n := high; n < len(i.words); n++ { 86 if i.words[n] == query { 87 return true 88 } else if query[len(query)-1:] == tagAny { 89 for j, r := range []rune(query) { 90 if r == tagAnyRune { 91 return true 92 } else if len(i.words[low]) <= n || rune(i.words[low][j]) != r { 93 break 94 } 95 } 96 } else if len(variants) > 0 { 97 for _, variant := range variants { 98 if i.words[n] == variant { 99 return true 100 } 101 } 102 } 103 } 104 } 105 106 return false 107 } 108 109 // 110 func (i *indexItem) findBin(query string, variants []string) bool { 111 112 if len(query) == 0 { 113 return false 114 } 115 116 low := 0 117 high := len(i.words) - 1 118 119 for low <= high { 120 median := (low + high) / 2 121 if i.words[median][0] < query[0] { 122 low = median + 1 123 } else { 124 high = median - 1 125 } 126 } 127 128 for low < len(i.words) && i.words[low][0] == query[0] { 129 if i.words[low] == query { 130 return true 131 } else if query[len(query)-1:] == tagAny { 132 for n := 0; n < len(query); n++ { 133 r := query[n] 134 if r == tagAnyRune { 135 return true 136 } else if len(i.words[low]) <= n || i.words[low][n] != r { 137 break 138 } 139 } 140 } else if len(variants) > 0 { 141 for _, variant := range variants { 142 if i.words[low] == variant { 143 return true 144 } 145 } 146 } 147 low++ 148 } 149 return false 150 } 151 152 // 153 type indexWord struct { 154 data []*indexItem 155 binSearch bool 156 } 157 158 func (i *indexWord) FindAll(str string) []int { 159 words := strings.Split(strings.ToLower(str), ` `) 160 variants := make([]*variant, len(words)) 161 for n, word := range words { 162 q, v := i.makeVariants(word) 163 vr := &variant{query: q, variants: v} 164 variants[n] = vr 165 } 166 167 result := make([]int, 0) 168 var offset = 0 169 for true { 170 i := i.findOff(variants, offset) 171 if i == emptyFind { 172 break 173 } 174 result = append(result, i) 175 offset = i + 1 176 } 177 return result 178 } 179 180 func (i *indexWord) FindOff(str string, offset int) int { 181 words := strings.Split(strings.ToLower(str), ` `) 182 variants := make([]*variant, len(words)) 183 for n, word := range words { 184 q, v := i.makeVariants(word) 185 vr := &variant{query: q, variants: v} 186 variants[n] = vr 187 } 188 return i.findOff(variants, offset) 189 } 190 191 func (i *indexWord) findOff(variants []*variant, offset int) int { 192 193 for index := offset; index < len(i.data); index++ { 194 d := i.data[index] 195 196 for _, v := range variants { 197 if i.binSearch { 198 if ok := d.findBin(v.query, v.variants); ok { 199 return index 200 } 201 } else { 202 if ok := d.findInterpolation(v.query, v.variants); ok { 203 return index 204 } 205 } 206 } 207 } 208 209 return emptyFind 210 } 211 212 // 213 func (i *indexWord) makeVariants(word string) (qWord string, variants []string) { 214 return makeVariants(word) 215 } 216 217 func makeVariants(word string) (string, []string) { 218 variants := make([]string, 0) 219 220 if len(word) > 0 && word[len(word)-1] == ')' { 221 base := make([]rune, 0) 222 start := false 223 variant := make([]rune, 0) 224 for _, r := range []rune(word) { 225 if r == tagAnyRune { 226 word = string(append(base, r)) 227 variants = make([]string, 0) 228 break 229 } 230 if r == ')' { 231 variants = append(variants, string(variant)) 232 break 233 } else if r == '(' { 234 start = true 235 variant = append(variant, base...) 236 variants = append(variants, string(variant)) 237 } else if start && r == '|' { 238 variants = append(variants, string(variant)) 239 variant = make([]rune, 0) 240 variant = append(variant, base...) 241 } else if start { 242 variant = append(variant, r) 243 } else { 244 base = append(base, r) 245 } 246 } 247 } 248 return word, variants 249 } 250 251 // 252 func (i *indexWord) Add(str ...string) { 253 for _, s := range str { 254 words := strings.Split(strings.ToLower(s), ` `) 255 256 k := 0 257 for k < len(words) { 258 if len(words[k]) == 0 { 259 words = append(words[:k], words[k+1:]...) 260 } else { 261 k++ 262 } 263 } 264 265 sort.Slice(words, func(i, j int) bool { 266 if words[i] < words[j] { 267 return true 268 } 269 return false 270 }) 271 272 n := indexItem{words: words, document: s} 273 i.data = append(i.data, &n) 274 } 275 } 276 277 // 278 func (i *indexWord) Find(str string) int { 279 return i.FindOff(str, 0) 280 } 281 282 // 283 func (i *indexWord) DocumentAt(index int) (string, bool) { 284 if len(i.data) > index && index >= 0 { 285 return i.data[index].document, true 286 } 287 return ``, false 288 } 289 290 // 291 func (i *indexWord) FindAt(index int, str string) bool { 292 if index < 0 || len(i.data) < index { 293 return false 294 } 295 words := strings.Split(strings.ToLower(str), ` `) 296 for _, word := range words { 297 query, variants := i.makeVariants(word) 298 if i.binSearch { 299 if ok := i.data[index].findBin(query, variants); ok { 300 return true 301 } 302 } else { 303 if ok := i.data[index].findInterpolation(query, variants); ok { 304 return true 305 } 306 } 307 308 } 309 return false 310 } 311 312 // 313 func NewIndex() Index { 314 return &indexWord{data: make([]*indexItem, 0), binSearch: true} 315 } 316 317 // 318 type indexWordSync struct { 319 indexWord 320 mx sync.RWMutex 321 } 322 323 func (i *indexWordSync) Add(str ...string) { 324 i.mx.Lock() 325 i.indexWord.Add(str...) 326 i.mx.Unlock() 327 } 328 329 func (i *indexWordSync) Find(str string) int { 330 i.mx.RLock() 331 defer i.mx.RUnlock() 332 return i.indexWord.Find(str) 333 } 334 335 func (i *indexWordSync) FindOff(str string, offset int) int { 336 i.mx.RLock() 337 defer i.mx.RUnlock() 338 return i.indexWord.FindOff(str, offset) 339 } 340 341 func (i *indexWordSync) DocumentAt(index int) (string, bool) { 342 i.mx.RLock() 343 defer i.mx.RUnlock() 344 return i.indexWord.DocumentAt(index) 345 } 346 347 func (i *indexWordSync) FindAt(index int, str string) bool { 348 i.mx.RLock() 349 defer i.mx.RUnlock() 350 return i.indexWord.FindAt(index, str) 351 } 352 353 func (i *indexWordSync) FindAll(str string) []int { 354 i.mx.RLock() 355 defer i.mx.RUnlock() 356 return i.indexWord.FindAll(str) 357 } 358 359 // 360 func NewIndexSync() Index { 361 return &indexWordSync{indexWord: indexWord{data: make([]*indexItem, 0), binSearch: true}} 362 }