github.com/vench/word_index@v0.3.1/matrix_index.go (about) 1 package word_index 2 3 import ( 4 "sort" 5 "strings" 6 ) 7 8 type MatrixIndex struct { 9 items []*matrixIndexItem 10 documents []string 11 } 12 13 func (m *MatrixIndex) Find(query string) int { 14 result := m.Query(query) 15 if len(result) > 0 { 16 return result[0] 17 } 18 return emptyFind 19 } 20 21 func (m *MatrixIndex) FindOff(query string, low int) int { 22 if m.FindAt(low, query) { 23 return low 24 } 25 return emptyFind 26 } 27 28 func (m *MatrixIndex) FindAll(query string) []int { 29 return m.Query(query) 30 } 31 32 func (m *MatrixIndex) FindAt(index int, query string) bool { 33 result := m.Query(query) 34 if len(result) == 0 { 35 return false 36 } 37 38 low, high := 0, len(result)-1 39 for low <= high { 40 median := (low + high) / 2 41 if result[median] < index { 42 low = median + 1 43 } else { 44 high = median - 1 45 } 46 } 47 return result[low] == index 48 } 49 50 func (m *MatrixIndex) Add(documents ...string) { 51 documents = append(m.documents, documents...) 52 m.Fit(documents...) 53 } 54 55 func (m *MatrixIndex) DocumentAt(index int) (string, bool) { 56 if len(m.documents) > index { 57 return m.documents[index], true 58 } 59 return "", false 60 } 61 62 func (m *MatrixIndex) Fit(documents ...string) error { 63 64 mWords := make(map[string]map[int]struct{}) 65 for index, document := range documents { 66 words := strings.Split(strings.ToLower(document), ` `) 67 for _, word := range words { 68 w, ok := mWords[word] 69 if !ok { 70 w = make(map[int]struct{}) 71 } 72 w[index] = struct{}{} 73 mWords[word] = w 74 } 75 } 76 77 items := make([]*matrixIndexItem, len(mWords)) 78 i := 0 79 for word, index := range mWords { 80 81 item := &matrixIndexItem{word: word, index: make([]int, len(index))} 82 j := 0 83 for inx, _ := range index { 84 item.index[j] = inx 85 j++ 86 } 87 88 sort.Slice(item.index, func(i, j int) bool { 89 return item.index[i] < item.index[j] 90 }) 91 92 items[i] = item 93 i++ 94 } 95 96 sort.Slice(items, func(i, j int) bool { 97 return items[i].word < items[j].word 98 }) 99 100 m.items = items 101 m.documents = documents 102 return nil 103 } 104 105 func (m *MatrixIndex) Query(query string) []int { 106 return m.QueryAndOr(query, false) 107 } 108 109 func (m *MatrixIndex) QueryAndOr(query string, useAnd bool) []int { 110 words := strings.Split(strings.ToLower(query), ` `) 111 high := len(m.items) - 1 112 results := make([][]int, len(words)) 113 for i, word := range words { 114 q, variants := makeVariants(word) 115 results[i] = m.findBin(q, variants, 0, high) 116 } 117 if useAnd { 118 return MergeOrderedArrayAnd(results) 119 } 120 return MergeOrderedArray(results) 121 } 122 123 func (m *MatrixIndex) findBin(word string, variants []string, low, high int) []int { 124 w := strings.TrimSpace(word) 125 if len(w) < 2 { 126 return []int{} 127 } 128 if w[len(w)-1] == tagAnyRune { 129 w = w[:len(w)-1] 130 } else if w[len(w)-1] == ')' { 131 for i := len(w) - 1; i >= 0; i-- { 132 if w[i] == '(' { 133 w = w[:i] 134 break 135 } 136 } 137 } 138 for low <= high { 139 median := (low + high) / 2 140 if m.items[median].word < w { 141 low = median + 1 142 } else { 143 high = median - 1 144 } 145 } 146 147 results := make([][]int, 0) 148 for low < len(m.items) && m.compareWord(m.items[low].word, word, variants) { 149 /*if len(result) == 0 { 150 result = m.items[low].index 151 } else { 152 result = MergeOrderedArray([][]int{result, m.items[low].index}) 153 } */ 154 results = append(results, m.items[low].index) 155 156 low++ 157 } 158 159 return MergeOrderedArray(results) 160 } 161 162 func (m *MatrixIndex) compareWord(word, query string, variants []string) bool { 163 if word == query { 164 return true 165 } 166 if query[len(query)-1:] == tagAny { 167 if word == query[:len(query)-1] { 168 return true 169 } 170 for n := 0; n < len(query); n++ { 171 r := query[n] 172 if r == tagAnyRune { 173 return true 174 } else if len(word) <= n || word[n] != r { 175 break 176 } 177 } 178 } 179 if len(variants) > 0 { 180 for _, variant := range variants { 181 if word == variant { 182 return true 183 } 184 } 185 } 186 return false 187 } 188 189 func MergeOrderedArray(a [][]int) []int { 190 maxLen := 0 191 maxValue := 0 192 193 for j := 0; j < len(a); j++ { 194 if len(a[j]) == 0 { 195 a = append(a[:j], a[j+1:]...) 196 continue 197 } 198 if len(a[j]) > maxLen { 199 maxLen = len(a[j]) 200 } 201 if maxValue < a[j][len(a[j])-1] { 202 maxValue = a[j][len(a[j])-1] 203 } 204 } 205 offsets := make([]int, len(a)) 206 maxValue++ 207 b := make([]int, 0, maxLen) 208 lastIndex := -1 209 minValue := maxValue 210 for true { 211 212 minIndexResult := -1 213 for j := 0; j < len(a); j++ { 214 if len(a[j]) > offsets[j] { 215 if a[j][offsets[j]] < minValue { 216 minValue = a[j][offsets[j]] 217 minIndexResult = j 218 } 219 } else { 220 a = append(a[:j], a[j+1:]...) 221 offsets = append(offsets[:j], offsets[j+1:]...) 222 j-- 223 } 224 } 225 if minIndexResult == -1 { 226 break 227 } 228 if lastIndex < minValue { 229 b = append(b, minValue) 230 lastIndex = minValue 231 } 232 minValue = maxValue 233 //a[minIndexResult] = a[minIndexResult][1:] 234 offsets[minIndexResult]++ 235 } 236 return b 237 } 238 239 func MergeOrderedArrayAnd(a [][]int) []int { 240 b := make([]int, 0) 241 minIndex := 0 242 for i := 1; i < len(a); i++ { 243 if len(a[minIndex]) > len(a[i]) { 244 minIndex = i 245 } 246 } 247 offsets := make([]int, len(a)) 248 for i, v := range a[minIndex] { 249 _ = i 250 has := true 251 for j := 0; j < len(a); j++ { 252 if j == minIndex { 253 continue 254 } 255 for ; offsets[j] < len(a[j]); offsets[j]++ { 256 if a[j][offsets[j]] > v { 257 has = false 258 break 259 } 260 if has = a[j][offsets[j]] == v; has { 261 break 262 } 263 } 264 if !has { 265 break 266 } 267 } 268 if has { 269 b = append(b, v) 270 } 271 } 272 return b 273 } 274 275 type matrixIndexItem struct { 276 word string 277 index []int 278 } 279 280 func NewMatrixIndex() *MatrixIndex { 281 return &MatrixIndex{} 282 }