github.com/vench/word_index@v0.3.1/index.go (about)

     1  package word_index
     2  
     3  import (
     4  	"sort"
     5  	"strings"
     6  	"sync"
     7  )
     8  
const (
	// tagAny is the wildcard marker in string form; a query word ending with
	// it is matched as a prefix.
	tagAny     = `*`
	// tagAnyRune is the same wildcard marker as a character constant.
	tagAnyRune = '*'
	// emptyFind is the position returned by searches that matched no document.
	emptyFind  = -1
)
    14  
// Index is a searchable collection of text documents addressed by the
// position at which they were added. The method notes below describe the
// indexWord implementation in this file.
type Index interface {
	// Find returns the position of the first document matching the query, or
	// -1 when nothing matches.
	Find(string) int
	// FindOff is like Find but starts scanning at the given document position.
	FindOff(string, int) int
	// FindAll returns the positions of every matching document.
	FindAll(string) []int
	// FindAt reports whether the document at the given position matches the
	// query.
	FindAt(int, string) bool
	// Add appends documents to the index.
	Add(...string)
	// DocumentAt returns the original text stored at the given position and
	// whether that position exists.
	DocumentAt(int) (string, bool)
}
    24  
// variant is one parsed query word: the text to match plus the alternative
// spellings produced by makeVariants.
type variant struct {
	query    string   // query word as returned by makeVariants
	variants []string // alternative words that also count as a match
}
    30  
// indexItem is a single indexed document.
type indexItem struct {
	words    []string // lower-cased, non-empty words of the document in ascending order (built by Add)
	document string   // original text exactly as passed to Add
}
    36  
    37  func (i *indexItem) findInterpolation(query string, variants []string) bool {
    38  
    39  	if len(query) == 0 {
    40  		return false
    41  	}
    42  
    43  	var (
    44  		mid  int
    45  		low  = 0
    46  		high = len(i.words) - 1
    47  	)
    48  
    49  	for i.words[low][0] < query[0] && i.words[high][0] > query[0] {
    50  		mid = low + (int(query[0]-i.words[low][0])*(high-low))/int(i.words[high][0]-i.words[low][0])
    51  
    52  		if i.words[mid] < query {
    53  			low = mid + 1
    54  		} else if i.words[mid] > query {
    55  			high = mid - 1
    56  		} else {
    57  			return true
    58  		}
    59  	}
    60  
    61  	if i.words[low][0] == query[0] {
    62  		for n := low; n < len(i.words); n++ {
    63  			if i.words[n] == query {
    64  				return true
    65  			} else if query[len(query)-1:] == tagAny {
    66  				for n := 0; n < len(query); n++ {
    67  					r := query[n]
    68  					if r == tagAnyRune {
    69  						return true
    70  					} else if len(i.words[low]) <= n || i.words[low][n] != r {
    71  						break
    72  					}
    73  				}
    74  			} else if len(variants) > 0 {
    75  				for _, variant := range variants {
    76  					if i.words[n] == variant {
    77  						return true
    78  					}
    79  				}
    80  			}
    81  		}
    82  	}
    83  
    84  	if i.words[high][0] == query[0] {
    85  		for n := high; n < len(i.words); n++ {
    86  			if i.words[n] == query {
    87  				return true
    88  			} else if query[len(query)-1:] == tagAny {
    89  				for j, r := range []rune(query) {
    90  					if r == tagAnyRune {
    91  						return true
    92  					} else if len(i.words[low]) <= n || rune(i.words[low][j]) != r {
    93  						break
    94  					}
    95  				}
    96  			} else if len(variants) > 0 {
    97  				for _, variant := range variants {
    98  					if i.words[n] == variant {
    99  						return true
   100  					}
   101  				}
   102  			}
   103  		}
   104  	}
   105  
   106  	return false
   107  }
   108  
   109  //
   110  func (i *indexItem) findBin(query string, variants []string) bool {
   111  
   112  	if len(query) == 0 {
   113  		return false
   114  	}
   115  
   116  	low := 0
   117  	high := len(i.words) - 1
   118  
   119  	for low <= high {
   120  		median := (low + high) / 2
   121  		if i.words[median][0] < query[0] {
   122  			low = median + 1
   123  		} else {
   124  			high = median - 1
   125  		}
   126  	}
   127  
   128  	for low < len(i.words) && i.words[low][0] == query[0] {
   129  		if i.words[low] == query {
   130  			return true
   131  		} else if query[len(query)-1:] == tagAny {
   132  			for n := 0; n < len(query); n++ {
   133  				r := query[n]
   134  				if r == tagAnyRune {
   135  					return true
   136  				} else if len(i.words[low]) <= n || i.words[low][n] != r {
   137  					break
   138  				}
   139  			}
   140  		} else if len(variants) > 0 {
   141  			for _, variant := range variants {
   142  				if i.words[low] == variant {
   143  					return true
   144  				}
   145  			}
   146  		}
   147  		low++
   148  	}
   149  	return false
   150  }
   151  
// indexWord is the Index implementation without locking: documents are kept
// in the order they were added and searched one after another.
type indexWord struct {
	data      []*indexItem // indexed documents, in insertion order
	binSearch bool         // true: search each document with findBin; false: with findInterpolation
}
   157  
   158  func (i *indexWord) FindAll(str string) []int {
   159  	words := strings.Split(strings.ToLower(str), ` `)
   160  	variants := make([]*variant, len(words))
   161  	for n, word := range words {
   162  		q, v := i.makeVariants(word)
   163  		vr := &variant{query: q, variants: v}
   164  		variants[n] = vr
   165  	}
   166  
   167  	result := make([]int, 0)
   168  	var offset = 0
   169  	for true {
   170  		i := i.findOff(variants, offset)
   171  		if i == emptyFind {
   172  			break
   173  		}
   174  		result = append(result, i)
   175  		offset = i + 1
   176  	}
   177  	return result
   178  }
   179  
   180  func (i *indexWord) FindOff(str string, offset int) int {
   181  	words := strings.Split(strings.ToLower(str), ` `)
   182  	variants := make([]*variant, len(words))
   183  	for n, word := range words {
   184  		q, v := i.makeVariants(word)
   185  		vr := &variant{query: q, variants: v}
   186  		variants[n] = vr
   187  	}
   188  	return i.findOff(variants, offset)
   189  }
   190  
   191  func (i *indexWord) findOff(variants []*variant, offset int) int {
   192  
   193  	for index := offset; index < len(i.data); index++ {
   194  		d := i.data[index]
   195  
   196  		for _, v := range variants {
   197  			if i.binSearch {
   198  				if ok := d.findBin(v.query, v.variants); ok {
   199  					return index
   200  				}
   201  			} else {
   202  				if ok := d.findInterpolation(v.query, v.variants); ok {
   203  					return index
   204  				}
   205  			}
   206  		}
   207  	}
   208  
   209  	return emptyFind
   210  }
   211  
   212  //
   213  func (i *indexWord) makeVariants(word string) (qWord string, variants []string) {
   214  	return makeVariants(word)
   215  }
   216  
   217  func makeVariants(word string) (string, []string) {
   218  	variants := make([]string, 0)
   219  
   220  	if len(word) > 0 && word[len(word)-1] == ')' {
   221  		base := make([]rune, 0)
   222  		start := false
   223  		variant := make([]rune, 0)
   224  		for _, r := range []rune(word) {
   225  			if r == tagAnyRune {
   226  				word = string(append(base, r))
   227  				variants = make([]string, 0)
   228  				break
   229  			}
   230  			if r == ')' {
   231  				variants = append(variants, string(variant))
   232  				break
   233  			} else if r == '(' {
   234  				start = true
   235  				variant = append(variant, base...)
   236  				variants = append(variants, string(variant))
   237  			} else if start && r == '|' {
   238  				variants = append(variants, string(variant))
   239  				variant = make([]rune, 0)
   240  				variant = append(variant, base...)
   241  			} else if start {
   242  				variant = append(variant, r)
   243  			} else {
   244  				base = append(base, r)
   245  			}
   246  		}
   247  	}
   248  	return word, variants
   249  }
   250  
   251  //
   252  func (i *indexWord) Add(str ...string) {
   253  	for _, s := range str {
   254  		words := strings.Split(strings.ToLower(s), ` `)
   255  
   256  		k := 0
   257  		for k < len(words) {
   258  			if len(words[k]) == 0 {
   259  				words = append(words[:k], words[k+1:]...)
   260  			} else {
   261  				k++
   262  			}
   263  		}
   264  
   265  		sort.Slice(words, func(i, j int) bool {
   266  			if words[i] < words[j] {
   267  				return true
   268  			}
   269  			return false
   270  		})
   271  
   272  		n := indexItem{words: words, document: s}
   273  		i.data = append(i.data, &n)
   274  	}
   275  }
   276  
   277  //
   278  func (i *indexWord) Find(str string) int {
   279  	return i.FindOff(str, 0)
   280  }
   281  
   282  //
   283  func (i *indexWord) DocumentAt(index int) (string, bool) {
   284  	if len(i.data) > index && index >= 0 {
   285  		return i.data[index].document, true
   286  	}
   287  	return ``, false
   288  }
   289  
   290  //
   291  func (i *indexWord) FindAt(index int, str string) bool {
   292  	if index < 0 || len(i.data) < index {
   293  		return false
   294  	}
   295  	words := strings.Split(strings.ToLower(str), ` `)
   296  	for _, word := range words {
   297  		query, variants := i.makeVariants(word)
   298  		if i.binSearch {
   299  			if ok := i.data[index].findBin(query, variants); ok {
   300  				return true
   301  			}
   302  		} else {
   303  			if ok := i.data[index].findInterpolation(query, variants); ok {
   304  				return true
   305  			}
   306  		}
   307  
   308  	}
   309  	return false
   310  }
   311  
   312  //
   313  func NewIndex() Index {
   314  	return &indexWord{data: make([]*indexItem, 0), binSearch: true}
   315  }
   316  
// indexWordSync is an indexWord whose Index methods are guarded by a
// read/write mutex.
type indexWordSync struct {
	indexWord
	mx sync.RWMutex // write-locked by Add, read-locked by the lookup methods
}
   322  
   323  func (i *indexWordSync) Add(str ...string) {
   324  	i.mx.Lock()
   325  	i.indexWord.Add(str...)
   326  	i.mx.Unlock()
   327  }
   328  
   329  func (i *indexWordSync) Find(str string) int {
   330  	i.mx.RLock()
   331  	defer i.mx.RUnlock()
   332  	return i.indexWord.Find(str)
   333  }
   334  
   335  func (i *indexWordSync) FindOff(str string, offset int) int {
   336  	i.mx.RLock()
   337  	defer i.mx.RUnlock()
   338  	return i.indexWord.FindOff(str, offset)
   339  }
   340  
   341  func (i *indexWordSync) DocumentAt(index int) (string, bool) {
   342  	i.mx.RLock()
   343  	defer i.mx.RUnlock()
   344  	return i.indexWord.DocumentAt(index)
   345  }
   346  
   347  func (i *indexWordSync) FindAt(index int, str string) bool {
   348  	i.mx.RLock()
   349  	defer i.mx.RUnlock()
   350  	return i.indexWord.FindAt(index, str)
   351  }
   352  
   353  func (i *indexWordSync) FindAll(str string) []int {
   354  	i.mx.RLock()
   355  	defer i.mx.RUnlock()
   356  	return i.indexWord.FindAll(str)
   357  }
   358  
   359  //
   360  func NewIndexSync() Index {
   361  	return &indexWordSync{indexWord: indexWord{data: make([]*indexItem, 0), binSearch: true}}
   362  }