github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/stopwords/detector.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package stopwords 13 14 import ( 15 "sync" 16 17 "github.com/weaviate/weaviate/entities/models" 18 19 "github.com/pkg/errors" 20 ) 21 22 type StopwordDetector interface { 23 IsStopword(string) bool 24 } 25 26 type Detector struct { 27 sync.Mutex 28 stopwords map[string]struct{} 29 } 30 31 func NewDetectorFromConfig(config models.StopwordConfig) (*Detector, error) { 32 d, err := NewDetectorFromPreset(config.Preset) 33 if err != nil { 34 return nil, errors.Wrap(err, "failed to create new detector from config") 35 } 36 37 d.SetAdditions(config.Additions) 38 d.SetRemovals(config.Removals) 39 40 return d, nil 41 } 42 43 func NewDetectorFromPreset(preset string) (*Detector, error) { 44 var list []string 45 var ok bool 46 47 if preset != "" { 48 list, ok = Presets[preset] 49 if !ok { 50 return nil, errors.Errorf("preset %q not known to stopword detector", preset) 51 } 52 } 53 54 d := &Detector{ 55 stopwords: map[string]struct{}{}, 56 } 57 58 for _, word := range list { 59 d.stopwords[word] = struct{}{} 60 } 61 62 return d, nil 63 } 64 65 func (d *Detector) SetAdditions(additions []string) { 66 d.Lock() 67 defer d.Unlock() 68 69 for _, add := range additions { 70 d.stopwords[add] = struct{}{} 71 } 72 } 73 74 func (d *Detector) SetRemovals(removals []string) { 75 d.Lock() 76 defer d.Unlock() 77 78 for _, rem := range removals { 79 delete(d.stopwords, rem) 80 } 81 } 82 83 func (d *Detector) IsStopword(word string) bool { 84 d.Lock() 85 defer d.Unlock() 86 87 _, ok := d.stopwords[word] 88 return ok 89 }