github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/stopwords/detector.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package stopwords
    13  
    14  import (
    15  	"sync"
    16  
    17  	"github.com/weaviate/weaviate/entities/models"
    18  
    19  	"github.com/pkg/errors"
    20  )
    21  
// StopwordDetector is the lookup-only view of a stopword set: anything that
// can answer whether a single word is a stopword. *Detector satisfies it.
type StopwordDetector interface {
	// IsStopword reports whether the given word is a stopword.
	IsStopword(string) bool
}
    25  
// Detector holds a mutable set of stopwords and answers membership queries
// against it.
//
// The mutex is embedded, so Lock and Unlock are promoted onto Detector itself.
// SetAdditions, SetRemovals and IsStopword each hold it for the duration of
// their access to the stopwords map. Because of the mutex a Detector must not
// be copied; pass it around as *Detector.
type Detector struct {
	sync.Mutex
	// stopwords is used as a set: a word is a stopword iff it is a key.
	stopwords map[string]struct{}
}
    30  
    31  func NewDetectorFromConfig(config models.StopwordConfig) (*Detector, error) {
    32  	d, err := NewDetectorFromPreset(config.Preset)
    33  	if err != nil {
    34  		return nil, errors.Wrap(err, "failed to create new detector from config")
    35  	}
    36  
    37  	d.SetAdditions(config.Additions)
    38  	d.SetRemovals(config.Removals)
    39  
    40  	return d, nil
    41  }
    42  
    43  func NewDetectorFromPreset(preset string) (*Detector, error) {
    44  	var list []string
    45  	var ok bool
    46  
    47  	if preset != "" {
    48  		list, ok = Presets[preset]
    49  		if !ok {
    50  			return nil, errors.Errorf("preset %q not known to stopword detector", preset)
    51  		}
    52  	}
    53  
    54  	d := &Detector{
    55  		stopwords: map[string]struct{}{},
    56  	}
    57  
    58  	for _, word := range list {
    59  		d.stopwords[word] = struct{}{}
    60  	}
    61  
    62  	return d, nil
    63  }
    64  
    65  func (d *Detector) SetAdditions(additions []string) {
    66  	d.Lock()
    67  	defer d.Unlock()
    68  
    69  	for _, add := range additions {
    70  		d.stopwords[add] = struct{}{}
    71  	}
    72  }
    73  
    74  func (d *Detector) SetRemovals(removals []string) {
    75  	d.Lock()
    76  	defer d.Unlock()
    77  
    78  	for _, rem := range removals {
    79  		delete(d.stopwords, rem)
    80  	}
    81  }
    82  
    83  func (d *Detector) IsStopword(word string) bool {
    84  	d.Lock()
    85  	defer d.Unlock()
    86  
    87  	_, ok := d.stopwords[word]
    88  	return ok
    89  }