github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/tsearch/stopwords.go (about)

     1  // Copyright 2022 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tsearch
    12  
    13  import (
    14  	"bytes"
    15  	"embed"
    16  	"path"
    17  	"strings"
    18  )
    19  
// stopwordFS is the embedded file system holding everything matched by the
// stopwords/* pattern. It is read once by init to build stopwordsMap.
//
//go:embed stopwords/*
var stopwordFS embed.FS
    22  
// stopwordsMap maps a stopword list name (the embedded file's name with any
// ".stop" suffix removed) to the set of words found in that file. It is
// populated by init. The "simple" entry is explicitly set to a nil set.
var stopwordsMap map[string]map[string]struct{}
    24  
    25  func init() {
    26  	stopwordsMap = make(map[string]map[string]struct{})
    27  	dir, err := stopwordFS.ReadDir("stopwords")
    28  	if err != nil {
    29  		panic("error loading stopwords: " + err.Error())
    30  	}
    31  	for _, f := range dir {
    32  		filename := f.Name()
    33  		name := strings.TrimSuffix(filename, ".stop")
    34  		// N.B. we use path.Join here instead of filepath.Join because go:embed
    35  		// always uses forward slashes. https://github.com/golang/go/issues/45230
    36  		contents, err := stopwordFS.ReadFile(path.Join("stopwords", filename))
    37  		if err != nil {
    38  			panic("error loading stopwords: " + err.Error())
    39  		}
    40  		wordList := bytes.Fields(contents)
    41  		stopwordsMap[name] = make(map[string]struct{}, len(wordList))
    42  		for _, word := range wordList {
    43  			stopwordsMap[name][string(word)] = struct{}{}
    44  		}
    45  	}
    46  	// The simple text search config has no stopwords.
    47  	stopwordsMap["simple"] = nil
    48  }