github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/tsearch/stopwords.go (about) 1 // Copyright 2022 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package tsearch 12 13 import ( 14 "bytes" 15 "embed" 16 "path" 17 "strings" 18 ) 19 20 //go:embed stopwords/* 21 var stopwordFS embed.FS 22 23 var stopwordsMap map[string]map[string]struct{} 24 25 func init() { 26 stopwordsMap = make(map[string]map[string]struct{}) 27 dir, err := stopwordFS.ReadDir("stopwords") 28 if err != nil { 29 panic("error loading stopwords: " + err.Error()) 30 } 31 for _, f := range dir { 32 filename := f.Name() 33 name := strings.TrimSuffix(filename, ".stop") 34 // N.B. we use path.Join here instead of filepath.Join because go:embed 35 // always uses forward slashes. https://github.com/golang/go/issues/45230 36 contents, err := stopwordFS.ReadFile(path.Join("stopwords", filename)) 37 if err != nil { 38 panic("error loading stopwords: " + err.Error()) 39 } 40 wordList := bytes.Fields(contents) 41 stopwordsMap[name] = make(map[string]struct{}, len(wordList)) 42 for _, word := range wordList { 43 stopwordsMap[name][string(word)] = struct{}{} 44 } 45 } 46 // The simple text search config has no stopwords. 47 stopwordsMap["simple"] = nil 48 }