github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/tok/stopwords.go (about) 1 /* 2 * Copyright 2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package tok 18 19 import ( 20 "github.com/blevesearch/bleve/analysis" 21 _ "github.com/blevesearch/bleve/analysis/lang/ar" // Needed for bleve language support. 22 _ "github.com/blevesearch/bleve/analysis/lang/bg" 23 _ "github.com/blevesearch/bleve/analysis/lang/ca" 24 _ "github.com/blevesearch/bleve/analysis/lang/ckb" 25 _ "github.com/blevesearch/bleve/analysis/lang/cs" 26 _ "github.com/blevesearch/bleve/analysis/lang/da" 27 _ "github.com/blevesearch/bleve/analysis/lang/de" 28 _ "github.com/blevesearch/bleve/analysis/lang/el" 29 _ "github.com/blevesearch/bleve/analysis/lang/en" 30 _ "github.com/blevesearch/bleve/analysis/lang/es" 31 _ "github.com/blevesearch/bleve/analysis/lang/eu" 32 _ "github.com/blevesearch/bleve/analysis/lang/fa" 33 _ "github.com/blevesearch/bleve/analysis/lang/fi" 34 _ "github.com/blevesearch/bleve/analysis/lang/fr" 35 _ "github.com/blevesearch/bleve/analysis/lang/ga" 36 _ "github.com/blevesearch/bleve/analysis/lang/gl" 37 _ "github.com/blevesearch/bleve/analysis/lang/hi" 38 _ "github.com/blevesearch/bleve/analysis/lang/hu" 39 _ "github.com/blevesearch/bleve/analysis/lang/hy" 40 _ "github.com/blevesearch/bleve/analysis/lang/id" 41 _ "github.com/blevesearch/bleve/analysis/lang/it" 42 _ "github.com/blevesearch/bleve/analysis/lang/nl" 43 _ "github.com/blevesearch/bleve/analysis/lang/no" 44 _ "github.com/blevesearch/bleve/analysis/lang/pt" 45 _ "github.com/blevesearch/bleve/analysis/lang/ro" 46 _ "github.com/blevesearch/bleve/analysis/lang/ru" 47 _ "github.com/blevesearch/bleve/analysis/lang/sv" 48 _ "github.com/blevesearch/bleve/analysis/lang/tr" 49 "github.com/golang/glog" 50 ) 51 52 var langStops = map[string]string{ 53 "ar": "stop_ar", 54 "bg": "stop_bg", 55 "ca": "stop_ca", 56 "ckb": "stop_ckb", 57 "cs": "stop_cs", 58 "da": "stop_da", 59 "de": "stop_de", 60 "el": "stop_el", 61 "en": "stop_en", 62 "es": "stop_es", 63 "eu": "stop_eu", 64 "fa": "stop_fa", 65 "fi": "stop_fi", 66 "fr": "stop_fr", 67 "ga": "stop_ga", 68 "gl": "stop_gl", 69 "hi": "stop_hi", 70 "hu": "stop_hu", 71 "hy": "stop_hy", 72 "id": "stop_id", 73 "it": "stop_it", 74 "nl": "stop_nl", 75 "no": "stop_no", 76 "pt": "stop_pt", 77 "ro": "stop_ro", 78 "ru": "stop_ru", 79 "sv": "stop_sv", 80 "tr": "stop_tr", 81 } 82 83 // filterStopwords filters stop words using an existing filter, imported here. 84 // If the lang filter is found, the we will forward requests to it. 85 // Returns filtered tokens if filter is found, otherwise returns tokens unmodified. 86 func filterStopwords(lang string, input analysis.TokenStream) analysis.TokenStream { 87 if len(input) == 0 { 88 return input 89 } 90 // check if we have stop words filter for this lang. 91 name, ok := langStops[lang] 92 if !ok { 93 return input 94 } 95 // get filter from concurrent cache so we dont recreate. 96 filter, err := bleveCache.TokenFilterNamed(name) 97 if err != nil { 98 glog.Errorf("Error while filtering %q stop words: %s", lang, err) 99 return input 100 } 101 if filter != nil { 102 return filter.Filter(input) 103 } 104 return input 105 }