github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/tok/stemmers.go (about) 1 /* 2 * Copyright 2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package tok 18 19 import ( 20 "github.com/blevesearch/bleve/analysis" 21 _ "github.com/blevesearch/bleve/analysis/lang/ar" // Needed for bleve language support. 22 _ "github.com/blevesearch/bleve/analysis/lang/cjk" 23 _ "github.com/blevesearch/bleve/analysis/lang/ckb" 24 _ "github.com/blevesearch/bleve/analysis/lang/da" 25 _ "github.com/blevesearch/bleve/analysis/lang/de" 26 _ "github.com/blevesearch/bleve/analysis/lang/es" 27 _ "github.com/blevesearch/bleve/analysis/lang/fi" 28 _ "github.com/blevesearch/bleve/analysis/lang/fr" 29 _ "github.com/blevesearch/bleve/analysis/lang/hi" 30 _ "github.com/blevesearch/bleve/analysis/lang/hu" 31 _ "github.com/blevesearch/bleve/analysis/lang/it" 32 _ "github.com/blevesearch/bleve/analysis/lang/nl" 33 _ "github.com/blevesearch/bleve/analysis/lang/no" 34 _ "github.com/blevesearch/bleve/analysis/lang/pt" 35 _ "github.com/blevesearch/bleve/analysis/lang/ro" 36 _ "github.com/blevesearch/bleve/analysis/lang/ru" 37 _ "github.com/blevesearch/bleve/analysis/lang/sv" 38 _ "github.com/blevesearch/bleve/analysis/lang/tr" 39 _ "github.com/blevesearch/bleve/analysis/token/porter" 40 "github.com/golang/glog" 41 ) 42 43 var langStemmers = map[string]string{ 44 "ar": "stemmer_ar", 45 "ckb": "stemmer_ckb", 46 "da": "stemmer_da_snowball", 47 "de": "stemmer_de_light", 48 "en": "stemmer_porter", 49 "es": "stemmer_es_light", 50 "fi": "stemmer_fi_snowball", 51 "fr": "stemmer_fr_light", 52 "hi": "stemmer_hi", 53 "hu": "stemmer_hu_snowball", 54 "it": "stemmer_it_light", 55 "ja": "cjk_bigram", 56 "ko": "cjk_bigram", 57 "nl": "stemmer_nl_snowball", 58 "no": "stemmer_no_snowball", 59 "pt": "stemmer_pt_light", 60 "ro": "stemmer_ro_snowball", 61 "ru": "stemmer_ru_snowball", 62 "sv": "stemmer_sv_snowball", 63 "tr": "stemmer_tr_snowball", 64 "zh": "cjk_bigram", 65 } 66 67 // filterStemmers filters stems using an existing filter, imported here. 68 // If the lang filter is found, the we will forward requests to it. 69 // Returns filtered tokens if filter is found, otherwise returns tokens unmodified. 70 func filterStemmers(lang string, input analysis.TokenStream) analysis.TokenStream { 71 if len(input) == 0 { 72 return input 73 } 74 // check if we have stemmer filter for this lang. 75 name, ok := langStemmers[lang] 76 if !ok { 77 return input 78 } 79 // get filter from concurrent cache so we dont recreate. 80 filter, err := bleveCache.TokenFilterNamed(name) 81 if err != nil { 82 glog.Errorf("Error while filtering %q stems: %s", lang, err) 83 return input 84 } 85 if filter != nil { 86 return filter.Filter(input) 87 } 88 return input 89 }