github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/analysis/core/stop.go (about) 1 package core 2 3 import ( 4 . "github.com/balzaczyy/golucene/analysis/util" 5 . "github.com/balzaczyy/golucene/core/analysis" 6 . "github.com/balzaczyy/golucene/core/analysis/tokenattributes" 7 "github.com/balzaczyy/golucene/core/util" 8 ) 9 10 // core/StopAnalyzer.java 11 12 /* An unmodifiable set containing some common English words that are not usually useful for searching. */ 13 var ENGLISH_STOP_WORDS_SET = map[string]bool{ 14 "a": true, "an": true, "and": true, "are": true, "as": true, "at": true, "be": true, "but": true, "by": true, 15 "for": true, "if": true, "in": true, "into": true, "is": true, "it": true, 16 "no": true, "not": true, "of": true, "on": true, "or": true, "such": true, 17 "that": true, "the": true, "their": true, "then": true, "there": true, "these": true, 18 "they": true, "this": true, "to": true, "was": true, "will": true, "with": true, 19 } 20 21 // core/StopFilter.java 22 23 /* 24 Removes stop words from a token stream. 25 26 You may specify the Version 27 compatibility when creating StopFilter: 28 29 - As of 3.1, StopFilter correctly handles Unicode 4.0 supplementary 30 characters in stopwords and position increments are preserved 31 */ 32 type StopFilter struct { 33 *FilteringTokenFilter 34 stopWords map[string]bool 35 termAtt CharTermAttribute 36 } 37 38 /* 39 Constructs a filter which removes words from the input TokenStream 40 that are named in the Set. 41 */ 42 func NewStopFilter(matchVersion util.Version, 43 in TokenStream, stopWords map[string]bool) *StopFilter { 44 45 ans := &StopFilter{stopWords: stopWords} 46 ans.FilteringTokenFilter = NewFilteringTokenFilter(ans, matchVersion, in) 47 ans.termAtt = ans.Attributes().Add("CharTermAttribute").(CharTermAttribute) 48 return ans 49 } 50 51 func (f *StopFilter) Accept() bool { 52 term := string(f.termAtt.Buffer()[:f.termAtt.Length()]) 53 _, ok := f.stopWords[term] 54 return !ok 55 }