github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/analysis/standard/analyzer.go (about) 1 package standard 2 3 import ( 4 . "github.com/balzaczyy/golucene/analysis/core" 5 . "github.com/balzaczyy/golucene/analysis/util" 6 . "github.com/balzaczyy/golucene/core/analysis" 7 "io" 8 ) 9 10 // standard/StandardAnalyzer.java 11 12 /* Default maximum allowed token length */ 13 const DEFAULT_MAX_TOKEN_LENGTH = 255 14 15 /* An unmodifiable set containing some common English words that are usually not useful for searching */ 16 var STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET 17 18 /* 19 Filters StandardTokenizer with StandardFilter, LowerCaseFilter and 20 StopFilter, using a list of English stop words. 21 22 You may specify the Version 23 compatibility when creating StandardAnalyzer: 24 25 - GoLucene supports 4.5+ only. 26 */ 27 type StandardAnalyzer struct { 28 *StopwordAnalyzerBase 29 stopWordSet map[string]bool 30 maxTokenLength int 31 } 32 33 /* Builds an analyzer with the given stop words. */ 34 func NewStandardAnalyzerWithStopWords(stopWords map[string]bool) *StandardAnalyzer { 35 ans := &StandardAnalyzer{ 36 stopWordSet: stopWords, 37 maxTokenLength: DEFAULT_MAX_TOKEN_LENGTH, 38 } 39 ans.StopwordAnalyzerBase = NewStopwordAnalyzerBaseWithStopWords(stopWords) 40 ans.Spi = ans 41 return ans 42 } 43 44 /* Buils an analyzer with the default stop words (STOP_WORDS_SET). */ 45 func NewStandardAnalyzer() *StandardAnalyzer { 46 return NewStandardAnalyzerWithStopWords(STOP_WORDS_SET) 47 } 48 49 func (a *StandardAnalyzer) CreateComponents(fieldName string, reader io.RuneReader) *TokenStreamComponents { 50 version := a.Version() 51 src := newStandardTokenizer(version, reader) 52 src.maxTokenLength = a.maxTokenLength 53 var tok TokenStream = newStandardFilter(version, src) 54 tok = NewLowerCaseFilter(version, tok) 55 tok = NewStopFilter(version, tok, a.stopWordSet) 56 ans := NewTokenStreamComponents(src, tok) 57 super := ans.SetReader 58 ans.SetReader = func(reader io.RuneReader) error { 59 src.maxTokenLength = a.maxTokenLength 60 return super(reader) 61 } 62 return ans 63 }