github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/analysis/standard/analyzer.go

package standard

import (
	. "github.com/balzaczyy/golucene/analysis/core"
	. "github.com/balzaczyy/golucene/analysis/util"
	. "github.com/balzaczyy/golucene/core/analysis"
	"io"
)

// standard/StandardAnalyzer.java

/* Default maximum allowed token length */
const DEFAULT_MAX_TOKEN_LENGTH = 255

/* A set containing some common English words that are usually not useful
for searching. Treat it as read-only: unlike the Java original, a Go map
cannot be made unmodifiable. */
var STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET

/*
StandardAnalyzer filters StandardTokenizer with StandardFilter,
LowerCaseFilter and StopFilter, using a list of English stop words.

You may specify the Version compatibility when creating StandardAnalyzer;
note that GoLucene supports version 4.5+ only.
*/
type StandardAnalyzer struct {
	*StopwordAnalyzerBase
	stopWordSet    map[string]bool
	maxTokenLength int
}
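
// For orientation, a sketch of the token pipeline that CreateComponents
// (below) assembles; the ordering is taken directly from that method:
//
//	StandardTokenizer -> StandardFilter -> LowerCaseFilter -> StopFilter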

/* Builds an analyzer with the given stop words. */
func NewStandardAnalyzerWithStopWords(stopWords map[string]bool) *StandardAnalyzer {
	ans := &StandardAnalyzer{
		stopWordSet:    stopWords,
		maxTokenLength: DEFAULT_MAX_TOKEN_LENGTH,
	}
	ans.StopwordAnalyzerBase = NewStopwordAnalyzerBaseWithStopWords(stopWords)
	// Point the base's service-provider hook back at this instance so the
	// embedded Analyzer machinery dispatches to this type's CreateComponents.
	ans.Spi = ans
	return ans
}

/* Builds an analyzer with the default stop words (STOP_WORDS_SET). */
func NewStandardAnalyzer() *StandardAnalyzer {
	return NewStandardAnalyzerWithStopWords(STOP_WORDS_SET)
}
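
// A minimal construction sketch. The stop-word map literal below is
// illustrative only and not part of this package:
//
//	defaults := NewStandardAnalyzer() // uses STOP_WORDS_SET
//	custom := NewStandardAnalyzerWithStopWords(map[string]bool{
//		"the": true,
//		"a":   true,
//	})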

func (a *StandardAnalyzer) CreateComponents(fieldName string, reader io.RuneReader) *TokenStreamComponents {
	version := a.Version()
	src := newStandardTokenizer(version, reader)
	src.maxTokenLength = a.maxTokenLength
	// Chain the filters in the documented order: StandardFilter, then
	// LowerCaseFilter, then StopFilter.
	var tok TokenStream = newStandardFilter(version, src)
	tok = NewLowerCaseFilter(version, tok)
	tok = NewStopFilter(version, tok, a.stopWordSet)
	ans := NewTokenStreamComponents(src, tok)
	// Wrap SetReader so that any later change to a.maxTokenLength is
	// re-applied to the tokenizer when the components are reused with a
	// new reader.
	super := ans.SetReader
	ans.SetReader = func(reader io.RuneReader) error {
		src.maxTokenLength = a.maxTokenLength
		return super(reader)
	}
	return ans
}
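
// A hedged reuse sketch: *strings.Reader satisfies io.RuneReader, and
// SetReader is the wrapped closure above, so swapping in a new reader also
// re-applies a.maxTokenLength. How tokens are then pulled from the result
// stream depends on the TokenStream API elsewhere in this repository and is
// not shown here.
//
//	a := NewStandardAnalyzer()
//	comps := a.CreateComponents("body", strings.NewReader("The Quick Fox"))
//	if err := comps.SetReader(strings.NewReader("another document")); err != nil {
//		// handle error
//	}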