github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/analysis/core/stop.go (about)

     1  package core
     2  
     3  import (
     4  	. "github.com/balzaczyy/golucene/analysis/util"
     5  	. "github.com/balzaczyy/golucene/core/analysis"
     6  	. "github.com/balzaczyy/golucene/core/analysis/tokenattributes"
     7  	"github.com/balzaczyy/golucene/core/util"
     8  )
     9  
    10  // core/StopAnalyzer.java
    11  
    12  /* An unmodifiable set containing some common English words that are not usually useful for searching. */
    13  var ENGLISH_STOP_WORDS_SET = map[string]bool{
    14  	"a": true, "an": true, "and": true, "are": true, "as": true, "at": true, "be": true, "but": true, "by": true,
    15  	"for": true, "if": true, "in": true, "into": true, "is": true, "it": true,
    16  	"no": true, "not": true, "of": true, "on": true, "or": true, "such": true,
    17  	"that": true, "the": true, "their": true, "then": true, "there": true, "these": true,
    18  	"they": true, "this": true, "to": true, "was": true, "will": true, "with": true,
    19  }
    20  
    21  // core/StopFilter.java
    22  
    23  /*
    24  Removes stop words from a token stream.
    25  
    26  You may specify the Version
    27  compatibility when creating StopFilter:
    28  
    29  	- As of 3.1, StopFilter correctly handles Unicode 4.0 supplementary
    30  	characters in stopwords and position increments are preserved
    31  */
    32  type StopFilter struct {
    33  	*FilteringTokenFilter
    34  	stopWords map[string]bool
    35  	termAtt   CharTermAttribute
    36  }
    37  
    38  /*
    39  Constructs a filter which removes words from the input TokenStream
    40  that are named in the Set.
    41  */
    42  func NewStopFilter(matchVersion util.Version,
    43  	in TokenStream, stopWords map[string]bool) *StopFilter {
    44  
    45  	ans := &StopFilter{stopWords: stopWords}
    46  	ans.FilteringTokenFilter = NewFilteringTokenFilter(ans, matchVersion, in)
    47  	ans.termAtt = ans.Attributes().Add("CharTermAttribute").(CharTermAttribute)
    48  	return ans
    49  }
    50  
    51  func (f *StopFilter) Accept() bool {
    52  	term := string(f.termAtt.Buffer()[:f.termAtt.Length()])
    53  	_, ok := f.stopWords[term]
    54  	return !ok
    55  }