github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/test_framework/analysis/mockAnalyzer.go (about)

     1  package analysis
     2  
     3  import (
     4  	ca "github.com/balzaczyy/golucene/core/analysis"
     5  	auto "github.com/balzaczyy/golucene/core/util/automaton"
     6  	"math/rand"
     7  )
     8  
     9  // analysis/MockAnalyzer.java
    10  
/*
Analyzer for testing.

This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
for unit tests. If you are testing a custom component such as a
queryparser or analyzer-wrapper that consumes analysis streams, it's
a great idea to test it with this analyzer instead. MockAnalyzer has
the following behavior:

1. By default, the assertions in MockTokenizer are turned on for extra
checks that the consumer is consuming properly. These checks can be
disabled with SetEnableChecks(bool).
2. Payload data is randomly injected into the streams for more
thorough testing of payloads.
*/
type MockAnalyzer struct {
	*ca.AnalyzerImpl // embedded base analyzer; constructed with PER_FIELD_REUSE_STRATEGY (see NewMockAnalyzer)
	// Automaton driving tokenization (e.g. WHITESPACE); presumably
	// defines which character runs form tokens — TODO confirm against
	// MockTokenizer once ported.
	runAutomaton *auto.CharacterRunAutomaton
	// When true, emitted terms are lowercased (see MockTokenizer doc below).
	lowerCase bool
	// Stop-filter automaton; EMPTY_STOPSET disables stopword removal.
	filter *auto.CharacterRunAutomaton
	// NOTE(review): lowercase 'g' is a naming typo, kept because unseen
	// methods may reference it. Presumably the position increment between
	// multiple values of the same field — TODO confirm.
	positionIncrementgap int
	// Private RNG seeded from the caller's generator in NewMockAnalyzer.
	random           *rand.Rand
	previousMappings map[string]int
	// Defaults to true in NewMockAnalyzer: consumer-consistency checks on.
	enableChecks bool
	// Defaults to DEFAULT_MAX_TOKEN_LENGTH in NewMockAnalyzer.
	maxTokenLength int
}
    37  
    38  // Creates a new MockAnalyzer.
    39  func NewMockAnalyzer(r *rand.Rand, runAutomaton *auto.CharacterRunAutomaton, lowerCase bool, filter *auto.CharacterRunAutomaton) *MockAnalyzer {
    40  	return &MockAnalyzer{
    41  		AnalyzerImpl: ca.NewAnalyzerWithStrategy(ca.PER_FIELD_REUSE_STRATEGY),
    42  		// TODO: this should be solved in a different way; Random should not be shared (!)
    43  		random:           rand.New(rand.NewSource(r.Int63())),
    44  		runAutomaton:     runAutomaton,
    45  		lowerCase:        lowerCase,
    46  		filter:           filter,
    47  		previousMappings: make(map[string]int),
    48  		enableChecks:     true,
    49  		maxTokenLength:   DEFAULT_MAX_TOKEN_LENGTH,
    50  	}
    51  }
    52  
    53  func NewMockAnalyzer3(r *rand.Rand, runAutomation *auto.CharacterRunAutomaton, lowerCase bool) *MockAnalyzer {
    54  	return NewMockAnalyzer(r, runAutomation, lowerCase, EMPTY_STOPSET)
    55  }
    56  
    57  // Creates a Whitespace-lowercasing analyzer with no stopwords removal.
    58  func NewMockAnalyzerWithRandom(r *rand.Rand) *MockAnalyzer {
    59  	return NewMockAnalyzer3(r, WHITESPACE, true)
    60  }
    61  
// analysis/MockTokenFilter.java

// EMPTY_STOPSET is a run automaton built from the empty automaton —
// presumably it accepts no strings, so a stop filter using it removes
// nothing (see NewMockAnalyzer3).
var EMPTY_STOPSET = auto.NewCharacterRunAutomaton(auto.MakeEmpty())
    65  
// analysis/MockTokenizer.java

/*
Tokenizer for testing.

This tokenizer is a replacement for WHITESPACE, SIMPLE, and KEYWORD
tokenizers. If you are writing a component such as a TokenFilter,
it's a great idea to test it wrapping this tokenizer instead for
extra checks. This tokenizer has the following behavior:

1. An internal state-machine is used for checking consumer
consistency. These checks can be disabled with DisableChecks(bool).
2. For convenience, optionally lowercases terms that it outputs.
*/
type MockTokenizer struct {
	// NOTE(review): struct body is empty in this chunk — the fields and
	// state machine described above appear not to be ported yet.
}
    82  
// WHITESPACE acts similar to WhitespaceTokenizer: maximal runs of
// characters other than space, tab, CR, and LF form tokens.
var WHITESPACE = auto.NewCharacterRunAutomaton(auto.NewRegExp("[^ \t\r\n]+").ToAutomaton())