github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/test_framework/analysis/mockAnalyzer.go

package analysis

import (
	ca "github.com/balzaczyy/golucene/core/analysis"
	auto "github.com/balzaczyy/golucene/core/util/automaton"
	"math/rand"
)

// analysis/MockAnalyzer.java

/*
Analyzer for testing.

This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
for unit tests. If you are testing a custom component such as a
query parser or analyzer-wrapper that consumes analysis streams, it's
a great idea to test it with this analyzer instead. MockAnalyzer has
the following behavior:

1. By default, the assertions in MockTokenizer are turned on for extra
checks that the consumer is consuming properly. These checks can be
disabled with SetEnableChecks(bool).
2. Payload data is randomly injected into the streams for more
thorough testing of payloads.
*/
type MockAnalyzer struct {
	*ca.AnalyzerImpl
	runAutomaton         *auto.CharacterRunAutomaton
	lowerCase            bool
	filter               *auto.CharacterRunAutomaton
	positionIncrementgap int
	random               *rand.Rand
	previousMappings     map[string]int
	enableChecks         bool
	maxTokenLength       int
}

// Creates a new MockAnalyzer.
func NewMockAnalyzer(r *rand.Rand, runAutomaton *auto.CharacterRunAutomaton, lowerCase bool, filter *auto.CharacterRunAutomaton) *MockAnalyzer {
	return &MockAnalyzer{
		AnalyzerImpl: ca.NewAnalyzerWithStrategy(ca.PER_FIELD_REUSE_STRATEGY),
		// TODO: this should be solved in a different way; Random should not be shared (!)
		random:           rand.New(rand.NewSource(r.Int63())),
		runAutomaton:     runAutomaton,
		lowerCase:        lowerCase,
		filter:           filter,
		previousMappings: make(map[string]int),
		enableChecks:     true,
		maxTokenLength:   DEFAULT_MAX_TOKEN_LENGTH,
	}
}

func NewMockAnalyzer3(r *rand.Rand, runAutomation *auto.CharacterRunAutomaton, lowerCase bool) *MockAnalyzer {
	return NewMockAnalyzer(r, runAutomation, lowerCase, EMPTY_STOPSET)
}

// Creates a whitespace-lowercasing analyzer with no stopword removal.
func NewMockAnalyzerWithRandom(r *rand.Rand) *MockAnalyzer {
	return NewMockAnalyzer3(r, WHITESPACE, true)
}

// analysis/MockTokenFilter.java

var EMPTY_STOPSET = auto.NewCharacterRunAutomaton(auto.MakeEmpty())

// analysis/MockTokenizer.java

/*
Tokenizer for testing.

This tokenizer is a replacement for WHITESPACE, SIMPLE, and KEYWORD
tokenizers. If you are writing a component such as a TokenFilter,
it's a great idea to test it wrapping this tokenizer instead for
extra checks. This tokenizer has the following behavior:

1. An internal state machine is used for checking consumer
consistency. These checks can be disabled with DisableChecks(bool).
2. For convenience, it optionally lowercases the terms it outputs.
*/
type MockTokenizer struct {
}

// Acts similar to WhitespaceTokenizer.
var WHITESPACE = auto.NewCharacterRunAutomaton(auto.NewRegExp("[^ \t\r\n]+").ToAutomaton())
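
// Usage sketch (not part of the original file): shows how a test might
// construct the analyzers declared above. The helper name and the seed value
// are illustrative assumptions; everything it calls (NewMockAnalyzerWithRandom,
// NewMockAnalyzer, WHITESPACE, EMPTY_STOPSET) is defined in this file.
func newWhitespaceMockAnalyzerForTest() *MockAnalyzer {
	// Seed the random source deterministically so the randomized behavior
	// (e.g. payload injection) is reproducible across test runs.
	r := rand.New(rand.NewSource(42))
	// Whitespace-splitting, lowercasing, no stopword removal:
	return NewMockAnalyzerWithRandom(r)
	// Equivalent explicit form:
	// return NewMockAnalyzer(r, WHITESPACE, true, EMPTY_STOPSET)
}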