github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/analysis/tokenizer.go (about) 1 package analysis 2 3 import ( 4 "io" 5 ) 6 7 // analysis/Tokenizer.java 8 9 /* 10 A Tokenizer is a TokenStream whose input is a Reader. 11 12 This is an abstract class; subclasses must override IncrementToken() 13 14 NOTE: Subclasses overriding IncrementToken() must call 15 Attributes().ClearAttributes() before setting attributes. 16 */ 17 type Tokenizer struct { 18 *TokenStreamImpl 19 // The text source for this Tokenizer 20 Input io.RuneReader 21 // Pending reader: not actually assigned to input until reset() 22 inputPending io.RuneReader 23 } 24 25 /* Constructs a token stream processing the given input. */ 26 func NewTokenizer(input io.RuneReader) *Tokenizer { 27 assert2(input != nil, "input must not be nil") 28 return &Tokenizer{ 29 TokenStreamImpl: NewTokenStream(), 30 inputPending: input, 31 Input: ILLEGAL_STATE_READER, 32 } 33 } 34 35 func (t *Tokenizer) Close() error { 36 if v, ok := t.Input.(io.Closer); ok { 37 err := v.Close() 38 if err != nil { 39 return err 40 } 41 } // optional close 42 t.inputPending = ILLEGAL_STATE_READER 43 t.Input = ILLEGAL_STATE_READER 44 return nil 45 } 46 47 /* 48 Return the corrected offset. If input is a CharFilter subclass, this 49 method calls CharFilter.correctOffset(), else returns currentOff. 50 */ 51 func (t *Tokenizer) CorrectOffset(currentOff int) int { 52 assert2(t.Input != nil, "this tokenizer is closed") 53 if v, ok := t.Input.(CharFilterService); ok { 54 return v.CorrectOffset(currentOff) 55 } 56 return currentOff 57 } 58 59 /* 60 Expert: Set a new reader on the Tokenizer. Typically, an analyzer (in 61 its tokenStream method) will use this to re-use a previously created 62 tokenizer. 63 */ 64 func (t *Tokenizer) SetReader(input io.RuneReader) error { 65 assert2(input != nil, "input must not be nil") 66 assert2(t.Input == ILLEGAL_STATE_READER, "TokenStream contract violation: close() call missing") 67 t.inputPending = input 68 return nil 69 } 70 71 func (t *Tokenizer) Reset() error { 72 t.Input = t.inputPending 73 t.inputPending = ILLEGAL_STATE_READER 74 return nil 75 } 76 77 var ILLEGAL_STATE_READER = new(illegalStateReader) 78 79 type illegalStateReader struct{} 80 81 func (r *illegalStateReader) ReadRune() (rune, int, error) { 82 panic("TokenStream contract violation: reset()/close() call missing, " + 83 "reset() called multiple times, or subclass does not call super.reset(). " + 84 "Please see Javadocs of TokenStream class for more information about the correct consuming workflow.") 85 } 86 87 func (r *illegalStateReader) String() string { return "ILLEGAL_STATE_READRE" }