github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/analysis/tokenizer.go

package analysis

import (
	"io"
)

// analysis/Tokenizer.java

/*
A Tokenizer is a TokenStream whose input is a Reader (an io.RuneReader in
this port).

This is an abstract base type; subclasses must override IncrementToken().

NOTE: Subclasses overriding IncrementToken() must call
Attributes().ClearAttributes() before setting attributes.
*/
type Tokenizer struct {
	*TokenStreamImpl
	// The text source for this Tokenizer
	Input io.RuneReader
	// Pending reader: not actually assigned to input until reset()
	inputPending io.RuneReader
}

/* Constructs a token stream processing the given input. */
func NewTokenizer(input io.RuneReader) *Tokenizer {
	assert2(input != nil, "input must not be nil")
	return &Tokenizer{
		TokenStreamImpl: NewTokenStream(),
		inputPending:    input,
		Input:           ILLEGAL_STATE_READER,
	}
}
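
// The example below is an illustrative sketch, not part of the original
// file: it shows how a concrete tokenizer could embed Tokenizer and honor
// the contract documented above (clear attributes before setting any, and
// report end of input). exampleTokenizer, the (bool, error) signature of
// IncrementToken, and the Attributes() accessor (taken from the doc comment
// above) are assumptions; real term/offset attribute handling is elided.
type exampleTokenizer struct {
	*Tokenizer
}

func newExampleTokenizer(input io.RuneReader) *exampleTokenizer {
	return &exampleTokenizer{Tokenizer: NewTokenizer(input)}
}

func (t *exampleTokenizer) IncrementToken() (bool, error) {
	t.Attributes().ClearAttributes() // required before setting any attributes
	for {
		r, _, err := t.Input.ReadRune()
		if err == io.EOF {
			return false, nil // end of input: no more tokens
		}
		if err != nil {
			return false, err
		}
		if r != ' ' && r != '\t' && r != '\n' {
			// A real implementation would accumulate the token's runes and
			// record term text and offsets here; that is elided in this sketch.
			return true, nil
		}
	}
}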

// Close releases the resources held by this Tokenizer: if the current Input
// implements io.Closer it is closed, and both readers are reset to
// ILLEGAL_STATE_READER so that any further read without SetReader()/Reset()
// fails loudly.
func (t *Tokenizer) Close() error {
	if v, ok := t.Input.(io.Closer); ok {
		err := v.Close()
		if err != nil {
			return err
		}
	} // optional close
	t.inputPending = ILLEGAL_STATE_READER
	t.Input = ILLEGAL_STATE_READER
	return nil
}

/*
Return the corrected offset. If the input implements CharFilterService,
this method delegates to its CorrectOffset(); otherwise it returns
currentOff unchanged.
*/
func (t *Tokenizer) CorrectOffset(currentOff int) int {
	assert2(t.Input != nil, "this tokenizer is closed")
	if v, ok := t.Input.(CharFilterService); ok {
		return v.CorrectOffset(currentOff)
	}
	return currentOff
}
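
// offsetExample is an illustrative helper, not part of the original file: it
// shows the intended use of CorrectOffset, mapping start/end offsets that a
// concrete tokenizer computed against the (possibly filtered) Input back to
// offsets in the original text. start and end are hypothetical values
// tracked by the caller.
func offsetExample(t *Tokenizer, start, end int) (int, int) {
	return t.CorrectOffset(start), t.CorrectOffset(end)
}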

/*
Expert: Set a new reader on the Tokenizer. Typically, an analyzer (in
its tokenStream method) will use this to reuse a previously created
tokenizer.
*/
func (t *Tokenizer) SetReader(input io.RuneReader) error {
	assert2(input != nil, "input must not be nil")
	assert2(t.Input == ILLEGAL_STATE_READER, "TokenStream contract violation: close() call missing")
	t.inputPending = input
	return nil
}

// Reset assigns the pending reader to Input. Subclasses that override
// Reset() must also call this implementation.
func (t *Tokenizer) Reset() error {
	t.Input = t.inputPending
	t.inputPending = ILLEGAL_STATE_READER
	return nil
}
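
// consumeExample is an illustrative sketch, not part of the original file,
// of the reuse workflow implied by SetReader/Reset/Close: stage a reader
// with SetReader, call Reset before reading, and Close when done so the
// same Tokenizer can be reused with another reader. The IncrementToken loop
// is elided because it lives on the concrete subclass.
func consumeExample(tok *Tokenizer, r io.RuneReader) error {
	if err := tok.SetReader(r); err != nil { // legal only after NewTokenizer or Close
		return err
	}
	if err := tok.Reset(); err != nil { // promotes the pending reader to Input
		return err
	}
	// ... drive the concrete subclass's IncrementToken() here ...
	return tok.Close() // returns both readers to the ILLEGAL_STATE_READER sentinel
}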

var ILLEGAL_STATE_READER = new(illegalStateReader)

// illegalStateReader is a sentinel io.RuneReader that panics on any read,
// signaling that the Tokenizer was used before Reset() or after Close().
type illegalStateReader struct{}

func (r *illegalStateReader) ReadRune() (rune, int, error) {
	panic("TokenStream contract violation: reset()/close() call missing, " +
		"reset() called multiple times, or subclass does not call super.reset(). " +
		"Please see Javadocs of TokenStream class for more information about the correct consuming workflow.")
}

func (r *illegalStateReader) String() string { return "ILLEGAL_STATE_READER" }