github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/docFieldConsumerPerField.go (about)

     1  package index
     2  
     3  // import (
     4  // 	ta "github.com/balzaczyy/golucene/core/analysis/tokenattributes"
     5  // 	"github.com/balzaczyy/golucene/core/index/model"
     6  // 	"github.com/balzaczyy/golucene/core/util"
     7  // )
     8  
     9  // type DocFieldConsumerPerField interface {
    10  // 	// Processes all occurrences of a single field
    11  // 	processFields([]model.IndexableField, int) error
    12  // 	abort()
    13  // 	fieldInfo() *model.FieldInfo
    14  // }
    15  
    16  // index/DocInverterPerField.java
    17  
    18  // type DocInverterPerField struct {
    19  // 	_fieldInfo  *model.FieldInfo
    20  // 	consumer    InvertedDocConsumerPerField
    21  // 	endConsumer InvertedDocEndConsumerPerField
    22  // 	docState    *docState
    23  // 	fieldState  *FieldInvertState
    24  // }
    25  
    26  // func newDocInverterPerField(parent *DocInverter, fieldInfo *model.FieldInfo) *DocInverterPerField {
    27  // 	ans := &DocInverterPerField{
    28  // 		_fieldInfo: fieldInfo,
    29  // 		docState:   parent.docState,
    30  // 		fieldState: newFieldInvertState(fieldInfo.Name),
    31  // 	}
    32  // 	ans.consumer = parent.consumer.addField(ans, fieldInfo)
    33  // 	ans.endConsumer = parent.endConsumer.addField(ans, fieldInfo)
    34  // 	return ans
    35  // }
    36  
    37  // func (dipf *DocInverterPerField) abort() {
    38  // 	defer dipf.endConsumer.abort()
    39  // 	dipf.consumer.abort()
    40  // }
    41  
    42  // func (di *DocInverterPerField) processFields(fields []model.IndexableField, count int) error {
    43  // 	di.fieldState.reset()
    44  
    45  // 	doInvert, err := di.consumer.start(fields, count)
    46  // 	if err != nil {
    47  // 		return err
    48  // 	}
    49  
    50  // 	for i, field := range fields[:count] {
    51  // 		fieldType := field.FieldType()
    52  
    53  // 		// TODO FI: this should be "genericized" to querying consumer if
    54  // 		// it wants to see this particular field tokenized.
    55  // 		if fieldType.Indexed() && doInvert {
    56  // 			analyzed := fieldType.Tokenized() && di.docState.analyzer != nil
    57  
    58  // 			// if the field omits norms, the boost cannot be indexed.
    59  // 			assert2(!fieldType.OmitNorms() || field.Boost() == 1.0,
    60  // 				"You cannot set an index-time boost: norms are omitted for field '%v'",
    61  // 				field.Name())
    62  
    63  // 			// only bother checking offsets if something will consume them.
    64  // 			// TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
    65  // 			checkOffsets := fieldType.IndexOptions() == model.INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
    66  // 			lastStartOffset := 0
    67  
    68  // 			if i > 0 && analyzed {
    69  // 				panic("not implemented yet")
    70  // 				// di.fieldState.position += di.docState.analyzer.PositionIncrementGap(di.fieldInfo().Name())
    71  // 			}
    72  
    73  // 			if stream, err := field.TokenStream(di.docState.analyzer); err == nil {
    74  // 				// reset the TokenStream to the first token
    75  // 				if err = stream.Reset(); err == nil {
    76  // 					err = func() (err error) {
    77  // 						var success2 = false
    78  // 						defer func() {
    79  // 							if !success2 {
    80  // 								util.CloseWhileSuppressingError(stream)
    81  // 							} else {
    82  // 								err = stream.Close()
    83  // 							}
    84  // 						}()
    85  
    86  // 						var hasMoreTokens bool
    87  // 						hasMoreTokens, err = stream.IncrementToken()
    88  // 						if err != nil {
    89  // 							return err
    90  // 						}
    91  
    92  // 						atts := stream.Attributes()
    93  // 						di.fieldState.attributeSource = atts
    94  
    95  // 						offsetAttribute := atts.Add("OffsetAttribute").(ta.OffsetAttribute)
    96  // 						posIncrAttribute := atts.Add("PositionIncrementAttribute").(ta.PositionIncrementAttribute)
    97  
    98  // 						if hasMoreTokens {
    99  // 							di.consumer.startField(field)
   100  
   101  // 							for {
   102  // 								// If we hit an error in stream.next below (which is
    103  // 								// fairly common, e.g. if the analyzer chokes on a given
   104  // 								// document), then it's non-aborting and (above) this
   105  // 								// one document will be marked as deleted, but still
   106  // 								// consume a docID
   107  
   108  // 								posIncr := posIncrAttribute.PositionIncrement()
   109  // 								assert2(posIncr >= 0,
   110  // 									"position increment must be >=0 (got %v) for field '%v'",
   111  // 									posIncr, field.Name())
   112  // 								assert2(di.fieldState.position != 0 || posIncr != 0,
   113  // 									"first position increment must be > 0 (got 0) for field '%v'",
   114  // 									field.Name())
   115  // 								position := di.fieldState.position + posIncr
   116  // 								if position > 0 {
   117  // 									// NOTE: confusing: this "mirrors" the position++ we do below
   118  // 									position--
   119  // 								} else {
   120  // 									assert2(position >= 0, "position overflow for field '%v'", field.Name())
   121  // 								}
   122  
   123  // 								// position is legal, we can safely place it in fieldState now.
   124  // 								// not sure if anything will use fieldState after non-aborting exc...
   125  // 								di.fieldState.position = position
   126  
   127  // 								if posIncr == 0 {
   128  // 									di.fieldState.numOverlap++
   129  // 								}
   130  
   131  // 								if checkOffsets {
   132  // 									startOffset := di.fieldState.offset + offsetAttribute.StartOffset()
   133  // 									endOffset := di.fieldState.offset + offsetAttribute.EndOffset()
   134  // 									assert2(startOffset >= 0 && startOffset <= endOffset,
   135  // 										"startOffset must be non-negative, and endOffset must be >= startOffset, startOffset=%v,endOffset=%v for field '%v'",
   136  // 										startOffset, endOffset, field.Name())
   137  // 									assert2(startOffset >= lastStartOffset,
   138  // 										"offsets must not go backwards startOffset=%v is < lastStartOffset=%v for field '%v'",
   139  // 										startOffset, lastStartOffset, field.Name())
   140  // 									lastStartOffset = startOffset
   141  // 								}
   142  
   143  // 								if err = func() error {
   144  // 									var success = false
   145  // 									defer func() {
   146  // 										if !success {
   147  // 											di.docState.docWriter.setAborting()
   148  // 										}
   149  // 									}()
   150  // 									// If we hit an error here, we abort all buffered
   151  // 									// documents since the last flush, on the
   152  // 									// likelihood that the internal state of the
   153  // 									// consumer is now corrupt and should not be
   154  // 									// flushed to a new segment:
   155  // 									if err := di.consumer.add(); err != nil {
   156  // 										return err
   157  // 									}
   158  // 									success = true
   159  // 									return nil
   160  // 								}(); err != nil {
   161  // 									return err
   162  // 								}
   163  
   164  // 								di.fieldState.length++
   165  // 								di.fieldState.position++
   166  // 								ok, err := stream.IncrementToken()
   167  // 								if err != nil {
   168  // 									return err
   169  // 								}
   170  // 								if !ok {
   171  // 									break
   172  // 								}
   173  // 							}
   174  // 						}
   175  // 						// trigger stream to perform end-of-stream operations
   176  // 						err = stream.End()
   177  // 						if err != nil {
   178  // 							return err
   179  // 						}
    180  // 						// TODO: maybe add some safety? then again, it's already
   181  // 						// checked when we come back around to the field...
   182  // 						di.fieldState.position += posIncrAttribute.PositionIncrement()
   183  // 						di.fieldState.offset += offsetAttribute.EndOffset()
   184  // 						success2 = true
   185  // 						return nil
   186  // 					}()
   187  // 				}
   188  // 			}
   189  // 			if err != nil {
   190  // 				return err
   191  // 			}
   192  // 		}
   193  
    194  // 		// LUCENE-2387: don't hang onto the field, so GC can reclaim
   195  // 		fields[i] = nil
   196  // 	}
   197  
   198  // 	err = di.consumer.finish()
   199  // 	if err == nil {
   200  // 		err = di.endConsumer.finish()
   201  // 	}
   202  // 	return err
   203  // }
   204  
   205  // func (dipf *DocInverterPerField) fieldInfo() *model.FieldInfo {
   206  // 	return dipf._fieldInfo
   207  // }