github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/index/docFieldConsumerPerField.go (about) 1 package index 2 3 // import ( 4 // ta "github.com/balzaczyy/golucene/core/analysis/tokenattributes" 5 // "github.com/balzaczyy/golucene/core/index/model" 6 // "github.com/balzaczyy/golucene/core/util" 7 // ) 8 9 // type DocFieldConsumerPerField interface { 10 // // Processes all occurrences of a single field 11 // processFields([]model.IndexableField, int) error 12 // abort() 13 // fieldInfo() *model.FieldInfo 14 // } 15 16 // index/DocInverterPerField.java 17 18 // type DocInverterPerField struct { 19 // _fieldInfo *model.FieldInfo 20 // consumer InvertedDocConsumerPerField 21 // endConsumer InvertedDocEndConsumerPerField 22 // docState *docState 23 // fieldState *FieldInvertState 24 // } 25 26 // func newDocInverterPerField(parent *DocInverter, fieldInfo *model.FieldInfo) *DocInverterPerField { 27 // ans := &DocInverterPerField{ 28 // _fieldInfo: fieldInfo, 29 // docState: parent.docState, 30 // fieldState: newFieldInvertState(fieldInfo.Name), 31 // } 32 // ans.consumer = parent.consumer.addField(ans, fieldInfo) 33 // ans.endConsumer = parent.endConsumer.addField(ans, fieldInfo) 34 // return ans 35 // } 36 37 // func (dipf *DocInverterPerField) abort() { 38 // defer dipf.endConsumer.abort() 39 // dipf.consumer.abort() 40 // } 41 42 // func (di *DocInverterPerField) processFields(fields []model.IndexableField, count int) error { 43 // di.fieldState.reset() 44 45 // doInvert, err := di.consumer.start(fields, count) 46 // if err != nil { 47 // return err 48 // } 49 50 // for i, field := range fields[:count] { 51 // fieldType := field.FieldType() 52 53 // // TODO FI: this should be "genericized" to querying consumer if 54 // // it wants to see this particular field tokenized. 55 // if fieldType.Indexed() && doInvert { 56 // analyzed := fieldType.Tokenized() && di.docState.analyzer != nil 57 58 // // if the field omits norms, the boost cannot be indexed. 
59 // assert2(!fieldType.OmitNorms() || field.Boost() == 1.0, 60 // "You cannot set an index-time boost: norms are omitted for field '%v'", 61 // field.Name()) 62 63 // // only bother checking offsets if something will consume them. 64 // // TODO: after we fix analyzers, also check if termVectorOffsets will be indexed. 65 // checkOffsets := fieldType.IndexOptions() == model.INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS 66 // lastStartOffset := 0 67 68 // if i > 0 && analyzed { 69 // panic("not implemented yet") 70 // // di.fieldState.position += di.docState.analyzer.PositionIncrementGap(di.fieldInfo().Name()) 71 // } 72 73 // if stream, err := field.TokenStream(di.docState.analyzer); err == nil { 74 // // reset the TokenStream to the first token 75 // if err = stream.Reset(); err == nil { 76 // err = func() (err error) { 77 // var success2 = false 78 // defer func() { 79 // if !success2 { 80 // util.CloseWhileSuppressingError(stream) 81 // } else { 82 // err = stream.Close() 83 // } 84 // }() 85 86 // var hasMoreTokens bool 87 // hasMoreTokens, err = stream.IncrementToken() 88 // if err != nil { 89 // return err 90 // } 91 92 // atts := stream.Attributes() 93 // di.fieldState.attributeSource = atts 94 95 // offsetAttribute := atts.Add("OffsetAttribute").(ta.OffsetAttribute) 96 // posIncrAttribute := atts.Add("PositionIncrementAttribute").(ta.PositionIncrementAttribute) 97 98 // if hasMoreTokens { 99 // di.consumer.startField(field) 100 101 // for { 102 // // If we hit an error in stream.next below (which is 103 // // fairly common, e.g. if analyzer chokes on a given 104 // // document), then it's non-aborting and (above) this 105 // // one document will be marked as deleted, but still 106 // // consume a docID 107 108 // posIncr := posIncrAttribute.PositionIncrement() 109 // assert2(posIncr >= 0, 110 // "position increment must be >=0 (got %v) for field '%v'", 111 // posIncr, field.Name()) 112 // assert2(di.fieldState.position != 0 || posIncr != 0, 113 // "first 
position increment must be > 0 (got 0) for field '%v'", 114 // field.Name()) 115 // position := di.fieldState.position + posIncr 116 // if position > 0 { 117 // // NOTE: confusing: this "mirrors" the position++ we do below 118 // position-- 119 // } else { 120 // assert2(position >= 0, "position overflow for field '%v'", field.Name()) 121 // } 122 123 // // position is legal, we can safely place it in fieldState now. 124 // // not sure if anything will use fieldState after non-aborting exc... 125 // di.fieldState.position = position 126 127 // if posIncr == 0 { 128 // di.fieldState.numOverlap++ 129 // } 130 131 // if checkOffsets { 132 // startOffset := di.fieldState.offset + offsetAttribute.StartOffset() 133 // endOffset := di.fieldState.offset + offsetAttribute.EndOffset() 134 // assert2(startOffset >= 0 && startOffset <= endOffset, 135 // "startOffset must be non-negative, and endOffset must be >= startOffset, startOffset=%v,endOffset=%v for field '%v'", 136 // startOffset, endOffset, field.Name()) 137 // assert2(startOffset >= lastStartOffset, 138 // "offsets must not go backwards startOffset=%v is < lastStartOffset=%v for field '%v'", 139 // startOffset, lastStartOffset, field.Name()) 140 // lastStartOffset = startOffset 141 // } 142 143 // if err = func() error { 144 // var success = false 145 // defer func() { 146 // if !success { 147 // di.docState.docWriter.setAborting() 148 // } 149 // }() 150 // // If we hit an error here, we abort all buffered 151 // // documents since the last flush, on the 152 // // likelihood that the internal state of the 153 // // consumer is now corrupt and should not be 154 // // flushed to a new segment: 155 // if err := di.consumer.add(); err != nil { 156 // return err 157 // } 158 // success = true 159 // return nil 160 // }(); err != nil { 161 // return err 162 // } 163 164 // di.fieldState.length++ 165 // di.fieldState.position++ 166 // ok, err := stream.IncrementToken() 167 // if err != nil { 168 // return err 169 // } 170 
// if !ok { 171 // break 172 // } 173 // } 174 // } 175 // // trigger stream to perform end-of-stream operations 176 // err = stream.End() 177 // if err != nil { 178 // return err 179 // } 180 // // TODO: maybe add some safety? then again, it's already 181 // // checked when we come back around to the field... 182 // di.fieldState.position += posIncrAttribute.PositionIncrement() 183 // di.fieldState.offset += offsetAttribute.EndOffset() 184 // success2 = true 185 // return nil 186 // }() 187 // } 188 // } 189 // if err != nil { 190 // return err 191 // } 192 // } 193 194 // // LUCENE-2387: don't hang onto the field, so GC can reclaim 195 // fields[i] = nil 196 // } 197 198 // err = di.consumer.finish() 199 // if err == nil { 200 // err = di.endConsumer.finish() 201 // } 202 // return err 203 // } 204 205 // func (dipf *DocInverterPerField) fieldInfo() *model.FieldInfo { 206 // return dipf._fieldInfo 207 // }