package index

import (
	"github.com/balzaczyy/golucene/core/index/model"
	// "github.com/balzaczyy/golucene/core/store"
	// "github.com/balzaczyy/golucene/core/util"
)

// index/DocConsumer.java

type DocConsumer interface {
	processDocument() error
	flush(state *model.SegmentWriteState) error
	abort()
}
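// Illustrative sketch (not part of the original port): a minimal no-op
// DocConsumer showing the contract an implementation must satisfy. The
// name noOpDocConsumer is hypothetical. The indexing chain calls
// processDocument once per added document, flush once per segment, and
// abort when indexing fails.
type noOpDocConsumer struct{}

var _ DocConsumer = (*noOpDocConsumer)(nil) // compile-time interface check

func (c *noOpDocConsumer) processDocument() error { return nil }

func (c *noOpDocConsumer) flush(state *model.SegmentWriteState) error { return nil }

func (c *noOpDocConsumer) abort() {}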
// // index/DocFieldProcessor.java

// /*
// This is a DocConsumer that gathers all fields under the same name,
// and calls per-field consumers to process field by field. This class
// doesn't do any "real" work of its own: it just forwards the fields to
// a DocFieldConsumer.
// */
// type DocFieldProcessor struct {
// 	consumer       DocFieldConsumer
// 	storedConsumer StoredFieldsConsumer
// 	codec          Codec

// 	// Holds all fields seen in the current doc
// 	_fields    []*DocFieldProcessorPerField
// 	fieldCount int

// 	// Hash table for all fields ever seen
// 	fieldHash       []*DocFieldProcessorPerField
// 	hashMask        int
// 	totalFieldCount int

// 	fieldGen int

// 	docState *docState

// 	bytesUsed util.Counter
// }

// func newDocFieldProcessor(docWriter *DocumentsWriterPerThread,
// 	consumer DocFieldConsumer, storedConsumer StoredFieldsConsumer) *DocFieldProcessor {

// 	assert(storedConsumer != nil)
// 	return &DocFieldProcessor{
// 		_fields:        make([]*DocFieldProcessorPerField, 1),
// 		fieldHash:      make([]*DocFieldProcessorPerField, 2),
// 		hashMask:       1,
// 		docState:       docWriter.docState,
// 		codec:          docWriter.codec,
// 		bytesUsed:      docWriter._bytesUsed,
// 		consumer:       consumer,
// 		storedConsumer: storedConsumer,
// 	}
// }

// func (p *DocFieldProcessor) flush(state *model.SegmentWriteState) error {
// 	childFields := make(map[string]DocFieldConsumerPerField)
// 	for _, f := range p.fields() {
// 		childFields[f.fieldInfo().Name] = f
// 	}

// 	err := p.storedConsumer.flush(state)
// 	if err != nil {
// 		return err
// 	}
// 	err = p.consumer.flush(childFields, state)
// 	if err != nil {
// 		return err
// 	}

// 	// Important to save after asking consumer to flush so consumer can
// 	// alter the FieldInfo if necessary. E.g., FreqProxTermsWriter does
// 	// this with FieldInfo.storePayload.
// 	infosWriter := p.codec.FieldInfosFormat().FieldInfosWriter()
// 	assert(infosWriter != nil)
// 	return infosWriter(state.Directory, state.SegmentInfo.Name,
// 		state.FieldInfos, store.IO_CONTEXT_DEFAULT)
// }

// func (p *DocFieldProcessor) abort() {
// 	for _, field := range p.fieldHash {
// 		for field != nil {
// 			next := field.next
// 			field.abort()
// 			field = next
// 		}
// 	}
// 	p.storedConsumer.abort()
// 	p.consumer.abort()
// 	// assert2(err == nil, err.Error())
// }

// func (p *DocFieldProcessor) fields() []DocFieldConsumerPerField {
// 	var fields []DocFieldConsumerPerField
// 	for _, field := range p.fieldHash {
// 		for field != nil {
// 			fields = append(fields, field.consumer)
// 			field = field.next
// 		}
// 	}
// 	assert(len(fields) == p.totalFieldCount)
// 	return fields
// }

// func (p *DocFieldProcessor) rehash() {
// 	newHashSize := len(p.fieldHash) * 2
// 	assert(newHashSize > len(p.fieldHash)) // avoid overflow

// 	newHashArray := make([]*DocFieldProcessorPerField, newHashSize)

// 	// Rehash
// 	newHashMask := newHashSize - 1
// 	for _, fp0 := range p.fieldHash {
// 		for fp0 != nil {
// 			hashPos2 := hashstr(fp0.fieldInfo.Name) & newHashMask
// 			nextFP0 := fp0.next
// 			fp0.next = newHashArray[hashPos2]
// 			newHashArray[hashPos2] = fp0
// 			fp0 = nextFP0
// 		}
// 	}

// 	p.fieldHash = newHashArray
// 	p.hashMask = newHashMask
// }

// func (p *DocFieldProcessor) processDocument(fieldInfos *model.FieldInfosBuilder) error {
// 	p.consumer.startDocument()
// 	p.storedConsumer.startDocument()

// 	p.fieldCount = 0

// 	thisFieldGen := p.fieldGen
// 	p.fieldGen++

// 	// Absorb any new fields first seen in this document. Also absorb
// 	// any changes to fields we had already seen before (e.g. suddenly
// 	// turning on norms or vectors, etc.)

// 	for _, field := range p.docState.doc {
// 		fieldName := field.Name()

// 		// Make sure we have a PerField allocated
// 		hashPos := hashstr(fieldName) & p.hashMask
// 		fp := p.fieldHash[hashPos]
// 		for fp != nil && fp.fieldInfo.Name != fieldName {
// 			fp = fp.next
// 		}

// 		if fp == nil {
// 			// TODO FI: we need to genericize the "flags" that a field
// 			// holds, and, how these flags are merged; it needs to be more
// 			// "pluggable" such that if I want to have a new "thing" my
// 			// Fields can do, I can easily add it
// 			fi := fieldInfos.AddOrUpdate(fieldName, field.FieldType())

// 			fp = newDocFieldProcessorPerField(p, fi)
// 			fp.next = p.fieldHash[hashPos]
// 			p.fieldHash[hashPos] = fp
// 			p.totalFieldCount++

// 			if p.totalFieldCount >= len(p.fieldHash)/2 {
// 				p.rehash()
// 			}
// 		} else {
// 			panic("not implemented yet")
// 		}

// 		if thisFieldGen != fp.lastGen {
// 			// First time we're seeing this field for this doc
// 			fp.fieldCount = 0

// 			if p.fieldCount == len(p._fields) {
// 				newSize := len(p._fields) * 2
// 				newArray := make([]*DocFieldProcessorPerField, newSize)
// 				copy(newArray, p._fields[:p.fieldCount])
// 				p._fields = newArray
// 			}

// 			p._fields[p.fieldCount] = fp
// 			p.fieldCount++
// 			fp.lastGen = thisFieldGen
// 		}

// 		fp.addField(field)
// 		p.storedConsumer.addField(p.docState.docID, field, fp.fieldInfo)
// 	}

// 	// If we are writing vectors then we must visit fields in sorted
// 	// order so they are written in sorted order. TODO: we actually
// 	// only need to sort the subset of fields that have vectors enabled;
// 	// we could save [a small amount of] CPU here.
// 	util.IntroSort(ByNameDocFieldProcessorPerFields(p._fields[:p.fieldCount]))
// 	for _, perField := range p._fields[:p.fieldCount] {
// 		err := perField.consumer.processFields(perField.fields, perField.fieldCount)
// 		if err != nil {
// 			return err
// 		}
// 	}

// 	if prefix, is := p.docState.maxTermPrefix, p.docState.infoStream; prefix != "" && is.IsEnabled("IW") {
// 		is.Message("IW",
// 			"WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length %v), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '%v...'",
// 			MAX_TERM_LENGTH_UTF8,
// 			prefix)
// 		p.docState.maxTermPrefix = ""
// 	}

// 	return nil
// }

// type ByNameDocFieldProcessorPerFields []*DocFieldProcessorPerField

// func (a ByNameDocFieldProcessorPerFields) Len() int      { return len(a) }
// func (a ByNameDocFieldProcessorPerFields) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// func (a ByNameDocFieldProcessorPerFields) Less(i, j int) bool {
// 	return a[i].fieldInfo.Name < a[j].fieldInfo.Name
// }

// func (p *DocFieldProcessor) finishDocument() (err error) {
// 	defer func() {
// 		err = mergeError(err, p.consumer.finishDocument())
// 	}()
// 	return p.storedConsumer.finishDocument()
// }
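// Illustrative sketch (not part of the original port): the commented-out
// DocFieldProcessor above resolves fields through a power-of-two hash
// table of singly linked chains, and rehash() doubles the table once it
// is half full. The standalone toy below, with hypothetical names, shows
// the same head-insertion rehash scheme in isolation.
type exampleFieldNode struct {
	name string
	next *exampleFieldNode
}

// exampleHash is a stand-in for hashstr: any function returning a
// non-negative int works, since bucket positions are taken with a bit
// mask against a power-of-two table size rather than a modulo.
func exampleHash(s string) int {
	h := 0
	for i := 0; i < len(s); i++ {
		h = 31*h + int(s[i])
	}
	if h < 0 {
		h = -h
	}
	return h
}

// exampleRehash doubles the table and re-chains every node, pushing each
// onto the head of its new bucket, mirroring rehash() above.
func exampleRehash(old []*exampleFieldNode) ([]*exampleFieldNode, int) {
	newSize := len(old) * 2
	newMask := newSize - 1
	fresh := make([]*exampleFieldNode, newSize)
	for _, n := range old {
		for n != nil {
			next := n.next
			pos := exampleHash(n.name) & newMask
			n.next = fresh[pos] // head insertion into the new chain
			fresh[pos] = n
			n = next
		}
	}
	return fresh, newMask
}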
// // index/DocFieldProcessorPerField.java

// /* Holds all per-thread, per-field state. */
// type DocFieldProcessorPerField struct {
// 	consumer  DocFieldConsumerPerField
// 	fieldInfo *model.FieldInfo

// 	next    *DocFieldProcessorPerField
// 	lastGen int // -1

// 	fieldCount int
// 	fields     []model.IndexableField
// }

// func newDocFieldProcessorPerField(docFieldProcessor *DocFieldProcessor,
// 	fieldInfo *model.FieldInfo) *DocFieldProcessorPerField {
// 	return &DocFieldProcessorPerField{
// 		consumer:  docFieldProcessor.consumer.addField(fieldInfo),
// 		lastGen:   -1,
// 		fieldInfo: fieldInfo,
// 	}
// }
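// Illustrative sketch (not part of the original port): lastGen implements
// a generation-stamp trick. processDocument bumps a per-processor
// fieldGen counter once per document; a PerField whose lastGen differs
// from the current generation is being seen for the first time in this
// document, so per-doc state can be reset lazily without clearing the
// whole hash table between documents. A standalone toy version, with
// hypothetical names:
type exampleGenTracker struct {
	gen     int
	lastGen map[string]int
}

// firstSeenThisDoc reports whether name has not yet appeared in the
// current document, stamping it with the current generation as it does.
func (t *exampleGenTracker) firstSeenThisDoc(name string) bool {
	if t.lastGen == nil {
		t.lastGen = make(map[string]int)
	}
	if g, ok := t.lastGen[name]; ok && g == t.gen {
		return false // already stamped for this doc
	}
	t.lastGen[name] = t.gen
	return true
}

// nextDoc starts a new document; no per-name state needs resetting.
func (t *exampleGenTracker) nextDoc() { t.gen++ }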
// func (f *DocFieldProcessorPerField) addField(field model.IndexableField) {
// 	if f.fieldCount == len(f.fields) {
// 		newSize := util.Oversize(f.fieldCount+1, util.NUM_BYTES_OBJECT_REF)
// 		newArray := make([]model.IndexableField, newSize)
// 		copy(newArray, f.fields)
// 		f.fields = newArray
// 	}
// 	f.fields[f.fieldCount] = field
// 	f.fieldCount++
// }

// func (f *DocFieldProcessorPerField) abort() {
// 	f.consumer.abort()
// }
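// Illustrative note (not part of the original port): addField above grows
// its slice by hand through util.Oversize so allocation sizes follow the
// same amortized-doubling policy as Java Lucene's ArrayUtil.oversize.
// Plain Go would normally rely on append's built-in amortized growth
// instead; the hypothetical helper below shows that alternative.
func exampleAppendField(fields []interface{}, field interface{}) []interface{} {
	// append over-allocates geometrically when capacity runs out, so a
	// long run of single-element appends costs amortized O(1) each.
	return append(fields, field)
}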