github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene42/docValuesProducer.go (about) 1 package lucene42 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/codec" 7 . "github.com/balzaczyy/golucene/core/codec/spi" 8 . "github.com/balzaczyy/golucene/core/index/model" 9 "github.com/balzaczyy/golucene/core/store" 10 "github.com/balzaczyy/golucene/core/util" 11 "reflect" 12 "sync" 13 "sync/atomic" 14 ) 15 16 const ( 17 LUCENE42_DV_DATA_CODEC = "Lucene42DocValuesData" 18 LUCENE42_DV_DATA_EXTENSION = "dvd" 19 LUCENE42_DV_METADATA_CODEC = "Lucene42DocValuesMetadata" 20 LUCENE42_DV_METADATA_EXTENSION = "dvm" 21 22 LUCENE42_DV_VERSION_START = 0 23 LUCENE42_DV_VERSION_GCD_COMPRESSION = 1 24 LUCENE42_DV_VERSION_CHECKSUM = 2 25 LUCENE42_DV_VERSION_CURRENT = LUCENE42_DV_VERSION_CHECKSUM 26 27 LUCENE42_DV_NUMBER = 0 28 LUCENE42_DV_BYTES = 1 29 LUCENE42_DV_FST = 2 30 31 LUCENE42_DV_DELTA_COMPRESSED = 0 32 LUCENE42_DV_TABLE_COMPRESSED = 1 33 LUCENE42_DV_UNCOMPRESSED = 2 34 LUCENE42_DV_GCD_COMPRESSED = 3 35 ) 36 37 type Lucene42DocValuesProducer struct { 38 lock sync.Mutex 39 40 numerics map[int]NumericEntry 41 binaries map[int]BinaryEntry 42 fsts map[int]FSTEntry 43 data store.IndexInput 44 45 numericInstances map[int]NumericDocValues 46 47 maxDoc int 48 ramBytesUsed int64 49 } 50 51 func newLucene42DocValuesProducer(state SegmentReadState, 52 dataCodec, dataExtension, metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) { 53 54 fmt.Println("Initializing Lucene42DocValuesProducer...") 55 dvp = &Lucene42DocValuesProducer{ 56 numericInstances: make(map[int]NumericDocValues), 57 } 58 dvp.maxDoc = state.SegmentInfo.DocCount() 59 60 metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension) 61 fmt.Println("Reading", metaName) 62 // read in the entries from the metadata file. 63 var in store.ChecksumIndexInput 64 if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil { 65 return nil, err 66 } 67 dvp.ramBytesUsed = util.ShallowSizeOfInstance(reflect.TypeOf(dvp)) 68 69 var version int32 70 func() { 71 var success = false 72 defer func() { 73 if success { 74 err = util.Close(in) 75 } else { 76 util.CloseWhileSuppressingError(in) 77 } 78 }() 79 80 if version, err = codec.CheckHeader(in, metaCodec, 81 LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT); err != nil { 82 return 83 } 84 85 dvp.numerics = make(map[int]NumericEntry) 86 dvp.binaries = make(map[int]BinaryEntry) 87 dvp.fsts = make(map[int]FSTEntry) 88 if err = dvp.readFields(in, state.FieldInfos); err != nil { 89 return 90 } 91 92 if version >= LUCENE42_DV_VERSION_CHECKSUM { 93 _, err = codec.CheckFooter(in) 94 } else { 95 err = codec.CheckEOF(in) 96 } 97 }() 98 if err != nil { 99 return nil, err 100 } 101 102 var success = false 103 defer func() { 104 if !success { 105 util.CloseWhileSuppressingError(dvp.data) 106 } 107 }() 108 109 dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension) 110 fmt.Println("Reading", dataName) 111 if dvp.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil { 112 return nil, err 113 } 114 var version2 int32 115 if version2, err = codec.CheckHeader(dvp.data, dataCodec, 116 LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT); err != nil { 117 return nil, err 118 } 119 120 if version != version2 { 121 return nil, errors.New("Format versions mismatch") 122 } 123 124 if version >= LUCENE42_DV_VERSION_CHECKSUM { 125 panic("niy") 126 } 127 128 success = true 129 130 return dvp, nil 131 } 132 133 /* 134 Lucene42DocValuesProducer.java/4.5.1/L138 135 */ 136 func (dvp *Lucene42DocValuesProducer) readFields(meta store.IndexInput, 137 infos FieldInfos) (err error) { 138 139 var fieldNumber int 140 var fieldType byte 141 fieldNumber, err = asInt(meta.ReadVInt()) 142 for fieldNumber != -1 && err == nil { 143 if infos.FieldInfoByNumber(fieldNumber) == nil { 144 // tricker to validate more: because we re-use for norms, 145 // becaue we use multiple entries for "composite" types like 146 // sortedset, etc. 147 return errors.New(fmt.Sprintf( 148 "Invalid field number: %v (resource=%v)", 149 fieldNumber, meta)) 150 } 151 152 fieldType, err = meta.ReadByte() 153 if err != nil { 154 return 155 } 156 switch fieldType { 157 case LUCENE42_DV_NUMBER: 158 entry := NumericEntry{} 159 entry.offset, err = meta.ReadLong() 160 if err != nil { 161 return 162 } 163 entry.format, err = meta.ReadByte() 164 if err != nil { 165 return 166 } 167 switch entry.format { 168 case LUCENE42_DV_DELTA_COMPRESSED: 169 case LUCENE42_DV_TABLE_COMPRESSED: 170 case LUCENE42_DV_GCD_COMPRESSED: 171 case LUCENE42_DV_UNCOMPRESSED: 172 default: 173 return errors.New(fmt.Sprintf("Unknown format: %v, input=%v", entry.format, meta)) 174 } 175 if entry.format != LUCENE42_DV_UNCOMPRESSED { 176 entry.packedIntsVersion, err = asInt(meta.ReadVInt()) 177 if err != nil { 178 return 179 } 180 } 181 fmt.Printf("Found entry [offset=%v, format=%v, packedIntsVersion=%v\n", 182 entry.offset, entry.format, entry.packedIntsVersion) 183 dvp.numerics[fieldNumber] = entry 184 case LUCENE42_DV_BYTES: 185 panic("not implemented yet") 186 case LUCENE42_DV_FST: 187 panic("not implemented yet") 188 default: 189 return errors.New(fmt.Sprintf("invalid entry type: %v, input=%v", fieldType, meta)) 190 } 191 fieldNumber, err = asInt(meta.ReadVInt()) 192 } 193 return 194 } 195 196 func asInt(n int32, err error) (n2 int, err2 error) { 197 return int(n), err 198 } 199 200 func (dvp *Lucene42DocValuesProducer) Numeric(field *FieldInfo) (v NumericDocValues, err error) { 201 dvp.lock.Lock() 202 defer dvp.lock.Unlock() 203 204 v, exists := dvp.numericInstances[int(field.Number)] 205 if !exists { 206 if v, err = dvp.loadNumeric(field); err == nil { 207 dvp.numericInstances[int(field.Number)] = v 208 } 209 } 210 return 211 } 212 213 func (dvp *Lucene42DocValuesProducer) loadNumeric(field *FieldInfo) (v NumericDocValues, err error) { 214 entry := dvp.numerics[int(field.Number)] 215 if err = dvp.data.Seek(entry.offset); err != nil { 216 return 217 } 218 219 switch entry.format { 220 case LUCENE42_DV_TABLE_COMPRESSED: 221 panic("not implemented yet") 222 case LUCENE42_DV_DELTA_COMPRESSED: 223 panic("not implemented yet") 224 case LUCENE42_DV_UNCOMPRESSED: 225 bytes := make([]byte, dvp.maxDoc) 226 if err = dvp.data.ReadBytes(bytes); err == nil { 227 atomic.AddInt64(&dvp.ramBytesUsed, util.SizeOf(bytes)) 228 return func(docID int) int64 { 229 return int64(bytes[docID]) 230 }, nil 231 } 232 case LUCENE42_DV_GCD_COMPRESSED: 233 panic("not implemented yet") 234 default: 235 panic("assert fail") 236 } 237 return 238 } 239 240 func (dvp *Lucene42DocValuesProducer) Binary(field *FieldInfo) (v BinaryDocValues, err error) { 241 panic("not implemented yet") 242 return nil, nil 243 } 244 245 func (dvp *Lucene42DocValuesProducer) Sorted(field *FieldInfo) (v SortedDocValues, err error) { 246 panic("not implemented yet") 247 return nil, nil 248 } 249 250 func (dvp *Lucene42DocValuesProducer) SortedSet(field *FieldInfo) (v SortedSetDocValues, err error) { 251 panic("not implemented yet") 252 return nil, nil 253 } 254 255 func (dvp *Lucene42DocValuesProducer) Close() error { 256 if dvp == nil { 257 return nil 258 } 259 return dvp.data.Close() 260 } 261 262 type NumericEntry struct { 263 offset int64 264 format byte 265 packedIntsVersion int 266 } 267 268 type BinaryEntry struct { 269 offset int64 270 numBytes int64 271 minLength int 272 maxLength int 273 packedIntsVersion int 274 blockSize int 275 } 276 277 type FSTEntry struct { 278 offset int64 279 numOrds int64 280 }