github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene49/normsProducer.go

package lucene49

import (
	"errors"
	"fmt"
	"github.com/balzaczyy/golucene/core/codec"
	. "github.com/balzaczyy/golucene/core/codec/spi"
	. "github.com/balzaczyy/golucene/core/index/model"
	"github.com/balzaczyy/golucene/core/store"
	"github.com/balzaczyy/golucene/core/util"
	"github.com/balzaczyy/golucene/core/util/packed"
	"reflect"
	"sync"
	"sync/atomic"
)

// lucene49/Lucene49NormsProducer.java

// NormsEntry records the format and data-file offset of one field's norms.
type NormsEntry struct {
	format byte
	offset int64
}

// NormsProducer provides read access to per-document norm values written by
// the Lucene49 norms format, loading each field's norms lazily on first use.
type NormsProducer struct {
	sync.Locker

	norms   map[int]*NormsEntry
	data    store.IndexInput
	version int32

	instances map[int]NumericDocValues

	maxDoc       int
	ramBytesUsed int64 // atomic
}

func newLucene49NormsProducer(state SegmentReadState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (np *NormsProducer, err error) {

	np = &NormsProducer{
		Locker:       new(sync.Mutex),
		norms:        make(map[int]*NormsEntry),
		instances:    make(map[int]NumericDocValues),
		maxDoc:       state.SegmentInfo.DocCount(),
		ramBytesUsed: util.ShallowSizeOfInstance(reflect.TypeOf(np)),
	}
	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
	// Read in the entries from the metadata file.
	var in store.ChecksumIndexInput
	if in, err = state.Dir.OpenChecksumInput(metaName, state.Context); err != nil {
		return nil, err
	}

	// Use a named return on the inner function so the error from the deferred
	// util.Close(in) is propagated instead of being discarded by the nil return.
	if err = func() (err error) {
		var success = false
		defer func() {
			if success {
				err = util.Close(in)
			} else {
				util.CloseWhileSuppressingError(in)
			}
		}()

		if np.version, err = codec.CheckHeader(in, metaCodec, VERSION_START, VERSION_CURRENT); err != nil {
			return err
		}
		if err = np.readFields(in, state.FieldInfos); err != nil {
			return err
		}
		if _, err = codec.CheckFooter(in); err != nil {
			return err
		}
		success = true
		return nil
	}(); err != nil {
		return nil, err
	}

	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
	if np.data, err = state.Dir.OpenInput(dataName, state.Context); err != nil {
		return nil, err
	}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(np.data)
		}
	}()

	var version2 int32
	if version2, err = codec.CheckHeader(np.data, dataCodec, VERSION_START, VERSION_CURRENT); err != nil {
		return nil, err
	}
	if version2 != np.version {
		return nil, errors.New("Format versions mismatch")
	}

	// NOTE: the data file is too costly to verify checksum against all the
	// bytes on open, but for now we at least verify proper structure of the
	// checksum footer: this looks for FOOTER_MAGIC + algorithmID. This is
	// cheap and can detect some forms of corruption such as file truncation.
	if _, err = codec.RetrieveChecksum(np.data); err != nil {
		return nil, err
	}

	success = true

	return np, nil
}

// readFields reads per-field norms entries from the metadata file until the
// -1 field-number terminator is reached.
func (np *NormsProducer) readFields(meta store.IndexInput, infos FieldInfos) (err error) {
	var fieldNumber int32
	if fieldNumber, err = meta.ReadVInt(); err != nil {
		return err
	}
	for fieldNumber != -1 {
		info := infos.FieldInfoByNumber(int(fieldNumber))
		if info == nil {
			return fmt.Errorf("Invalid field number: %v (resource=%v)", fieldNumber, meta)
		} else if !info.HasNorms() {
			return fmt.Errorf("Invalid field: %v (resource=%v)", info.Name, meta)
		}
		var format byte
		if format, err = meta.ReadByte(); err != nil {
			return err
		}
		var offset int64
		if offset, err = meta.ReadLong(); err != nil {
			return err
		}
		entry := &NormsEntry{
			format: format,
			offset: offset,
		}
		if format > UNCOMPRESSED {
			return fmt.Errorf("Unknown format: %v, input=%v", format, meta)
		}
		np.norms[int(fieldNumber)] = entry
		if fieldNumber, err = meta.ReadVInt(); err != nil {
			return err
		}
	}
	return nil
}

// Numeric returns the norm values for the given field, loading and caching
// them on first access.
func (np *NormsProducer) Numeric(field *FieldInfo) (NumericDocValues, error) {
	np.Lock()
	defer np.Unlock()

	instance, ok := np.instances[int(field.Number)]
	if !ok {
		var err error
		if instance, err = np.loadNorms(field); err != nil {
			return nil, err
		}
		np.instances[int(field.Number)] = instance
	}
	return instance, nil
}

// loadNorms decodes one field's norms from the data file according to the
// format recorded in its NormsEntry.
func (np *NormsProducer) loadNorms(field *FieldInfo) (NumericDocValues, error) {
	entry, ok := np.norms[int(field.Number)]
	assert(ok)
	switch entry.format {
	case CONST_COMPRESSED:
		// Every document shares the same norm value, stored in the offset field.
		return func(int) int64 { return entry.offset }, nil
	case UNCOMPRESSED:
		panic("not implemented yet")
	case DELTA_COMPRESSED:
		panic("not implemented yet")
	case TABLE_COMPRESSED:
		// A small decode table (at most 256 distinct values) followed by packed
		// per-document ordinals into that table.
		var err error
		if err = np.data.Seek(entry.offset); err == nil {
			var packedVersion int32
			if packedVersion, err = np.data.ReadVInt(); err == nil {
				var size int
				if size, err = int32ToInt(np.data.ReadVInt()); err == nil {
					if size > 256 {
						return nil, fmt.Errorf(
							"TABLE_COMPRESSED cannot have more than 256 distinct values, input=%v",
							np.data)
					}
					decode := make([]int64, size)
					for i := range decode {
						if decode[i], err = np.data.ReadLong(); err != nil {
							break
						}
					}
					if err == nil {
						var formatId int
						if formatId, err = int32ToInt(np.data.ReadVInt()); err == nil {
							var bitsPerValue int32
							if bitsPerValue, err = np.data.ReadVInt(); err == nil {
								var ordsReader packed.PackedIntsReader
								if ordsReader, err = packed.ReaderNoHeader(np.data,
									packed.PackedFormat(formatId), packedVersion,
									int32(np.maxDoc), uint32(bitsPerValue)); err == nil {

									atomic.AddInt64(&np.ramBytesUsed, util.SizeOf(decode)+ordsReader.RamBytesUsed())
									return func(docId int) int64 {
										return decode[int(ordsReader.Get(docId))]
									}, nil
								}
							}
						}
					}
				}
			}
		}
		if err != nil {
			return nil, err
		}
	default:
		panic("assert fail")
	}
	panic("should not be here")
}

// int32ToInt adapts an (int32, error) return value to (int, error).
func int32ToInt(n int32, err error) (int, error) {
	return int(n), err
}

func (np *NormsProducer) Binary(field *FieldInfo) (BinaryDocValues, error) {
	panic("not supported")
}

func (np *NormsProducer) Sorted(field *FieldInfo) (SortedDocValues, error) {
panic("not supported") 230 } 231 232 func (np *NormsProducer) SortedSet(field *FieldInfo) (SortedSetDocValues, error) { 233 panic("not supported") 234 } 235 236 func (np *NormsProducer) Close() error { 237 return np.data.Close() 238 }