github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/compressing/storedFieldsIndexReader.go (about) 1 package compressing 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/balzaczyy/golucene/core/index/model" 7 "github.com/balzaczyy/golucene/core/store" 8 "github.com/balzaczyy/golucene/core/util" 9 "github.com/balzaczyy/golucene/core/util/packed" 10 ) 11 12 // codec/compressing/CompressingStoredFieldsIndexReader.java 13 14 // Random-access reader for CompressingStoredFieldsIndexWriter 15 type CompressingStoredFieldsIndexReader struct { 16 maxDoc int 17 docBases []int 18 startPointers []int64 19 avgChunkDocs []int 20 avgChunkSizes []int64 21 docBasesDeltas []packed.PackedIntsReader 22 startPointersDeltas []packed.PackedIntsReader 23 } 24 25 func newCompressingStoredFieldsIndexReader(fieldsIndexIn store.IndexInput, 26 si *model.SegmentInfo) (r *CompressingStoredFieldsIndexReader, err error) { 27 28 r = &CompressingStoredFieldsIndexReader{} 29 r.maxDoc = si.DocCount() 30 r.docBases = make([]int, 0, 16) 31 r.startPointers = make([]int64, 0, 16) 32 r.avgChunkDocs = make([]int, 0, 16) 33 r.avgChunkSizes = make([]int64, 0, 16) 34 r.docBasesDeltas = make([]packed.PackedIntsReader, 0, 16) 35 r.startPointersDeltas = make([]packed.PackedIntsReader, 0, 16) 36 37 packedIntsVersion, err := fieldsIndexIn.ReadVInt() 38 if err != nil { 39 return nil, err 40 } 41 42 for blockCount := 0; ; blockCount++ { 43 numChunks, err := fieldsIndexIn.ReadVInt() 44 if err != nil { 45 return nil, err 46 } 47 if numChunks == 0 { 48 break 49 } 50 51 { // doc bases 52 n, err := fieldsIndexIn.ReadVInt() 53 if err != nil { 54 return nil, err 55 } 56 r.docBases = append(r.docBases, int(n)) 57 n, err = fieldsIndexIn.ReadVInt() 58 if err != nil { 59 return nil, err 60 } 61 r.avgChunkDocs = append(r.avgChunkDocs, int(n)) 62 bitsPerDocBase, err := fieldsIndexIn.ReadVInt() 63 if err != nil { 64 return nil, err 65 } 66 if bitsPerDocBase > 32 { 67 return nil, errors.New(fmt.Sprintf("Corrupted bitsPerDocBase (resource=%v)", fieldsIndexIn)) 68 } 69 pr, err := packed.ReaderNoHeader(fieldsIndexIn, packed.PACKED, packedIntsVersion, numChunks, uint32(bitsPerDocBase)) 70 if err != nil { 71 return nil, err 72 } 73 r.docBasesDeltas = append(r.docBasesDeltas, pr) 74 } 75 76 { // start pointers 77 n, err := fieldsIndexIn.ReadVLong() 78 if err != nil { 79 return nil, err 80 } 81 r.startPointers = append(r.startPointers, n) 82 n, err = fieldsIndexIn.ReadVLong() 83 if err != nil { 84 return nil, err 85 } 86 r.avgChunkSizes = append(r.avgChunkSizes, n) 87 bitsPerStartPointer, err := fieldsIndexIn.ReadVInt() 88 if err != nil { 89 return nil, err 90 } 91 if bitsPerStartPointer > 64 { 92 return nil, errors.New(fmt.Sprintf("Corrupted bitsPerStartPonter (resource=%v)", fieldsIndexIn)) 93 } 94 pr, err := packed.ReaderNoHeader(fieldsIndexIn, packed.PACKED, packedIntsVersion, numChunks, uint32(bitsPerStartPointer)) 95 if err != nil { 96 return nil, err 97 } 98 r.startPointersDeltas = append(r.startPointersDeltas, pr) 99 } 100 } 101 102 return r, nil 103 } 104 105 func (r *CompressingStoredFieldsIndexReader) block(docID int) int { 106 lo, hi := 0, len(r.docBases)-1 107 for lo <= hi { 108 mid := int(uint(lo+hi) >> 1) 109 midValue := r.docBases[mid] 110 if midValue == docID { 111 return mid 112 } else if midValue < docID { 113 lo = mid + 1 114 } else { 115 hi = mid - 1 116 } 117 } 118 return hi 119 } 120 121 func (r *CompressingStoredFieldsIndexReader) relativeDocBase(block, relativeChunk int) int { 122 expected := r.avgChunkDocs[block] * relativeChunk 123 delta := util.ZigZagDecodeLong(r.docBasesDeltas[block].Get(relativeChunk)) 124 return expected + int(delta) 125 } 126 127 func (r *CompressingStoredFieldsIndexReader) relativeStartPointer(block, relativeChunk int) int64 { 128 expected := r.avgChunkSizes[block] * int64(relativeChunk) 129 delta := util.ZigZagDecodeLong(r.startPointersDeltas[block].Get(relativeChunk)) 130 return expected + delta 131 } 132 133 func (r *CompressingStoredFieldsIndexReader) relativeChunk(block, relativeDoc int) int { 134 lo, hi := 0, int(r.docBasesDeltas[block].Size())-1 135 for lo <= hi { 136 mid := int(uint(lo+hi) >> 1) 137 midValue := r.relativeDocBase(block, mid) 138 if midValue == relativeDoc { 139 return mid 140 } else if midValue < relativeDoc { 141 lo = mid + 1 142 } else { 143 hi = mid - 1 144 } 145 } 146 return hi 147 } 148 149 func (r *CompressingStoredFieldsIndexReader) startPointer(docID int) int64 { 150 if docID < 0 || docID >= r.maxDoc { 151 panic(fmt.Sprintf("docID out of range [0-%v]: %v", r.maxDoc, docID)) 152 } 153 block := r.block(docID) 154 relativeChunk := r.relativeChunk(block, docID-r.docBases[block]) 155 return r.startPointers[block] + r.relativeStartPointer(block, relativeChunk) 156 } 157 158 func (r *CompressingStoredFieldsIndexReader) Clone() *CompressingStoredFieldsIndexReader { 159 return r 160 }