github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/codec/lucene41/forUtil.go (about) 1 package lucene41 2 3 import ( 4 "fmt" 5 "github.com/balzaczyy/golucene/core/util" 6 "github.com/balzaczyy/golucene/core/util/packed" 7 "math" 8 ) 9 10 // codecs/lucene41/ForUtil.java 11 12 const ( 13 /** 14 * Special number of bits per value used whenever all values to encode are equal. 15 */ 16 ALL_VALUES_EQUAL = 0 17 /** 18 * Upper limit of the number of bytes that might be required to stored 19 * <code>BLOCK_SIZE</code> encoded values. 20 */ 21 MAX_ENCODED_SIZE = LUCENE41_BLOCK_SIZE * 4 22 ) 23 24 /** 25 * Upper limit of the number of values that might be decoded in a single call to 26 * {@link #readBlock(IndexInput, byte[], int[])}. Although values after 27 * <code>BLOCK_SIZE</code> are garbage, it is necessary to allocate value buffers 28 * whose size is >= MAX_DATA_SIZE to avoid {@link ArrayIndexOutOfBoundsException}s. 29 */ 30 var MAX_DATA_SIZE int = computeMaxDataSize() 31 32 func computeMaxDataSize() int { 33 maxDataSize := 0 34 // for each version 35 for version := packed.PACKED_VERSION_START; version <= packed.VERSION_CURRENT; version++ { 36 // for each packed format 37 for format := packed.PACKED; format <= packed.PACKED_SINGLE_BLOCK; format++ { 38 // for each bit-per-value 39 for bpv := 1; bpv <= 32; bpv++ { 40 if !packed.PackedFormat(format).IsSupported(bpv) { 41 continue 42 } 43 decoder := packed.GetPackedIntsDecoder(packed.PackedFormat(format), int32(version), uint32(bpv)) 44 iterations := int(computeIterations(decoder)) 45 if n := iterations * decoder.ByteValueCount(); n > maxDataSize { 46 maxDataSize = n 47 } 48 } 49 } 50 } 51 return maxDataSize 52 } 53 54 /** 55 * Encode all values in normal area with fixed bit width, 56 * which is determined by the max value in this block. 57 */ 58 type ForUtil struct { 59 encodedSizes []int32 60 encoders []packed.PackedIntsEncoder 61 decoders []packed.PackedIntsDecoder 62 iterations []int32 63 } 64 65 /* Create a new ForUtil instance and save state into out. */ 66 func NewForUtilInto(accetableOverheadRatio float32, out util.DataOutput) (*ForUtil, error) { 67 ans, err := &ForUtil{}, out.WriteVInt(packed.VERSION_CURRENT) 68 if err != nil { 69 return ans, err 70 } 71 ans.encodedSizes = make([]int32, 33) 72 ans.encoders = make([]packed.PackedIntsEncoder, 33) 73 ans.decoders = make([]packed.PackedIntsDecoder, 33) 74 ans.iterations = make([]int32, 33) 75 76 packedIntsVersion := int32(packed.VERSION_CURRENT) 77 for bpv := 1; bpv <= 32; bpv++ { 78 formatAndBits := packed.FastestFormatAndBits( 79 LUCENE41_BLOCK_SIZE, bpv, accetableOverheadRatio) 80 format := formatAndBits.Format 81 bitsPerValue := formatAndBits.BitsPerValue 82 assert(format.IsSupported(bitsPerValue)) 83 assert(bitsPerValue <= 32) 84 ans.encodedSizes[bpv] = encodedSize(format, packedIntsVersion, uint32(bitsPerValue)) 85 ans.encoders[bpv] = packed.GetPackedIntsEncoder(format, packedIntsVersion, uint32(bitsPerValue)) 86 ans.decoders[bpv] = packed.GetPackedIntsDecoder(format, packedIntsVersion, uint32(bitsPerValue)) 87 ans.iterations[bpv] = computeIterations(ans.decoders[bpv]) 88 89 err = out.WriteVInt(int32(format.Id()<<5 | (bitsPerValue - 1))) 90 if err != nil { 91 return ans, err 92 } 93 } 94 return ans, err 95 } 96 97 func assert(ok bool) { 98 if !ok { 99 panic("assert fail") 100 } 101 } 102 103 func assert2(ok bool, msg string, args ...interface{}) { 104 if !ok { 105 panic(fmt.Sprintf(msg, args...)) 106 } 107 } 108 109 type DataInput interface { 110 ReadVInt() (n int32, err error) 111 } 112 113 /* Restore a ForUtil from a DataInput. */ 114 func NewForUtilFrom(in DataInput) (fu *ForUtil, err error) { 115 self := &ForUtil{} 116 packedIntsVersion, err := in.ReadVInt() 117 if err != nil { 118 return self, err 119 } 120 packed.CheckVersion(packedIntsVersion) 121 self.encodedSizes = make([]int32, 33) 122 self.encoders = make([]packed.PackedIntsEncoder, 33) 123 self.decoders = make([]packed.PackedIntsDecoder, 33) 124 self.iterations = make([]int32, 33) 125 126 for bpv := 1; bpv <= 32; bpv++ { 127 code, err := in.ReadVInt() 128 if err != nil { 129 return self, err 130 } 131 formatId := uint32(code) >> 5 132 bitsPerValue := (uint32(code) & 31) + 1 133 134 format := packed.PackedFormat(formatId) 135 // assert format.isSupported(bitsPerValue) 136 self.encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue) 137 self.encoders[bpv] = packed.GetPackedIntsEncoder(format, packedIntsVersion, bitsPerValue) 138 self.decoders[bpv] = packed.GetPackedIntsDecoder(format, packedIntsVersion, bitsPerValue) 139 self.iterations[bpv] = computeIterations(self.decoders[bpv]) 140 } 141 return self, nil 142 } 143 144 type IndexOutput interface { 145 WriteByte(byte) error 146 WriteBytes([]byte) error 147 WriteVInt(int32) error 148 } 149 150 func (u *ForUtil) writeBlock(data []int, encoded []byte, out IndexOutput) error { 151 if isAllEqual(data) { 152 var err error 153 if err = out.WriteByte(byte(ALL_VALUES_EQUAL)); err == nil { 154 err = out.WriteVInt(int32(data[0])) 155 } 156 return err 157 } 158 159 numBits := bitsRequired(data) 160 assert2(numBits > 0 && numBits <= 32, "%v", numBits) 161 encoder := u.encoders[numBits] 162 iters := int(u.iterations[numBits]) 163 assert(iters*encoder.ByteValueCount() >= LUCENE41_BLOCK_SIZE) 164 encodedSize := int(u.encodedSizes[numBits]) 165 assert(iters*encoder.ByteBlockCount() >= encodedSize) 166 167 if err := out.WriteByte(byte(numBits)); err != nil { 168 return err 169 } 170 171 encoder.EncodeIntToByte(data, encoded, iters) 172 return out.WriteBytes(encoded[:encodedSize]) 173 } 174 175 func encodedSize(format packed.PackedFormat, packedIntsVersion int32, bitsPerValue uint32) int32 { 176 byteCount := format.ByteCount(packedIntsVersion, LUCENE41_BLOCK_SIZE, bitsPerValue) 177 // assert byteCount >= 0 && byteCount <= math.MaxInt32() 178 return int32(byteCount) 179 } 180 181 func computeIterations(decoder packed.PackedIntsDecoder) int32 { 182 return int32(math.Ceil(float64(LUCENE41_BLOCK_SIZE) / float64(decoder.ByteValueCount()))) 183 } 184 185 func isAllEqual(data []int) bool { 186 first := data[0] 187 for _, v := range data[1:LUCENE41_BLOCK_SIZE] { 188 if v != first { 189 return false 190 } 191 } 192 return true 193 } 194 195 func bitsRequired(data []int) int { 196 or := int64(0) 197 for _, v := range data[:LUCENE41_BLOCK_SIZE] { 198 assert(v >= 0) 199 or |= int64(v) 200 } 201 return packed.BitsRequired(or) 202 }