github.com/ledgerwatch/erigon-lib@v1.0.0/recsplit/golomb_rice.go (about) 1 /* 2 Copyright 2021 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package recsplit 18 19 import ( 20 "encoding/binary" 21 "io" 22 "math/bits" 23 "unsafe" 24 25 "github.com/ledgerwatch/erigon-lib/common/bitutil" 26 ) 27 28 // Optimal Golomb-Rice parameters for leaves 29 var bijMemo = []uint32{0, 0, 0, 1, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19, 21, 22, 23, 25, 26, 28, 29, 30} 30 31 // GolombRice can build up the golomb-rice encoding of the sequeuce of numbers, as well as read the numbers back from it. 32 type GolombRice struct { 33 data []uint64 // Present in the builder and in the reader 34 bitCount int // Speficic to the builder - number of bits added to the encoding so far 35 } 36 37 // appendUnaryAll adds the unary encoding of specified sequence of numbers to the end of the 38 // current encoding 39 func (g *GolombRice) appendUnaryAll(unary []uint64) { 40 bitInc := 0 41 for _, u := range unary { 42 // Each number u uses u+1 bits for its unary representation 43 bitInc += int(u) + 1 44 } 45 targetSize := (g.bitCount + bitInc + 63) / 64 46 for len(g.data) < targetSize { 47 g.data = append(g.data, 0) 48 } 49 50 for _, u := range unary { 51 g.bitCount += int(u) 52 appendPtr := g.bitCount / 64 53 g.data[appendPtr] |= uint64(1) << (g.bitCount & 63) 54 g.bitCount++ 55 } 56 } 57 58 // appendFixed encodes the next value using specified Golomb parameter. Since we are using Golomb-Rice encoding, 59 // all Golomb parameters are powers of two. Therefore we input log2 of golomb parameter, rather than golomn paramter itself, 60 // for convinience 61 func (g *GolombRice) appendFixed(v uint64, log2golomb int) { 62 if log2golomb == 0 { 63 return 64 } 65 lowerBits := v & ((uint64(1) << log2golomb) - 1) // Extract the part of the number that will be encoded using truncated binary encoding 66 usedBits := g.bitCount & 63 // How many bits of the last element of b.data is used by previous value 67 targetSize := (g.bitCount + log2golomb + 63) / 64 68 //fmt.Printf("g.bitCount = %d, log2golomb = %d, targetSize = %d\n", g.bitCount, log2golomb, targetSize) 69 for len(g.data) < targetSize { 70 g.data = append(g.data, 0) 71 } 72 appendPtr := g.bitCount / 64 // The index in b.data corresponding to the last element used by previous value, or if previous values fits perfectly, the index of the next free element 73 curWord := g.data[appendPtr] 74 curWord |= lowerBits << usedBits // curWord now contains the new value potentially combined with the part of the previous value 75 if usedBits+log2golomb > 64 { 76 // New value overflows to the next element 77 g.data[appendPtr] = curWord 78 appendPtr++ 79 curWord = lowerBits >> (64 - usedBits) // curWord now contains the part of the new value that overflows 80 } 81 g.data[appendPtr] = curWord 82 g.bitCount += log2golomb 83 } 84 85 // Bits returns currrent number of bits in the compact encoding of the hash function representation 86 func (g *GolombRice) Bits() int { 87 return g.bitCount 88 } 89 90 func (g *GolombRiceReader) ReadReset(bitPos, unaryOffset int) { 91 g.currFixedOffset = bitPos 92 unaryPos := bitPos + unaryOffset 93 g.currPtrUnary = unaryPos / 64 94 g.currWindowUnary = g.data[g.currPtrUnary] >> (unaryPos & 63) 95 g.currPtrUnary++ 96 g.validLowerBitsUnary = 64 - (unaryPos & 63) 97 } 98 99 func (g *GolombRiceReader) SkipSubtree(nodes, fixedLen int) { 100 if nodes <= 0 { 101 panic("nodes <= 0") 102 } 103 missing := nodes 104 var cnt int 105 for cnt = bits.OnesCount64(g.currWindowUnary); cnt < missing; cnt = bits.OnesCount64(g.currWindowUnary) { 106 g.currWindowUnary = g.data[g.currPtrUnary] 107 g.currPtrUnary++ 108 missing -= cnt 109 g.validLowerBitsUnary = 64 110 } 111 cnt = bitutil.Select64(g.currWindowUnary, missing-1) 112 g.currWindowUnary >>= cnt 113 g.currWindowUnary >>= 1 114 g.validLowerBitsUnary -= cnt + 1 115 116 g.currFixedOffset += fixedLen 117 } 118 119 func (g *GolombRiceReader) ReadNext(log2golomb int) uint64 { 120 var result uint64 121 122 if g.currWindowUnary == 0 { 123 result += uint64(g.validLowerBitsUnary) 124 g.currWindowUnary = g.data[g.currPtrUnary] 125 g.currPtrUnary++ 126 g.validLowerBitsUnary = 64 127 for g.currWindowUnary == 0 { 128 result += 64 129 g.currWindowUnary = g.data[g.currPtrUnary] 130 g.currPtrUnary++ 131 } 132 } 133 134 pos := bits.TrailingZeros64(g.currWindowUnary) 135 136 g.currWindowUnary >>= pos 137 g.currWindowUnary >>= 1 138 g.validLowerBitsUnary -= pos + 1 139 140 result += uint64(pos) 141 result <<= log2golomb 142 143 idx64 := g.currFixedOffset >> 6 144 var fixed uint64 145 shift := g.currFixedOffset & 63 146 fixed = g.data[idx64] >> shift 147 if shift+log2golomb > 64 { 148 fixed |= g.data[idx64+1] << (64 - shift) 149 } 150 result |= fixed & ((uint64(1) << log2golomb) - 1) 151 g.currFixedOffset += log2golomb 152 return result 153 } 154 155 // Data returns the binary representation of the Golomb-Rice code that is built 156 func (g *GolombRice) Data() []uint64 { 157 return g.data 158 } 159 160 const maxDataSize = 0xFFFFFFFFFFFF 161 162 // Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read 163 func (g *GolombRice) Write(w io.Writer) error { 164 var numBuf [8]byte 165 binary.BigEndian.PutUint64(numBuf[:], uint64(len(g.data))) 166 if _, e := w.Write(numBuf[:]); e != nil { 167 return e 168 } 169 p := (*[maxDataSize]byte)(unsafe.Pointer(&g.data[0])) 170 b := (*p)[:] 171 if _, e := w.Write(b[:len(g.data)*8]); e != nil { 172 return e 173 } 174 return nil 175 } 176 177 type GolombRiceReader struct { 178 data []uint64 // Present in the builder and in the reader 179 currFixedOffset int // Specific to the reader 180 currWindowUnary uint64 181 currPtrUnary int 182 validLowerBitsUnary int 183 }