github.com/ledgerwatch/erigon-lib@v1.0.0/recsplit/eliasfano16/elias_fano.go (about) 1 /* 2 Copyright 2021 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package eliasfano16 18 19 import ( 20 "encoding/binary" 21 "fmt" 22 "io" 23 "math" 24 "math/bits" 25 "unsafe" 26 27 "github.com/ledgerwatch/erigon-lib/common/bitutil" 28 ) 29 30 // EliasFano algo overview https://www.antoniomallia.it/sorted-integers-compression-with-elias-fano-encoding.html 31 // P. Elias. Efficient storage and retrieval by content and address of static files. J. ACM, 21(2):246–260, 1974. 32 // Partitioned Elias-Fano Indexes http://groups.di.unipi.it/~ottavian/files/elias_fano_sigir14.pdf 33 34 const ( 35 log2q uint64 = 8 36 q uint64 = 1 << log2q 37 qMask = q - 1 38 superQ uint64 = 1 << 14 39 superQMask = superQ - 1 40 qPerSuperQ = superQ / q // 64 41 superQSize = 1 + qPerSuperQ/4 // 1 + 64/4 = 17 42 ) 43 44 // EliasFano can be used to encode one monotone sequence 45 type EliasFano struct { 46 data []uint64 47 lowerBits []uint64 48 upperBits []uint64 49 jump []uint64 50 lowerBitsMask uint64 51 count uint64 52 u uint64 53 l uint64 54 maxOffset uint64 55 minDelta uint64 56 i uint64 57 delta uint64 58 wordsUpperBits int 59 } 60 61 func NewEliasFano(count uint64, maxOffset, minDelta uint64) *EliasFano { 62 //fmt.Printf("count=%d,maxOffset=%d,minDelta=%d\n", count, maxOffset, minDelta) 63 ef := &EliasFano{ 64 count: count - 1, 65 maxOffset: maxOffset, 66 minDelta: minDelta, 67 } 68 ef.u = maxOffset - ef.count*ef.minDelta + 1 69 ef.wordsUpperBits = ef.deriveFields() 70 return ef 71 } 72 73 func (ef *EliasFano) AddOffset(offset uint64) { 74 //fmt.Printf("0x%x,\n", offset) 75 if ef.l != 0 { 76 setBits(ef.lowerBits, ef.i*ef.l, int(ef.l), (offset-ef.delta)&ef.lowerBitsMask) 77 } 78 //pos := ((offset - ef.delta) >> ef.l) + ef.i 79 set(ef.upperBits, ((offset-ef.delta)>>ef.l)+ef.i) 80 //fmt.Printf("add:%x, pos=%x, set=%x, res=%x\n", offset, pos, pos/64, uint64(1)<<(pos%64)) 81 ef.i++ 82 ef.delta += ef.minDelta 83 } 84 85 func (ef *EliasFano) jumpSizeWords() int { 86 size := ((ef.count + 1) / superQ) * superQSize // Whole blocks 87 if (ef.count+1)%superQ != 0 { 88 size += 1 + (((ef.count+1)%superQ+q-1)/q+3)/4 // Partial block 89 } 90 return int(size) 91 } 92 93 func (ef *EliasFano) deriveFields() int { 94 if ef.u/(ef.count+1) == 0 { 95 ef.l = 0 96 } else { 97 ef.l = 63 ^ uint64(bits.LeadingZeros64(ef.u/(ef.count+1))) // pos of first non-zero bit 98 //fmt.Printf("lllllllll: %d, %d\n", 63^uint64(bits.LeadingZeros64(24/7)), msb(ef.u/(ef.count+1))) 99 } 100 ef.lowerBitsMask = (uint64(1) << ef.l) - 1 101 wordsLowerBits := int(((ef.count+1)*ef.l+63)/64 + 1) 102 wordsUpperBits := int((ef.count + 1 + (ef.u >> ef.l) + 63) / 64) 103 jumpWords := ef.jumpSizeWords() 104 totalWords := wordsLowerBits + wordsUpperBits + jumpWords 105 if ef.data == nil { 106 ef.data = make([]uint64, totalWords) 107 } else { 108 ef.data = ef.data[:totalWords] 109 } 110 111 ef.lowerBits = ef.data[:wordsLowerBits] 112 ef.upperBits = ef.data[wordsLowerBits : wordsLowerBits+wordsUpperBits] 113 ef.jump = ef.data[wordsLowerBits+wordsUpperBits:] 114 return wordsUpperBits 115 } 116 117 // Build construct Elias Fano index for a given sequences 118 func (ef *EliasFano) Build() { 119 for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(ef.wordsUpperBits); i++ { 120 for b := uint64(0); b < 64; b++ { 121 if ef.upperBits[i]&(uint64(1)<<b) != 0 { 122 if (c & superQMask) == 0 { 123 // When c is multiple of 2^14 (4096) 124 lastSuperQ = i*64 + b 125 ef.jump[(c/superQ)*superQSize] = lastSuperQ 126 } 127 if (c & qMask) == 0 { 128 // When c is multiple of 2^8 (256) 129 var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256 130 // offset needs to be encoded as 16-bit integer, therefore the following check 131 if offset >= (1 << 16) { 132 fmt.Printf("ef.l=%x,ef.u=%x\n", ef.l, ef.u) 133 fmt.Printf("offset=%x,lastSuperQ=%x,i=%x,b=%x,c=%x\n", offset, lastSuperQ, i, b, c) 134 fmt.Printf("ef.minDelta=%x\n", ef.minDelta) 135 //fmt.Printf("ef.upperBits=%x\n", ef.upperBits) 136 //fmt.Printf("ef.lowerBits=%x\n", ef.lowerBits) 137 //fmt.Printf("ef.wordsUpperBits=%b\n", ef.wordsUpperBits) 138 panic("") 139 } 140 // c % superQ is the bit index inside the group of 4096 bits 141 jumpSuperQ := (c / superQ) * superQSize 142 jumpInsideSuperQ := (c % superQ) / q 143 idx64 := jumpSuperQ + 1 + (jumpInsideSuperQ >> 2) 144 shift := 16 * (jumpInsideSuperQ % 4) 145 mask := uint64(0xffff) << shift 146 ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) 147 } 148 c++ 149 } 150 } 151 } 152 } 153 154 func (ef *EliasFano) get(i uint64) (val, window uint64, sel int, currWord, lower, delta uint64) { 155 lower = i * ef.l 156 idx64 := lower / 64 157 shift := lower % 64 158 lower = ef.lowerBits[idx64] >> shift 159 if shift > 0 { 160 lower |= ef.lowerBits[idx64+1] << (64 - shift) 161 } 162 163 jumpSuperQ := (i / superQ) * superQSize 164 jumpInsideSuperQ := (i % superQ) / q 165 idx64 = jumpSuperQ + 1 + (jumpInsideSuperQ >> 2) 166 shift = 16 * (jumpInsideSuperQ % 4) 167 mask := uint64(0xffff) << shift 168 jump := ef.jump[jumpSuperQ] + (ef.jump[idx64]&mask)>>shift 169 170 currWord = jump / 64 171 window = ef.upperBits[currWord] & (uint64(0xffffffffffffffff) << (jump % 64)) 172 d := int(i & qMask) 173 174 for bitCount := bits.OnesCount64(window); bitCount <= d; bitCount = bits.OnesCount64(window) { 175 currWord++ 176 window = ef.upperBits[currWord] 177 d -= bitCount 178 } 179 180 sel = bitutil.Select64(window, d) 181 delta = i * ef.minDelta 182 val = ((currWord*64+uint64(sel)-i)<<ef.l | (lower & ef.lowerBitsMask)) + delta 183 184 return 185 } 186 187 func (ef *EliasFano) Get(i uint64) uint64 { 188 val, _, _, _, _, _ := ef.get(i) 189 return val 190 } 191 192 func (ef *EliasFano) Get2(i uint64) (val, valNext uint64) { 193 var window uint64 194 var sel int 195 var currWord uint64 196 var lower uint64 197 var delta uint64 198 val, window, sel, currWord, lower, delta = ef.get(i) 199 window &= (uint64(0xffffffffffffffff) << sel) << 1 200 for window == 0 { 201 currWord++ 202 window = ef.upperBits[currWord] 203 } 204 205 lower >>= ef.l 206 valNext = ((currWord*64+uint64(bits.TrailingZeros64(window))-i-1)<<ef.l | (lower & ef.lowerBitsMask)) + delta + ef.minDelta 207 return 208 } 209 210 // Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read 211 func (ef *EliasFano) Write(w io.Writer) error { 212 var numBuf [8]byte 213 binary.BigEndian.PutUint64(numBuf[:], ef.count) 214 if _, e := w.Write(numBuf[:]); e != nil { 215 return e 216 } 217 binary.BigEndian.PutUint64(numBuf[:], ef.u) 218 if _, e := w.Write(numBuf[:]); e != nil { 219 return e 220 } 221 binary.BigEndian.PutUint64(numBuf[:], ef.minDelta) 222 if _, e := w.Write(numBuf[:]); e != nil { 223 return e 224 } 225 p := (*[maxDataSize]byte)(unsafe.Pointer(&ef.data[0])) 226 b := (*p)[:] 227 if _, e := w.Write(b[:len(ef.data)*8]); e != nil { 228 return e 229 } 230 return nil 231 } 232 233 // Read inputs the state of golomb rice encoding from a reader s 234 func ReadEliasFano(r []byte) (*EliasFano, int) { 235 ef := &EliasFano{} 236 ef.count = binary.BigEndian.Uint64(r[:8]) 237 ef.u = binary.BigEndian.Uint64(r[8:16]) 238 ef.minDelta = binary.BigEndian.Uint64(r[16:24]) 239 p := (*[maxDataSize / 8]uint64)(unsafe.Pointer(&r[24])) 240 ef.data = p[:] 241 ef.deriveFields() 242 return ef, 24 + 8*len(ef.data) 243 } 244 245 const maxDataSize = 0xFFFFFFFFFFFF 246 247 // DoubleEliasFano can be used to encode two monotone sequences 248 // it is called "double" because the lower bits array contains two sequences interleaved 249 type DoubleEliasFano struct { 250 data []uint64 251 lowerBits []uint64 252 upperBitsPosition []uint64 253 upperBitsCumKeys []uint64 254 jump []uint64 255 lowerBitsMaskCumKeys uint64 256 lowerBitsMaskPosition uint64 257 numBuckets uint64 258 uCumKeys uint64 259 uPosition uint64 260 lPosition uint64 261 lCumKeys uint64 262 cumKeysMinDelta uint64 263 posMinDelta uint64 264 } 265 266 func (ef *DoubleEliasFano) deriveFields() (int, int) { 267 if ef.uPosition/(ef.numBuckets+1) == 0 { 268 ef.lPosition = 0 269 } else { 270 ef.lPosition = 63 ^ uint64(bits.LeadingZeros64(ef.uPosition/(ef.numBuckets+1))) 271 } 272 if ef.uCumKeys/(ef.numBuckets+1) == 0 { 273 ef.lCumKeys = 0 274 } else { 275 ef.lCumKeys = 63 ^ uint64(bits.LeadingZeros64(ef.uCumKeys/(ef.numBuckets+1))) 276 } 277 //fmt.Printf("uPosition = %d, lPosition = %d, uCumKeys = %d, lCumKeys = %d\n", ef.uPosition, ef.lPosition, ef.uCumKeys, ef.lCumKeys) 278 if ef.lCumKeys*2+ef.lPosition > 56 { 279 panic(fmt.Sprintf("ef.lCumKeys (%d) * 2 + ef.lPosition (%d) > 56", ef.lCumKeys, ef.lPosition)) 280 } 281 ef.lowerBitsMaskCumKeys = (uint64(1) << ef.lCumKeys) - 1 282 ef.lowerBitsMaskPosition = (uint64(1) << ef.lPosition) - 1 283 wordsLowerBits := int(((ef.numBuckets+1)*(ef.lCumKeys+ef.lPosition)+63)/64 + 1) 284 wordsCumKeys := int((ef.numBuckets + 1 + (ef.uCumKeys >> ef.lCumKeys) + 63) / 64) 285 wordsPosition := int((ef.numBuckets + 1 + (ef.uPosition >> ef.lPosition) + 63) / 64) 286 jumpWords := ef.jumpSizeWords() 287 totalWords := wordsLowerBits + wordsCumKeys + wordsPosition + jumpWords 288 if ef.data == nil { 289 ef.data = make([]uint64, totalWords) 290 } else { 291 ef.data = ef.data[:totalWords] 292 } 293 ef.lowerBits = ef.data[:wordsLowerBits] 294 ef.upperBitsCumKeys = ef.data[wordsLowerBits : wordsLowerBits+wordsCumKeys] 295 ef.upperBitsPosition = ef.data[wordsLowerBits+wordsCumKeys : wordsLowerBits+wordsCumKeys+wordsPosition] 296 ef.jump = ef.data[wordsLowerBits+wordsCumKeys+wordsPosition:] 297 return wordsCumKeys, wordsPosition 298 } 299 300 // Build construct double Elias Fano index for two given sequences 301 func (ef *DoubleEliasFano) Build(cumKeys []uint64, position []uint64) { 302 //fmt.Printf("cumKeys = %d\nposition = %d\n", cumKeys, position) 303 if len(cumKeys) != len(position) { 304 panic("len(cumKeys) != len(position)") 305 } 306 ef.numBuckets = uint64(len(cumKeys) - 1) 307 ef.posMinDelta = math.MaxUint64 308 ef.cumKeysMinDelta = math.MaxUint64 309 for i := uint64(1); i <= ef.numBuckets; i++ { 310 if cumKeys[i] < cumKeys[i-1] { 311 panic("cumKeys[i] <= cumKeys[i-1]") 312 } 313 nkeysDelta := cumKeys[i] - cumKeys[i-1] 314 if nkeysDelta < ef.cumKeysMinDelta { 315 ef.cumKeysMinDelta = nkeysDelta 316 } 317 if position[i] < position[i-1] { 318 panic("position[i] < position[i-1]") 319 } 320 bucketBits := position[i] - position[i-1] 321 if bucketBits < ef.posMinDelta { 322 ef.posMinDelta = bucketBits 323 } 324 } 325 //fmt.Printf("cumKeysMinDelta = %d, posMinDelta = %d\n", ef.cumKeysMinDelta, ef.posMinDelta) 326 ef.uPosition = position[ef.numBuckets] - ef.numBuckets*ef.posMinDelta + 1 327 ef.uCumKeys = cumKeys[ef.numBuckets] - ef.numBuckets*ef.cumKeysMinDelta + 1 // Largest possible encoding of the cumKeys 328 wordsCumKeys, wordsPosition := ef.deriveFields() 329 330 for i, cumDelta, bitDelta := uint64(0), uint64(0), uint64(0); i <= ef.numBuckets; i, cumDelta, bitDelta = i+1, cumDelta+ef.cumKeysMinDelta, bitDelta+ef.posMinDelta { 331 if ef.lCumKeys != 0 { 332 //fmt.Printf("i=%d, set_bits cum for %d = %b\n", i, cumKeys[i]-cumDelta, (cumKeys[i]-cumDelta)&ef.lowerBitsMaskCumKeys) 333 setBits(ef.lowerBits, i*(ef.lCumKeys+ef.lPosition), int(ef.lCumKeys), (cumKeys[i]-cumDelta)&ef.lowerBitsMaskCumKeys) 334 //fmt.Printf("loweBits %b\n", ef.lowerBits) 335 } 336 set(ef.upperBitsCumKeys, ((cumKeys[i]-cumDelta)>>ef.lCumKeys)+i) 337 //fmt.Printf("i=%d, set cum for %d = %d\n", i, cumKeys[i]-cumDelta, (cumKeys[i]-cumDelta)>>ef.lCumKeys+i) 338 339 if ef.lPosition != 0 { 340 //fmt.Printf("i=%d, set_bits pos for %d = %b\n", i, position[i]-bitDelta, (position[i]-bitDelta)&ef.lowerBitsMaskPosition) 341 setBits(ef.lowerBits, i*(ef.lCumKeys+ef.lPosition)+ef.lCumKeys, int(ef.lPosition), (position[i]-bitDelta)&ef.lowerBitsMaskPosition) 342 //fmt.Printf("lowerBits %b\n", ef.lowerBits) 343 } 344 set(ef.upperBitsPosition, ((position[i]-bitDelta)>>ef.lPosition)+i) 345 //fmt.Printf("i=%d, set pos for %d = %d\n", i, position[i]-bitDelta, (position[i]-bitDelta)>>ef.lPosition+i) 346 } 347 //fmt.Printf("loweBits %b\n", ef.lowerBits) 348 //fmt.Printf("upperBitsCumKeys %b\n", ef.upperBitsCumKeys) 349 //fmt.Printf("upperBitsPosition %b\n", ef.upperBitsPosition) 350 // i iterates over the 64-bit words in the wordCumKeys vector 351 // c iterates over bits in the wordCumKeys 352 // lastSuperQ is the largest multiple of 2^14 (4096) which is no larger than c 353 // c/superQ is the index of the current 4096 block of bits 354 // superQSize is how many words is required to encode one block of 4096 bits. It is 17 words which is 1088 bits 355 for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsCumKeys); i++ { 356 for b := uint64(0); b < 64; b++ { 357 if ef.upperBitsCumKeys[i]&(uint64(1)<<b) != 0 { 358 if (c & superQMask) == 0 { 359 // When c is multiple of 2^14 (4096) 360 lastSuperQ = i*64 + b 361 ef.jump[(c/superQ)*(superQSize*2)] = lastSuperQ 362 } 363 if (c & qMask) == 0 { 364 // When c is multiple of 2^8 (256) 365 var offset = i*64 + b - lastSuperQ // offset can be either 0, 256, 512, 768, ..., up to 4096-256 366 // offset needs to be encoded as 16-bit integer, therefore the following check 367 if offset >= (1 << 16) { 368 panic("") 369 } 370 // c % superQ is the bit index inside the group of 4096 bits 371 jumpSuperQ := (c / superQ) * (superQSize * 2) 372 jumpInsideSuperQ := 2 * (c % superQ) / q 373 idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2) 374 shift := 16 * (jumpInsideSuperQ % 4) 375 mask := uint64(0xffff) << shift 376 ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) 377 } 378 c++ 379 } 380 } 381 } 382 383 for i, c, lastSuperQ := uint64(0), uint64(0), uint64(0); i < uint64(wordsPosition); i++ { 384 for b := uint64(0); b < 64; b++ { 385 if ef.upperBitsPosition[i]&(uint64(1)<<b) != 0 { 386 if (c & superQMask) == 0 { 387 lastSuperQ = i*64 + b 388 ef.jump[(c/superQ)*(superQSize*2)+1] = lastSuperQ 389 } 390 if (c & qMask) == 0 { 391 var offset = i*64 + b - lastSuperQ 392 if offset >= (1 << 16) { 393 panic("") 394 } 395 jumpSuperQ := (c / superQ) * (superQSize * 2) 396 jumpInsideSuperQ := 2*((c%superQ)/q) + 1 397 idx64 := jumpSuperQ + 2 + (jumpInsideSuperQ >> 2) 398 shift := 16 * (jumpInsideSuperQ % 4) 399 mask := uint64(0xffff) << shift 400 ef.jump[idx64] = (ef.jump[idx64] &^ mask) | (offset << shift) 401 } 402 c++ 403 } 404 } 405 } 406 //fmt.Printf("jump: %x\n", ef.jump) 407 } 408 409 // setBits assumes that bits are set in monotonic order, so that 410 // we can skip the masking for the second word 411 func setBits(bits []uint64, start uint64, width int, value uint64) { 412 shift := int(start & 63) 413 idx64 := start >> 6 414 mask := (uint64(1)<<width - 1) << shift 415 //fmt.Printf("mask = %b, idx64 = %d\n", mask, idx64) 416 bits[idx64] = (bits[idx64] &^ mask) | (value << shift) 417 //fmt.Printf("start = %d, width = %d, shift + width = %d\n", start, width, shift+width) 418 if shift+width > 64 { 419 // changes two 64-bit words 420 bits[idx64+1] = value >> (64 - shift) 421 } 422 } 423 424 func set(bits []uint64, pos uint64) { 425 //bits[pos>>6] |= uint64(1) << (pos & 63) 426 bits[pos/64] |= uint64(1) << (pos % 64) 427 } 428 429 func (ef *DoubleEliasFano) jumpSizeWords() int { 430 size := ((ef.numBuckets + 1) / superQ) * superQSize * 2 // Whole blocks 431 if (ef.numBuckets+1)%superQ != 0 { 432 size += (1 + (((ef.numBuckets+1)%superQ+q-1)/q+3)/4) * 2 // Partial block 433 } 434 return int(size) 435 } 436 437 // Data returns binary representation of double Ellias-Fano index that has been built 438 func (ef *DoubleEliasFano) Data() []uint64 { 439 return ef.data 440 } 441 442 func (ef *DoubleEliasFano) get2(i uint64) (cumKeys, position uint64, 443 windowCumKeys uint64, selectCumKeys int, currWordCumKeys, lower, cumDelta uint64) { 444 posLower := i * (ef.lCumKeys + ef.lPosition) 445 idx64 := posLower / 64 446 shift := posLower % 64 447 lower = ef.lowerBits[idx64] >> shift 448 if shift > 0 { 449 lower |= ef.lowerBits[idx64+1] << (64 - shift) 450 } 451 //fmt.Printf("i = %d, posLower = %d, lower = %b\n", i, posLower, lower) 452 453 jumpSuperQ := (i / superQ) * superQSize * 2 454 jumpInsideSuperQ := (i % superQ) / q 455 idx16 := 4*(jumpSuperQ+2) + 2*jumpInsideSuperQ 456 idx64 = idx16 / 4 457 shift = 16 * (idx16 % 4) 458 mask := uint64(0xffff) << shift 459 jumpCumKeys := ef.jump[jumpSuperQ] + (ef.jump[idx64]&mask)>>shift 460 idx16++ 461 idx64 = idx16 / 4 462 shift = 16 * (idx16 % 4) 463 mask = uint64(0xffff) << shift 464 jumpPosition := ef.jump[jumpSuperQ+1] + (ef.jump[idx64]&mask)>>shift 465 //fmt.Printf("i = %d, jumpCumKeys = %d, jumpPosition = %d\n", i, jumpCumKeys, jumpPosition) 466 467 currWordCumKeys = jumpCumKeys / 64 468 currWordPosition := jumpPosition / 64 469 windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys] & (uint64(0xffffffffffffffff) << (jumpCumKeys % 64)) 470 windowPosition := ef.upperBitsPosition[currWordPosition] & (uint64(0xffffffffffffffff) << (jumpPosition % 64)) 471 deltaCumKeys := int(i & qMask) 472 deltaPosition := int(i & qMask) 473 474 for bitCount := bits.OnesCount64(windowCumKeys); bitCount <= deltaCumKeys; bitCount = bits.OnesCount64(windowCumKeys) { 475 //fmt.Printf("i = %d, bitCount cum = %d\n", i, bitCount) 476 currWordCumKeys++ 477 windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys] 478 deltaCumKeys -= bitCount 479 } 480 for bitCount := bits.OnesCount64(windowPosition); bitCount <= deltaPosition; bitCount = bits.OnesCount64(windowPosition) { 481 //fmt.Printf("i = %d, bitCount pos = %d\n", i, bitCount) 482 currWordPosition++ 483 windowPosition = ef.upperBitsPosition[currWordPosition] 484 deltaPosition -= bitCount 485 } 486 487 selectCumKeys = bitutil.Select64(windowCumKeys, deltaCumKeys) 488 //fmt.Printf("i = %d, select cum in %b for %d = %d\n", i, windowCumKeys, deltaCumKeys, selectCumKeys) 489 cumDelta = i * ef.cumKeysMinDelta 490 cumKeys = ((currWordCumKeys*64+uint64(selectCumKeys)-i)<<ef.lCumKeys | (lower & ef.lowerBitsMaskCumKeys)) + cumDelta 491 492 lower >>= ef.lCumKeys 493 //fmt.Printf("i = %d, lower = %b\n", i, lower) 494 selectPosition := bitutil.Select64(windowPosition, deltaPosition) 495 //fmt.Printf("i = %d, select pos in %b for %d = %d\n", i, windowPosition, deltaPosition, selectPosition) 496 bitDelta := i * ef.posMinDelta 497 position = ((currWordPosition*64+uint64(selectPosition)-i)<<ef.lPosition | (lower & ef.lowerBitsMaskPosition)) + bitDelta 498 return 499 } 500 501 func (ef *DoubleEliasFano) Get2(i uint64) (cumKeys, position uint64) { 502 cumKeys, position, _, _, _, _, _ = ef.get2(i) 503 return 504 } 505 506 func (ef *DoubleEliasFano) Get3(i uint64) (cumKeys, cumKeysNext, position uint64) { 507 var windowCumKeys uint64 508 var selectCumKeys int 509 var currWordCumKeys uint64 510 var lower uint64 511 var cumDelta uint64 512 cumKeys, position, windowCumKeys, selectCumKeys, currWordCumKeys, lower, cumDelta = ef.get2(i) 513 windowCumKeys &= (uint64(0xffffffffffffffff) << selectCumKeys) << 1 514 for windowCumKeys == 0 { 515 currWordCumKeys++ 516 windowCumKeys = ef.upperBitsCumKeys[currWordCumKeys] 517 } 518 519 lower >>= ef.lPosition 520 cumKeysNext = ((currWordCumKeys*64+uint64(bits.TrailingZeros64(windowCumKeys))-i-1)<<ef.lCumKeys | (lower & ef.lowerBitsMaskCumKeys)) + cumDelta + ef.cumKeysMinDelta 521 return 522 } 523 524 // Write outputs the state of golomb rice encoding into a writer, which can be recovered later by Read 525 func (ef *DoubleEliasFano) Write(w io.Writer) error { 526 var numBuf [8]byte 527 binary.BigEndian.PutUint64(numBuf[:], ef.numBuckets) 528 if _, e := w.Write(numBuf[:]); e != nil { 529 return e 530 } 531 binary.BigEndian.PutUint64(numBuf[:], ef.uCumKeys) 532 if _, e := w.Write(numBuf[:]); e != nil { 533 return e 534 } 535 binary.BigEndian.PutUint64(numBuf[:], ef.uPosition) 536 if _, e := w.Write(numBuf[:]); e != nil { 537 return e 538 } 539 binary.BigEndian.PutUint64(numBuf[:], ef.cumKeysMinDelta) 540 if _, e := w.Write(numBuf[:]); e != nil { 541 return e 542 } 543 binary.BigEndian.PutUint64(numBuf[:], ef.posMinDelta) 544 if _, e := w.Write(numBuf[:]); e != nil { 545 return e 546 } 547 p := (*[maxDataSize]byte)(unsafe.Pointer(&ef.data[0])) 548 b := (*p)[:] 549 if _, e := w.Write(b[:len(ef.data)*8]); e != nil { 550 return e 551 } 552 return nil 553 } 554 555 // Read inputs the state of golomb rice encoding from a reader s 556 func (ef *DoubleEliasFano) Read(r []byte) int { 557 ef.numBuckets = binary.BigEndian.Uint64(r[:8]) 558 ef.uCumKeys = binary.BigEndian.Uint64(r[8:16]) 559 ef.uPosition = binary.BigEndian.Uint64(r[16:24]) 560 ef.cumKeysMinDelta = binary.BigEndian.Uint64(r[24:32]) 561 ef.posMinDelta = binary.BigEndian.Uint64(r[32:40]) 562 p := (*[maxDataSize / 8]uint64)(unsafe.Pointer(&r[40])) 563 ef.data = p[:] 564 ef.deriveFields() 565 return 40 + 8*len(ef.data) 566 }