github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/surf/bits_vec.go (about)

     1  package surf
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  	"math/bits"
     7  	"sort"
     8  
     9  	"github.com/dgryski/go-farm"
    10  )
    11  
    12  type bitVector struct {
    13  	numBits uint32
    14  	bits    []uint64
    15  }
    16  
    17  func (v *bitVector) numWords() uint32 {
    18  	wordSz := v.numBits / wordSize
    19  	if v.numBits%wordSize != 0 {
    20  		wordSz++
    21  	}
    22  	return wordSz
    23  }
    24  
    25  func (v *bitVector) bitsSize() uint32 {
    26  	return v.numWords() * 8
    27  }
    28  
    29  func (v *bitVector) Init(bitsPerLevel [][]uint64, numBitsPerLevel []uint32) {
    30  	for _, n := range numBitsPerLevel {
    31  		v.numBits += n
    32  	}
    33  
    34  	v.bits = make([]uint64, v.numWords())
    35  
    36  	var wordID, bitShift uint32
    37  	for level, bits := range bitsPerLevel {
    38  		n := numBitsPerLevel[level]
    39  		if n == 0 {
    40  			continue
    41  		}
    42  
    43  		nCompleteWords := n / wordSize
    44  		for word := 0; uint32(word) < nCompleteWords; word++ {
    45  			v.bits[wordID] |= bits[word] << bitShift
    46  			wordID++
    47  			if bitShift > 0 {
    48  				v.bits[wordID] |= bits[word] >> (wordSize - bitShift)
    49  			}
    50  		}
    51  
    52  		remain := n % wordSize
    53  		if remain > 0 {
    54  			lastWord := bits[nCompleteWords]
    55  			v.bits[wordID] |= lastWord << bitShift
    56  			if bitShift+remain <= wordSize {
    57  				bitShift = (bitShift + remain) % wordSize
    58  				if bitShift == 0 {
    59  					wordID++
    60  				}
    61  			} else {
    62  				wordID++
    63  				v.bits[wordID] |= lastWord >> (wordSize - bitShift)
    64  				bitShift = bitShift + remain - wordSize
    65  			}
    66  		}
    67  	}
    68  }
    69  
    70  func (v *bitVector) IsSet(pos uint32) bool {
    71  	return readBit(v.bits, pos)
    72  }
    73  
    74  func (v *bitVector) DistanceToNextSetBit(pos uint32) uint32 {
    75  	var distance uint32 = 1
    76  	wordOff := (pos + 1) / wordSize
    77  	bitsOff := (pos + 1) % wordSize
    78  
    79  	if wordOff >= uint32(len(v.bits)) {
    80  		return 0
    81  	}
    82  
    83  	testBits := v.bits[wordOff] >> bitsOff
    84  	if testBits > 0 {
    85  		return distance + uint32(bits.TrailingZeros64(testBits))
    86  	}
    87  
    88  	numWords := v.numWords()
    89  	if wordOff == numWords-1 {
    90  		return v.numBits - pos
    91  	}
    92  	distance += wordSize - bitsOff
    93  
    94  	for wordOff < numWords-1 {
    95  		wordOff++
    96  		testBits = v.bits[wordOff]
    97  		if testBits > 0 {
    98  			return distance + uint32(bits.TrailingZeros64(testBits))
    99  		}
   100  		distance += wordSize
   101  	}
   102  
   103  	if wordOff == numWords-1 && v.numBits%64 != 0 {
   104  		distance -= wordSize - v.numBits%64
   105  	}
   106  
   107  	return distance
   108  }
   109  
   110  func (v *bitVector) DistanceToPrevSetBit(pos uint32) uint32 {
   111  	if pos == 0 {
   112  		return 1
   113  	}
   114  	distance := uint32(1)
   115  	wordOff := (pos - 1) / wordSize
   116  	bitsOff := (pos - 1) % wordSize
   117  
   118  	testBits := v.bits[wordOff] << (wordSize - 1 - bitsOff)
   119  	if testBits > 0 {
   120  		return distance + uint32(bits.LeadingZeros64(testBits))
   121  	}
   122  	distance += bitsOff + 1
   123  
   124  	for wordOff > 0 {
   125  		wordOff--
   126  		testBits = v.bits[wordOff]
   127  		if testBits > 0 {
   128  			return distance + uint32(bits.LeadingZeros64(testBits))
   129  		}
   130  		distance += wordSize
   131  	}
   132  	return distance
   133  }
   134  
   135  type valueVector struct {
   136  	bytes     []byte
   137  	valueSize uint32
   138  }
   139  
   140  func (v *valueVector) Init(valuesPerLevel [][]byte, valueSize uint32) {
   141  	var size int
   142  	for l := range valuesPerLevel {
   143  		size += len(valuesPerLevel[l])
   144  	}
   145  	v.valueSize = valueSize
   146  	v.bytes = make([]byte, size)
   147  
   148  	var pos uint32
   149  	for _, val := range valuesPerLevel {
   150  		copy(v.bytes[pos:], val)
   151  		pos += uint32(len(val))
   152  	}
   153  }
   154  
   155  func (v *valueVector) Get(pos uint32) []byte {
   156  	off := pos * v.valueSize
   157  	return v.bytes[off : off+v.valueSize]
   158  }
   159  
   160  func (v *valueVector) MarshalSize() int64 {
   161  	return align(v.rawMarshalSize())
   162  }
   163  
   164  func (v *valueVector) rawMarshalSize() int64 {
   165  	return 8 + int64(len(v.bytes))
   166  }
   167  
   168  func (v *valueVector) WriteTo(w io.Writer) error {
   169  	var bs [4]byte
   170  	endian.PutUint32(bs[:], uint32(len(v.bytes)))
   171  	if _, err := w.Write(bs[:]); err != nil {
   172  		return err
   173  	}
   174  
   175  	endian.PutUint32(bs[:], v.valueSize)
   176  	if _, err := w.Write(bs[:]); err != nil {
   177  		return err
   178  	}
   179  
   180  	if _, err := w.Write(v.bytes); err != nil {
   181  		return err
   182  	}
   183  
   184  	var zeros [8]byte
   185  	padding := v.MarshalSize() - v.rawMarshalSize()
   186  	_, err := w.Write(zeros[:padding])
   187  	return err
   188  }
   189  
   190  func (v *valueVector) Unmarshal(buf []byte) []byte {
   191  	var cursor int64
   192  	sz := int64(endian.Uint32(buf))
   193  	cursor += 4
   194  
   195  	v.valueSize = endian.Uint32(buf[cursor:])
   196  	cursor += 4
   197  
   198  	v.bytes = buf[cursor : cursor+sz]
   199  	cursor = align(cursor + sz)
   200  
   201  	return buf[cursor:]
   202  }
   203  
   204  const selectSampleInterval = 64
   205  
   206  type selectVector struct {
   207  	bitVector
   208  	numOnes   uint32
   209  	selectLut []uint32
   210  }
   211  
   212  func (v *selectVector) Init(bitsPerLevel [][]uint64, numBitsPerLevel []uint32) *selectVector {
   213  	v.bitVector.Init(bitsPerLevel, numBitsPerLevel)
   214  	lut := []uint32{0}
   215  	sampledOnes := selectSampleInterval
   216  	onesUptoWord := 0
   217  	for i, w := range v.bits {
   218  		ones := bits.OnesCount64(w)
   219  		for sampledOnes <= onesUptoWord+ones {
   220  			diff := sampledOnes - onesUptoWord
   221  			targetPos := i*wordSize + int(select64(w, int64(diff)))
   222  			lut = append(lut, uint32(targetPos))
   223  			sampledOnes += selectSampleInterval
   224  		}
   225  		onesUptoWord += ones
   226  	}
   227  
   228  	v.numOnes = uint32(onesUptoWord)
   229  	v.selectLut = make([]uint32, len(lut))
   230  	for i := range v.selectLut {
   231  		v.selectLut[i] = lut[i]
   232  	}
   233  
   234  	return v
   235  }
   236  
   237  func (v *selectVector) lutSize() uint32 {
   238  	return (v.numOnes/selectSampleInterval + 1) * 4
   239  }
   240  
   241  // Select returns the postion of the rank-th 1 bit.
   242  // position is zero-based; rank is one-based.
   243  // E.g., for bitvector: 100101000, select(3) = 5
   244  func (v *selectVector) Select(rank uint32) uint32 {
   245  	lutIdx := rank / selectSampleInterval
   246  	rankLeft := rank % selectSampleInterval
   247  	if lutIdx == 0 {
   248  		rankLeft--
   249  	}
   250  
   251  	pos := v.selectLut[lutIdx]
   252  	if rankLeft == 0 {
   253  		return pos
   254  	}
   255  
   256  	wordOff := pos / wordSize
   257  	bitsOff := pos % wordSize
   258  	if bitsOff == wordSize-1 {
   259  		wordOff++
   260  		bitsOff = 0
   261  	} else {
   262  		bitsOff++
   263  	}
   264  
   265  	w := v.bits[wordOff] >> bitsOff << bitsOff
   266  	ones := uint32(bits.OnesCount64(w))
   267  	for ones < rankLeft {
   268  		wordOff++
   269  		w = v.bits[wordOff]
   270  		rankLeft -= ones
   271  		ones = uint32(bits.OnesCount64(w))
   272  	}
   273  
   274  	return wordOff*wordSize + uint32(select64(w, int64(rankLeft)))
   275  }
   276  
   277  func (v *selectVector) MarshalSize() int64 {
   278  	return align(v.rawMarshalSize())
   279  }
   280  
   281  func (v *selectVector) rawMarshalSize() int64 {
   282  	return 4 + 4 + int64(v.bitsSize()) + int64(v.lutSize())
   283  }
   284  
   285  func (v *selectVector) WriteTo(w io.Writer) error {
   286  	var buf [4]byte
   287  	endian.PutUint32(buf[:], v.numBits)
   288  	if _, err := w.Write(buf[:]); err != nil {
   289  		return err
   290  	}
   291  	endian.PutUint32(buf[:], v.numOnes)
   292  	if _, err := w.Write(buf[:]); err != nil {
   293  		return err
   294  	}
   295  	if _, err := w.Write(u64SliceToBytes(v.bits)); err != nil {
   296  		return err
   297  	}
   298  	if _, err := w.Write(u32SliceToBytes(v.selectLut)); err != nil {
   299  		return err
   300  	}
   301  
   302  	var zeros [8]byte
   303  	padding := v.MarshalSize() - v.rawMarshalSize()
   304  	_, err := w.Write(zeros[:padding])
   305  	return err
   306  }
   307  
   308  func (v *selectVector) Unmarshal(buf []byte) []byte {
   309  	var cursor int64
   310  	v.numBits = endian.Uint32(buf)
   311  	cursor += 4
   312  	v.numOnes = endian.Uint32(buf[cursor:])
   313  	cursor += 4
   314  
   315  	bitsSize := int64(v.bitsSize())
   316  	v.bits = bytesToU64Slice(buf[cursor : cursor+bitsSize])
   317  	cursor += bitsSize
   318  
   319  	lutSize := int64(v.lutSize())
   320  	v.selectLut = bytesToU32Slice(buf[cursor : cursor+lutSize])
   321  	cursor = align(cursor + lutSize)
   322  	return buf[cursor:]
   323  }
   324  
   325  const (
   326  	rankDenseBlockSize  = 64
   327  	rankSparseBlockSize = 512
   328  )
   329  
   330  type rankVector struct {
   331  	bitVector
   332  	blockSize uint32
   333  	rankLut   []uint32
   334  }
   335  
   336  func (v *rankVector) init(blockSize uint32, bitsPerLevel [][]uint64, numBitsPerLevel []uint32) *rankVector {
   337  	v.bitVector.Init(bitsPerLevel, numBitsPerLevel)
   338  	v.blockSize = blockSize
   339  	wordPerBlk := v.blockSize / wordSize
   340  	nblks := v.numBits/v.blockSize + 1
   341  	v.rankLut = make([]uint32, nblks)
   342  
   343  	var totalRank, i uint32
   344  	for i = 0; i < nblks-1; i++ {
   345  		v.rankLut[i] = totalRank
   346  		totalRank += popcountBlock(v.bits, i*wordPerBlk, v.blockSize)
   347  	}
   348  	v.rankLut[nblks-1] = totalRank
   349  	return v
   350  }
   351  
   352  func (v *rankVector) lutSize() uint32 {
   353  	return (v.numBits/v.blockSize + 1) * 4
   354  }
   355  
   356  func (v *rankVector) MarshalSize() int64 {
   357  	return align(v.rawMarshalSize())
   358  }
   359  
   360  func (v *rankVector) rawMarshalSize() int64 {
   361  	return 4 + 4 + int64(v.bitsSize()) + int64(v.lutSize())
   362  }
   363  
   364  func (v *rankVector) WriteTo(w io.Writer) error {
   365  	var buf [4]byte
   366  	endian.PutUint32(buf[:], v.numBits)
   367  	if _, err := w.Write(buf[:]); err != nil {
   368  		return err
   369  	}
   370  	endian.PutUint32(buf[:], v.blockSize)
   371  	if _, err := w.Write(buf[:]); err != nil {
   372  		return err
   373  	}
   374  	if _, err := w.Write(u64SliceToBytes(v.bits)); err != nil {
   375  		return err
   376  	}
   377  	if _, err := w.Write(u32SliceToBytes(v.rankLut)); err != nil {
   378  		return err
   379  	}
   380  
   381  	var zeros [8]byte
   382  	padding := v.MarshalSize() - v.rawMarshalSize()
   383  	_, err := w.Write(zeros[:padding])
   384  	return err
   385  }
   386  
   387  func (v *rankVector) Unmarshal(buf []byte) []byte {
   388  	var cursor int64
   389  	v.numBits = endian.Uint32(buf)
   390  	cursor += 4
   391  	v.blockSize = endian.Uint32(buf[cursor:])
   392  	cursor += 4
   393  
   394  	bitsSize := int64(v.bitsSize())
   395  	v.bits = bytesToU64Slice(buf[cursor : cursor+bitsSize])
   396  	cursor += bitsSize
   397  
   398  	lutSize := int64(v.lutSize())
   399  	v.rankLut = bytesToU32Slice(buf[cursor : cursor+lutSize])
   400  	cursor = align(cursor + lutSize)
   401  	return buf[cursor:]
   402  }
   403  
   404  type rankVectorDense struct {
   405  	rankVector
   406  }
   407  
   408  func (v *rankVectorDense) Init(bitsPerLevel [][]uint64, numBitsPerLevel []uint32) {
   409  	v.rankVector.init(rankDenseBlockSize, bitsPerLevel, numBitsPerLevel)
   410  }
   411  
   412  func (v *rankVectorDense) Rank(pos uint32) uint32 {
   413  	wordPreBlk := uint32(rankDenseBlockSize / wordSize)
   414  	blockOff := pos / rankDenseBlockSize
   415  	bitsOff := pos % rankDenseBlockSize
   416  
   417  	return v.rankLut[blockOff] + popcountBlock(v.bits, blockOff*wordPreBlk, bitsOff+1)
   418  }
   419  
   420  type rankVectorSparse struct {
   421  	rankVector
   422  }
   423  
   424  func (v *rankVectorSparse) Init(bitsPerLevel [][]uint64, numBitsPerLevel []uint32) {
   425  	v.rankVector.init(rankSparseBlockSize, bitsPerLevel, numBitsPerLevel)
   426  }
   427  
   428  func (v *rankVectorSparse) Rank(pos uint32) uint32 {
   429  	wordPreBlk := uint32(rankSparseBlockSize / wordSize)
   430  	blockOff := pos / rankSparseBlockSize
   431  	bitsOff := pos % rankSparseBlockSize
   432  
   433  	return v.rankLut[blockOff] + popcountBlock(v.bits, blockOff*wordPreBlk, bitsOff+1)
   434  }
   435  
   436  const labelTerminator = 0xff
   437  
   438  type labelVector struct {
   439  	labels []byte
   440  }
   441  
   442  func (v *labelVector) Init(labelsPerLevel [][]byte, startLevel, endLevel uint32) {
   443  	numBytes := 1
   444  	for l := startLevel; l < endLevel; l++ {
   445  		numBytes += len(labelsPerLevel[l])
   446  	}
   447  	v.labels = make([]byte, numBytes)
   448  
   449  	var pos uint32
   450  	for l := startLevel; l < endLevel; l++ {
   451  		copy(v.labels[pos:], labelsPerLevel[l])
   452  		pos += uint32(len(labelsPerLevel[l]))
   453  	}
   454  }
   455  
   456  func (v *labelVector) GetLabel(pos uint32) byte {
   457  	return v.labels[pos]
   458  }
   459  
   460  func (v *labelVector) Search(k byte, off, size uint32) (uint32, bool) {
   461  	start := off
   462  	if size > 1 && v.labels[start] == labelTerminator {
   463  		start++
   464  		size--
   465  	}
   466  
   467  	end := start + size
   468  	if end > uint32(len(v.labels)) {
   469  		end = uint32(len(v.labels))
   470  	}
   471  	result := bytes.IndexByte(v.labels[start:end], k)
   472  	if result < 0 {
   473  		return off, false
   474  	}
   475  	return start + uint32(result), true
   476  }
   477  
   478  func (v *labelVector) SearchGreaterThan(label byte, pos, size uint32) (uint32, bool) {
   479  	if size > 1 && v.labels[pos] == labelTerminator {
   480  		pos++
   481  		size--
   482  	}
   483  
   484  	result := sort.Search(int(size), func(i int) bool { return v.labels[pos+uint32(i)] > label })
   485  	if uint32(result) == size {
   486  		return pos + uint32(result) - 1, false
   487  	}
   488  	return pos + uint32(result), true
   489  }
   490  
   491  func (v *labelVector) MarshalSize() int64 {
   492  	return align(v.rawMarshalSize())
   493  }
   494  
   495  func (v *labelVector) rawMarshalSize() int64 {
   496  	return 4 + int64(len(v.labels))
   497  }
   498  
   499  func (v *labelVector) WriteTo(w io.Writer) error {
   500  	var bs [4]byte
   501  	endian.PutUint32(bs[:], uint32(len(v.labels)))
   502  	if _, err := w.Write(bs[:]); err != nil {
   503  		return err
   504  	}
   505  	if _, err := w.Write(v.labels); err != nil {
   506  		return err
   507  	}
   508  
   509  	padding := v.MarshalSize() - v.rawMarshalSize()
   510  	var zeros [8]byte
   511  	_, err := w.Write(zeros[:padding])
   512  	return err
   513  }
   514  
   515  func (v *labelVector) Unmarshal(buf []byte) []byte {
   516  	l := endian.Uint32(buf)
   517  	v.labels = buf[4 : 4+l]
   518  	return buf[align(int64(4+l)):]
   519  }
   520  
   521  const (
   522  	hashShift       = 7
   523  	couldBePositive = 2
   524  )
   525  
   526  // max(hashSuffixLen + realSuffixLen) = 64 bits
   527  // For real suffixes, if the stored key is not long enough to provide
   528  // realSuffixLen suffix bits, its suffix field is cleared (i.e., all 0's)
   529  // to indicate that there is no suffix info associated with the key.
   530  type suffixVector struct {
   531  	bitVector
   532  	hashSuffixLen uint32
   533  	realSuffixLen uint32
   534  }
   535  
   536  func (v *suffixVector) Init(hashLen, realLen uint32, bitsPerLevel [][]uint64, numBitsPerLevel []uint32) *suffixVector {
   537  	v.bitVector.Init(bitsPerLevel, numBitsPerLevel)
   538  	v.hashSuffixLen = hashLen
   539  	v.realSuffixLen = realLen
   540  	return v
   541  }
   542  
   543  func (v *suffixVector) CheckEquality(idx uint32, key []byte, level uint32) bool {
   544  	if !v.hasSuffix() {
   545  		return true
   546  	}
   547  	if idx*v.suffixLen() >= v.numBits {
   548  		return false
   549  	}
   550  
   551  	suffix := v.read(idx)
   552  	if v.isRealSuffix() {
   553  		if suffix == 0 {
   554  			return true
   555  		}
   556  		if uint32(len(key)) < level || (uint32(len(key))-level)*8 < v.realSuffixLen {
   557  			return false
   558  		}
   559  	}
   560  	expected := constructSuffix(key, level, v.realSuffixLen, v.hashSuffixLen)
   561  	return suffix == expected
   562  }
   563  
   564  func (v *suffixVector) Compare(key []byte, idx, level uint32) int {
   565  	if idx*v.suffixLen() >= v.numBits || v.realSuffixLen == 0 {
   566  		return couldBePositive
   567  	}
   568  
   569  	suffix := v.read(idx)
   570  	if v.isMixedSuffix() {
   571  		suffix = extractRealSuffix(suffix, v.realSuffixLen)
   572  	}
   573  	expected := constructRealSuffix(key, level, v.realSuffixLen)
   574  
   575  	if suffix == 0 || expected == 0 {
   576  		// Key length is not long enough to provide suffix, cannot determin which one is the larger one.
   577  		return couldBePositive
   578  	} else if suffix < expected {
   579  		return -1
   580  	} else if suffix == expected {
   581  		return couldBePositive
   582  	} else {
   583  		return 1
   584  	}
   585  }
   586  
   587  func (v *suffixVector) MarshalSize() int64 {
   588  	return align(v.rawMarshalSize())
   589  }
   590  
   591  func (v *suffixVector) rawMarshalSize() int64 {
   592  	return 4 + 4 + 4 + int64(v.bitsSize())
   593  }
   594  
   595  func (v *suffixVector) WriteTo(w io.Writer) error {
   596  	var buf [4]byte
   597  	endian.PutUint32(buf[:], v.numBits)
   598  	if _, err := w.Write(buf[:]); err != nil {
   599  		return err
   600  	}
   601  	endian.PutUint32(buf[:], v.hashSuffixLen)
   602  	if _, err := w.Write(buf[:]); err != nil {
   603  		return err
   604  	}
   605  	endian.PutUint32(buf[:], v.realSuffixLen)
   606  	if _, err := w.Write(buf[:]); err != nil {
   607  		return err
   608  	}
   609  	if _, err := w.Write(u64SliceToBytes(v.bits)); err != nil {
   610  		return err
   611  	}
   612  
   613  	padding := v.MarshalSize() - v.rawMarshalSize()
   614  	var zeros [8]byte
   615  	_, err := w.Write(zeros[:padding])
   616  	return err
   617  }
   618  
   619  func (v *suffixVector) Unmarshal(buf []byte) []byte {
   620  	var cursor int64
   621  	v.numBits = endian.Uint32(buf)
   622  	cursor += 4
   623  	v.hashSuffixLen = endian.Uint32(buf[cursor:])
   624  	cursor += 4
   625  	v.realSuffixLen = endian.Uint32(buf[cursor:])
   626  	cursor += 4
   627  	if v.hasSuffix() {
   628  		bitsSize := int64(v.bitsSize())
   629  		v.bits = bytesToU64Slice(buf[cursor : cursor+bitsSize])
   630  		cursor += bitsSize
   631  	}
   632  	cursor = align(cursor)
   633  	return buf[cursor:]
   634  }
   635  
   636  func (v *suffixVector) read(idx uint32) uint64 {
   637  	suffixLen := v.suffixLen()
   638  	bitPos := idx * suffixLen
   639  	wordOff := bitPos / wordSize
   640  	bitsOff := bitPos % wordSize
   641  	result := (v.bits[wordOff] >> bitsOff) & (1<<suffixLen - 1)
   642  	if bitsOff+suffixLen > wordSize {
   643  		leftLen := wordSize - bitsOff
   644  		rightLen := suffixLen - leftLen
   645  		result |= (v.bits[wordOff+1] & (1<<rightLen - 1)) << leftLen
   646  	}
   647  	return result
   648  }
   649  
   650  func (v *suffixVector) suffixLen() uint32 {
   651  	return v.hashSuffixLen + v.realSuffixLen
   652  }
   653  
   654  func (v *suffixVector) hasSuffix() bool {
   655  	return v.realSuffixLen != 0 || v.hashSuffixLen != 0
   656  }
   657  
   658  func (v *suffixVector) isHashSuffix() bool {
   659  	return v.realSuffixLen == 0 && v.hashSuffixLen != 0
   660  }
   661  
   662  func (v *suffixVector) isRealSuffix() bool {
   663  	return v.realSuffixLen != 0 && v.hashSuffixLen == 0
   664  }
   665  
   666  func (v *suffixVector) isMixedSuffix() bool {
   667  	return v.realSuffixLen != 0 && v.hashSuffixLen != 0
   668  }
   669  
   670  func constructSuffix(key []byte, level uint32, realSuffixLen, hashSuffixLen uint32) uint64 {
   671  	if hashSuffixLen == 0 && realSuffixLen == 0 {
   672  		return 0
   673  	}
   674  	if realSuffixLen == 0 {
   675  		return constructHashSuffix(key, hashSuffixLen)
   676  	}
   677  	if hashSuffixLen == 0 {
   678  		return constructRealSuffix(key, level, realSuffixLen)
   679  	}
   680  	return constructMixedSuffix(key, level, realSuffixLen, hashSuffixLen)
   681  }
   682  
   683  func constructHashSuffix(key []byte, hashSuffixLen uint32) uint64 {
   684  	fp := farm.Fingerprint64(key)
   685  	fp <<= wordSize - hashSuffixLen - hashShift
   686  	fp >>= wordSize - hashSuffixLen
   687  	return fp
   688  }
   689  
   690  func constructRealSuffix(key []byte, level, realSuffixLen uint32) uint64 {
   691  	klen := uint32(len(key))
   692  	if klen < level || (klen-level)*8 < realSuffixLen {
   693  		return 0
   694  	}
   695  
   696  	var suffix uint64
   697  	nbytes := realSuffixLen / 8
   698  	if nbytes > 0 {
   699  		suffix += uint64(key[level])
   700  		for i := 1; uint32(i) < nbytes; i++ {
   701  			suffix <<= 8
   702  			suffix += uint64(key[i])
   703  		}
   704  	}
   705  
   706  	off := realSuffixLen % 8
   707  	if off > 0 {
   708  		suffix <<= off
   709  		remain := uint64(key[level+nbytes])
   710  		remain >>= 8 - off
   711  		suffix += remain
   712  	}
   713  
   714  	return suffix
   715  }
   716  
   717  func constructMixedSuffix(key []byte, level, realSuffixLen, hashSuffixLen uint32) uint64 {
   718  	hs := constructHashSuffix(key, hashSuffixLen)
   719  	rs := constructRealSuffix(key, level, realSuffixLen)
   720  	return (hs << realSuffixLen) | rs
   721  }
   722  
   723  func extractRealSuffix(suffix uint64, suffixLen uint32) uint64 {
   724  	mask := (uint64(1) << suffixLen) - 1
   725  	return suffix & mask
   726  }
   727  
   728  type prefixVector struct {
   729  	hasPrefixVec  rankVectorSparse
   730  	prefixOffsets []uint32
   731  	prefixData    []byte
   732  }
   733  
   734  func (v *prefixVector) Init(hasPrefixBits [][]uint64, numNodesPerLevel []uint32, prefixes [][][]byte) {
   735  	v.hasPrefixVec.Init(hasPrefixBits, numNodesPerLevel)
   736  
   737  	var offset uint32
   738  	for _, level := range prefixes {
   739  		for _, prefix := range level {
   740  			v.prefixOffsets = append(v.prefixOffsets, offset)
   741  			offset += uint32(len(prefix))
   742  			v.prefixData = append(v.prefixData, prefix...)
   743  		}
   744  	}
   745  }
   746  
   747  func (v *prefixVector) CheckPrefix(key []byte, depth uint32, nodeID uint32) (uint32, bool) {
   748  	prefix := v.GetPrefix(nodeID)
   749  	if len(prefix) == 0 {
   750  		return 0, true
   751  	}
   752  
   753  	if int(depth)+len(prefix) > len(key) {
   754  		return 0, false
   755  	}
   756  	if !bytes.Equal(key[depth:depth+uint32(len(prefix))], prefix) {
   757  		return 0, false
   758  	}
   759  	return uint32(len(prefix)), true
   760  }
   761  
   762  func (v *prefixVector) GetPrefix(nodeID uint32) []byte {
   763  	if !v.hasPrefixVec.IsSet(nodeID) {
   764  		return nil
   765  	}
   766  
   767  	prefixID := v.hasPrefixVec.Rank(nodeID) - 1
   768  	start := v.prefixOffsets[prefixID]
   769  	end := uint32(len(v.prefixData))
   770  	if int(prefixID+1) < len(v.prefixOffsets) {
   771  		end = v.prefixOffsets[prefixID+1]
   772  	}
   773  	return v.prefixData[start:end]
   774  }
   775  
   776  func (v *prefixVector) WriteTo(w io.Writer) error {
   777  	if err := v.hasPrefixVec.WriteTo(w); err != nil {
   778  		return err
   779  	}
   780  
   781  	var length [8]byte
   782  	endian.PutUint32(length[:4], uint32(len(v.prefixOffsets)*4))
   783  	endian.PutUint32(length[4:], uint32(len(v.prefixData)))
   784  
   785  	if _, err := w.Write(length[:]); err != nil {
   786  		return err
   787  	}
   788  	if _, err := w.Write(u32SliceToBytes(v.prefixOffsets)); err != nil {
   789  		return err
   790  	}
   791  	if _, err := w.Write(v.prefixData); err != nil {
   792  		return err
   793  	}
   794  
   795  	padding := v.MarshalSize() - v.rawMarshalSize()
   796  	var zeros [8]byte
   797  	_, err := w.Write(zeros[:padding])
   798  	return err
   799  }
   800  
   801  func (v *prefixVector) Unmarshal(b []byte) []byte {
   802  	buf1 := v.hasPrefixVec.Unmarshal(b)
   803  	var cursor int64
   804  	offsetsLen := int64(endian.Uint32(buf1[cursor:]))
   805  	cursor += 4
   806  	dataLen := int64(endian.Uint32(buf1[cursor:]))
   807  	cursor += 4
   808  
   809  	v.prefixOffsets = bytesToU32Slice(buf1[cursor : cursor+offsetsLen])
   810  	cursor += offsetsLen
   811  	v.prefixData = buf1[cursor : cursor+dataLen]
   812  
   813  	return b[v.MarshalSize():]
   814  }
   815  
   816  func (v *prefixVector) rawMarshalSize() int64 {
   817  	return v.hasPrefixVec.MarshalSize() + 8 + int64(len(v.prefixOffsets)*4+len(v.prefixData))
   818  }
   819  
   820  func (v *prefixVector) MarshalSize() int64 {
   821  	return align(v.rawMarshalSize())
   822  }