github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/surf/louds_dense.go (about)

     1  package surf
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  )
     7  
const (
	// denseFanout is the number of label slots owned by each dense node:
	// a node's slots start at nodeID*denseFanout and a slot's label byte is
	// pos%denseFanout.
	denseFanout      = 256
	// denseRankBlkSize is not referenced in this part of the file;
	// presumably the rank block size used by rankVectorDense — TODO confirm.
	denseRankBlkSize = 512
)
    12  
// loudsDense holds the upper levels of the trie, where every node owns
// denseFanout consecutive bit positions (one per possible label byte).
type loudsDense struct {
	// labelVec has the bit at nodeID*denseFanout+label set when the node has
	// a branch for that label byte.
	labelVec    rankVectorDense
	// hasChildVec has the bit at the same position set when that branch
	// continues into a child node rather than terminating in a key.
	hasChildVec rankVectorDense
	// isPrefixVec is indexed by node ID; a set bit means the path leading to
	// the node is itself a stored key.
	isPrefixVec rankVectorDense
	// suffixes and values are both indexed by the position computed in
	// suffixPos.
	suffixes    suffixVector
	values      valueVector
	// prefixVec stores the per-node prefix that is checked against / appended
	// to the key before the node's label byte.
	prefixVec   prefixVector

	// height is the dense end level, i.e. the number of trie levels held in
	// this structure. Init copies it from builder.sparseStartLevel.
	height uint32
}
    24  
    25  func (ld *loudsDense) Init(builder *Builder) *loudsDense {
    26  	ld.height = builder.sparseStartLevel
    27  
    28  	numBitsPerLevel := make([]uint32, ld.height)
    29  	for level := range numBitsPerLevel {
    30  		numBitsPerLevel[level] = uint32(len(builder.ldLabels[level]) * wordSize)
    31  	}
    32  
    33  	ld.labelVec.Init(builder.ldLabels[:ld.height], numBitsPerLevel)
    34  	ld.hasChildVec.Init(builder.ldHasChild[:ld.height], numBitsPerLevel)
    35  	ld.isPrefixVec.Init(builder.ldIsPrefix[:ld.height], builder.nodeCounts)
    36  
    37  	if builder.suffixLen() != 0 {
    38  		hashLen := builder.hashSuffixLen
    39  		realLen := builder.realSuffixLen
    40  		suffixLen := hashLen + realLen
    41  		numSuffixBitsPerLevel := make([]uint32, ld.height)
    42  		for i := range numSuffixBitsPerLevel {
    43  			numSuffixBitsPerLevel[i] = builder.suffixCounts[i] * suffixLen
    44  		}
    45  		ld.suffixes.Init(hashLen, realLen, builder.suffixes[:ld.height], numSuffixBitsPerLevel)
    46  	}
    47  
    48  	ld.values.Init(builder.values[:ld.height], builder.valueSize)
    49  	ld.prefixVec.Init(builder.hasPrefix[:ld.height], builder.nodeCounts[:ld.height], builder.prefixes[:ld.height])
    50  
    51  	return ld
    52  }
    53  
    54  func (ld *loudsDense) Get(key []byte) (sparseNode int64, depth uint32, value []byte, ok bool) {
    55  	var nodeID, pos uint32
    56  	for level := uint32(0); level < ld.height; level++ {
    57  		prefixLen, ok := ld.prefixVec.CheckPrefix(key, depth, nodeID)
    58  		if !ok {
    59  			return -1, depth, nil, false
    60  		}
    61  		depth += prefixLen
    62  
    63  		pos = nodeID * denseFanout
    64  		if depth >= uint32(len(key)) {
    65  			if ok = ld.isPrefixVec.IsSet(nodeID); ok {
    66  				valPos := ld.suffixPos(pos, true)
    67  				if ok = ld.suffixes.CheckEquality(valPos, key, depth+1); ok {
    68  					value = ld.values.Get(valPos)
    69  				}
    70  			}
    71  			return -1, depth, value, ok
    72  		}
    73  		pos += uint32(key[depth])
    74  
    75  		if !ld.labelVec.IsSet(pos) {
    76  			return -1, depth, nil, false
    77  		}
    78  
    79  		if !ld.hasChildVec.IsSet(pos) {
    80  			valPos := ld.suffixPos(pos, false)
    81  			if ok = ld.suffixes.CheckEquality(valPos, key, depth+1); ok {
    82  				value = ld.values.Get(valPos)
    83  			}
    84  			return -1, depth, value, ok
    85  		}
    86  
    87  		nodeID = ld.childNodeID(pos)
    88  		depth++
    89  	}
    90  
    91  	return int64(nodeID), depth, nil, true
    92  }
    93  
    94  func (ld *loudsDense) MarshalSize() int64 {
    95  	return align(ld.rawMarshalSize())
    96  }
    97  
    98  func (ld *loudsDense) rawMarshalSize() int64 {
    99  	return 4 + ld.labelVec.MarshalSize() + ld.hasChildVec.MarshalSize() + ld.isPrefixVec.MarshalSize() + ld.suffixes.MarshalSize() + ld.prefixVec.MarshalSize()
   100  }
   101  
   102  func (ld *loudsDense) WriteTo(w io.Writer) error {
   103  	var bs [4]byte
   104  	endian.PutUint32(bs[:], ld.height)
   105  
   106  	if _, err := w.Write(bs[:]); err != nil {
   107  		return err
   108  	}
   109  	if err := ld.labelVec.WriteTo(w); err != nil {
   110  		return err
   111  	}
   112  	if err := ld.hasChildVec.WriteTo(w); err != nil {
   113  		return err
   114  	}
   115  	if err := ld.isPrefixVec.WriteTo(w); err != nil {
   116  		return err
   117  	}
   118  	if err := ld.suffixes.WriteTo(w); err != nil {
   119  		return err
   120  	}
   121  	if err := ld.prefixVec.WriteTo(w); err != nil {
   122  		return err
   123  	}
   124  
   125  	padding := ld.MarshalSize() - ld.rawMarshalSize()
   126  	var zeros [8]byte
   127  	_, err := w.Write(zeros[:padding])
   128  	return err
   129  }
   130  
   131  func (ld *loudsDense) Unmarshal(buf []byte) []byte {
   132  	ld.height = endian.Uint32(buf)
   133  	buf1 := buf[4:]
   134  	buf1 = ld.labelVec.Unmarshal(buf1)
   135  	buf1 = ld.hasChildVec.Unmarshal(buf1)
   136  	buf1 = ld.isPrefixVec.Unmarshal(buf1)
   137  	buf1 = ld.suffixes.Unmarshal(buf1)
   138  	buf1 = ld.prefixVec.Unmarshal(buf1)
   139  
   140  	sz := align(int64(len(buf) - len(buf1)))
   141  	return buf[sz:]
   142  }
   143  
   144  func (ld *loudsDense) childNodeID(pos uint32) uint32 {
   145  	return ld.hasChildVec.Rank(pos)
   146  }
   147  
   148  func (ld *loudsDense) suffixPos(pos uint32, isPrefix bool) uint32 {
   149  	nodeID := pos / denseFanout
   150  	suffixPos := ld.labelVec.Rank(pos) - ld.hasChildVec.Rank(pos) + ld.isPrefixVec.Rank(nodeID) - 1
   151  
   152  	// Correct off by one error when current have a leaf node at label 0.
   153  	// Otherwise suffixPos will point to that leaf node's suffix.
   154  	if isPrefix && ld.labelVec.IsSet(pos) && !ld.hasChildVec.IsSet(pos) {
   155  		suffixPos--
   156  	}
   157  	return suffixPos
   158  }
   159  
   160  func (ld *loudsDense) nextPos(pos uint32) uint32 {
   161  	return pos + ld.labelVec.DistanceToNextSetBit(pos)
   162  }
   163  
   164  func (ld *loudsDense) prevPos(pos uint32) (uint32, bool) {
   165  	dist := ld.labelVec.DistanceToPrevSetBit(pos)
   166  	if pos < dist {
   167  		return 0, true
   168  	}
   169  	return pos - dist, false
   170  }
   171  
// denseIter is a cursor over the keys stored in a loudsDense.
type denseIter struct {
	// valid is cleared when a move runs off either end of the dense levels
	// and set when the iterator is positioned.
	valid         bool
	// searchComp, leftComp and rightComp are all true when the current key
	// terminates inside the dense levels (see IsComplete). Seek clears
	// searchComp, MoveToLeftMostKey clears leftComp and MoveToRightMostKey
	// clears rightComp when the traversal reaches the last dense level on a
	// branch that still has a child.
	searchComp    bool
	leftComp      bool
	rightComp     bool
	// ld is the trie being iterated.
	ld            *loudsDense
	// sendOutNodeID and sendOutDepth record the child node ID and key depth
	// at which the traversal left the dense levels; presumably consumed by
	// the sparse-level iterator — TODO confirm against the caller.
	sendOutNodeID uint32
	sendOutDepth  uint32
	// keyBuf accumulates the node prefixes and label bytes along the current
	// path.
	keyBuf        []byte
	// level is the index of the current level in posInTrie and prefixLen.
	level         uint32
	// posInTrie holds, per level, the current position in the label vector.
	posInTrie     []uint32
	// prefixLen holds, per level, the length of keyBuf up to and including
	// that level's label byte.
	prefixLen     []uint32
	// atPrefixKey is true when the iterator points at the prefix key of the
	// node at the current level; Key then omits the trailing label byte.
	atPrefixKey   bool
}
   186  
   187  func (it *denseIter) Init(ld *loudsDense) {
   188  	it.ld = ld
   189  	it.posInTrie = make([]uint32, ld.height)
   190  	it.prefixLen = make([]uint32, ld.height)
   191  }
   192  
   193  func (it *denseIter) Reset() {
   194  	it.valid = false
   195  	it.level = 0
   196  	it.atPrefixKey = false
   197  	it.keyBuf = it.keyBuf[:0]
   198  }
   199  
   200  func (it *denseIter) Next() {
   201  	if it.ld.height == 0 {
   202  		return
   203  	}
   204  	if it.atPrefixKey {
   205  		it.atPrefixKey = false
   206  		it.MoveToLeftMostKey()
   207  		return
   208  	}
   209  
   210  	pos := it.posInTrie[it.level]
   211  	nextPos := it.ld.nextPos(pos)
   212  
   213  	for pos == nextPos || nextPos/denseFanout > pos/denseFanout {
   214  		if it.level == 0 {
   215  			it.valid = false
   216  			return
   217  		}
   218  		it.level--
   219  		pos = it.posInTrie[it.level]
   220  		nextPos = it.ld.nextPos(pos)
   221  	}
   222  	it.setAt(it.level, nextPos)
   223  	it.MoveToLeftMostKey()
   224  }
   225  
   226  func (it *denseIter) Prev() {
   227  	if it.ld.height == 0 {
   228  		return
   229  	}
   230  	if it.atPrefixKey {
   231  		it.atPrefixKey = false
   232  		it.level--
   233  	}
   234  	pos := it.posInTrie[it.level]
   235  	prevPos, out := it.ld.prevPos(pos)
   236  	if out {
   237  		it.valid = false
   238  		return
   239  	}
   240  
   241  	for prevPos/denseFanout < pos/denseFanout {
   242  		nodeID := pos / denseFanout
   243  		if it.ld.isPrefixVec.IsSet(nodeID) {
   244  			it.truncate(it.level)
   245  			it.atPrefixKey = true
   246  			it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   247  			return
   248  		}
   249  
   250  		if it.level == 0 {
   251  			it.valid = false
   252  			return
   253  		}
   254  		it.level--
   255  		pos = it.posInTrie[it.level]
   256  		prevPos, out = it.ld.prevPos(pos)
   257  		if out {
   258  			it.valid = false
   259  			return
   260  		}
   261  	}
   262  	it.setAt(it.level, prevPos)
   263  	it.MoveToRightMostKey()
   264  }
   265  
   266  func (it *denseIter) Seek(key []byte) bool {
   267  	var nodeID, pos, depth uint32
   268  	for it.level = 0; it.level < it.ld.height; it.level++ {
   269  		prefix := it.ld.prefixVec.GetPrefix(nodeID)
   270  		var prefixCmp int
   271  		if len(prefix) != 0 {
   272  			end := int(depth) + len(prefix)
   273  			if end > len(key) {
   274  				end = len(key)
   275  			}
   276  			prefixCmp = bytes.Compare(prefix, key[depth:end])
   277  		}
   278  
   279  		if prefixCmp < 0 {
   280  			if nodeID == 0 {
   281  				it.valid = false
   282  				return false
   283  			}
   284  			it.level--
   285  			it.Next()
   286  			return false
   287  		}
   288  
   289  		pos = nodeID * denseFanout
   290  		depth += uint32(len(prefix))
   291  		if depth >= uint32(len(key)) || prefixCmp > 0 {
   292  			if pos > 0 {
   293  				it.append(it.ld.nextPos(pos - 1))
   294  			} else {
   295  				it.SetToFirstInRoot()
   296  			}
   297  			if it.ld.isPrefixVec.IsSet(nodeID) {
   298  				it.atPrefixKey = true
   299  				it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   300  			} else {
   301  				it.MoveToLeftMostKey()
   302  			}
   303  			return prefixCmp == 0
   304  		}
   305  
   306  		pos += uint32(key[depth])
   307  		it.append(pos)
   308  		depth++
   309  
   310  		if !it.ld.labelVec.IsSet(pos) {
   311  			it.Next()
   312  			return false
   313  		}
   314  
   315  		if !it.ld.hasChildVec.IsSet(pos) {
   316  			return it.compareSuffixGreaterThan(key, pos, depth)
   317  		}
   318  
   319  		nodeID = it.ld.childNodeID(pos)
   320  	}
   321  
   322  	it.level--
   323  	it.sendOutNodeID = nodeID
   324  	it.sendOutDepth = depth
   325  	it.valid, it.searchComp, it.leftComp, it.rightComp = true, false, true, true
   326  	return true
   327  }
   328  
   329  func (it *denseIter) Key() []byte {
   330  	if it.atPrefixKey {
   331  		return it.keyBuf[:len(it.keyBuf)-1]
   332  	}
   333  	return it.keyBuf
   334  }
   335  
   336  func (it *denseIter) Value() []byte {
   337  	valPos := it.ld.suffixPos(it.posInTrie[it.level], it.atPrefixKey)
   338  	return it.ld.values.Get(valPos)
   339  }
   340  
   341  func (it *denseIter) Compare(key []byte) int {
   342  	itKey := it.Key()
   343  
   344  	cmpLen := len(itKey)
   345  	if cmpLen > len(key) {
   346  		cmpLen = len(key)
   347  	}
   348  	cmp := bytes.Compare(itKey[:cmpLen], key[:cmpLen])
   349  	if cmp != 0 {
   350  		return cmp
   351  	}
   352  	if len(itKey) > len(key) {
   353  		return 1
   354  	}
   355  	if len(itKey) == len(key) && it.atPrefixKey {
   356  		return 0
   357  	}
   358  
   359  	if it.IsComplete() {
   360  		suffixPos := it.ld.suffixPos(it.posInTrie[it.level], it.atPrefixKey)
   361  		return it.ld.suffixes.Compare(key, suffixPos, uint32(len(itKey)))
   362  	}
   363  	return cmp
   364  }
   365  
   366  func (it *denseIter) IsComplete() bool {
   367  	return it.searchComp && (it.leftComp && it.rightComp)
   368  }
   369  
   370  func (it *denseIter) append(pos uint32) {
   371  	nodeID := pos / denseFanout
   372  	prefix := it.ld.prefixVec.GetPrefix(nodeID)
   373  	it.keyBuf = append(it.keyBuf, prefix...)
   374  	it.keyBuf = append(it.keyBuf, byte(pos%denseFanout))
   375  	it.posInTrie[it.level] = pos
   376  	it.prefixLen[it.level] = uint32(len(prefix)) + 1
   377  	if it.level != 0 {
   378  		it.prefixLen[it.level] += it.prefixLen[it.level-1]
   379  	}
   380  }
   381  
   382  func (it *denseIter) MoveToLeftMostKey() {
   383  	pos := it.posInTrie[it.level]
   384  	if !it.ld.hasChildVec.IsSet(pos) {
   385  		it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   386  		return
   387  	}
   388  
   389  	for it.level < it.ld.height-1 {
   390  		it.level++
   391  		nodeID := it.ld.childNodeID(pos)
   392  		if it.ld.isPrefixVec.IsSet(nodeID) {
   393  			it.append(it.ld.nextPos(nodeID*denseFanout - 1))
   394  			it.atPrefixKey = true
   395  			it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   396  			return
   397  		}
   398  
   399  		pos = it.ld.nextPos(nodeID*denseFanout - 1)
   400  		it.append(pos)
   401  
   402  		// If trie branch terminates
   403  		if !it.ld.hasChildVec.IsSet(pos) {
   404  			it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   405  			return
   406  		}
   407  	}
   408  	it.sendOutNodeID = it.ld.childNodeID(pos)
   409  	it.sendOutDepth = uint32(len(it.keyBuf))
   410  	it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, false, true
   411  }
   412  
   413  func (it *denseIter) MoveToRightMostKey() {
   414  	pos := it.posInTrie[it.level]
   415  	if !it.ld.hasChildVec.IsSet(pos) {
   416  		it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   417  		return
   418  	}
   419  
   420  	var out bool
   421  	for it.level < it.ld.height-1 {
   422  		it.level++
   423  		nodeID := it.ld.childNodeID(pos)
   424  		pos, out = it.ld.prevPos((nodeID + 1) * denseFanout)
   425  		if out {
   426  			it.valid = false
   427  			return
   428  		}
   429  		it.append(pos)
   430  
   431  		// If trie branch terminates
   432  		if !it.ld.hasChildVec.IsSet(pos) {
   433  			it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   434  			return
   435  		}
   436  	}
   437  	it.sendOutNodeID = it.ld.childNodeID(pos)
   438  	it.sendOutDepth = uint32(len(it.keyBuf))
   439  	it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, false
   440  }
   441  
   442  func (it *denseIter) SetToFirstInRoot() {
   443  	if it.ld.labelVec.IsSet(0) {
   444  		it.append(0)
   445  	} else {
   446  		it.append(it.ld.nextPos(0))
   447  	}
   448  }
   449  
   450  func (it *denseIter) SetToLastInRoot() {
   451  	pos, _ := it.ld.prevPos(denseFanout)
   452  	it.append(pos)
   453  }
   454  
   455  func (it *denseIter) setAt(level, pos uint32) {
   456  	it.keyBuf = append(it.keyBuf[:it.prefixLen[level]-1], byte(pos%denseFanout))
   457  	it.posInTrie[it.level] = pos
   458  }
   459  
   460  func (it *denseIter) truncate(level uint32) {
   461  	it.keyBuf = it.keyBuf[:it.prefixLen[level]]
   462  }
   463  
   464  func (it *denseIter) compareSuffixGreaterThan(key []byte, pos, level uint32) bool {
   465  	cmp := it.ld.suffixes.Compare(key, it.ld.suffixPos(pos, false), level)
   466  	if cmp < 0 {
   467  		it.Next()
   468  		return false
   469  	}
   470  	it.valid, it.searchComp, it.leftComp, it.rightComp = true, true, true, true
   471  	return cmp == couldBePositive
   472  }