github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/surf/louds_sparse.go (about)

     1  package surf
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  )
     7  
// loudsSparse holds the sparse-encoded levels of the trie: four scalar
// counters plus the per-position vectors consulted by Get and sparseIter.
type loudsSparse struct {
	// height is the total number of label levels (len(builder.lsLabels) in Init).
	height          uint32
	// startLevel is the first level stored here (builder.sparseStartLevel).
	startLevel      uint32
	// denseNodeCount is the sum of builder.nodeCounts over the levels below
	// startLevel; node IDs are rebased by it before indexing the sparse vectors.
	denseNodeCount  uint32
	// denseChildCount is added to hasChildVec.Rank to produce child node IDs.
	// Init leaves it at zero when startLevel is zero.
	denseChildCount uint32

	// labelVec stores the label byte at each position.
	labelVec    labelVector
	// hasChildVec marks positions whose label leads to a child node.
	hasChildVec rankVectorSparse
	// loudsVec has a set bit at the first label position of each node
	// (see firstLabelPos and isEndOfNode).
	loudsVec    selectVector
	// suffixes is indexed by suffixPos and compared against the key tail.
	suffixes    suffixVector
	// values is indexed by suffixPos and returns the stored value bytes.
	values      valueVector
	// prefixVec is indexed by prefixID and holds per-node prefixes.
	prefixVec   prefixVector
}
    21  
    22  func (ls *loudsSparse) Init(builder *Builder) *loudsSparse {
    23  	ls.height = uint32(len(builder.lsLabels))
    24  	ls.startLevel = builder.sparseStartLevel
    25  
    26  	for l := 0; uint32(l) < ls.startLevel; l++ {
    27  		ls.denseNodeCount += builder.nodeCounts[l]
    28  	}
    29  
    30  	if ls.startLevel != 0 {
    31  		ls.denseChildCount = ls.denseNodeCount + builder.nodeCounts[ls.startLevel] - 1
    32  	}
    33  
    34  	ls.labelVec.Init(builder.lsLabels, ls.startLevel, ls.height)
    35  
    36  	numItemsPerLevel := make([]uint32, ls.sparseLevels())
    37  	for level := range numItemsPerLevel {
    38  		numItemsPerLevel[level] = uint32(len(builder.lsLabels[int(ls.startLevel)+level]))
    39  	}
    40  	ls.hasChildVec.Init(builder.lsHasChild[ls.startLevel:], numItemsPerLevel)
    41  	ls.loudsVec.Init(builder.lsLoudsBits[ls.startLevel:], numItemsPerLevel)
    42  
    43  	if builder.suffixLen() != 0 {
    44  		hashLen := builder.hashSuffixLen
    45  		realLen := builder.realSuffixLen
    46  		suffixLen := hashLen + realLen
    47  		numSuffixBitsPerLevel := make([]uint32, ls.sparseLevels())
    48  		for i := range numSuffixBitsPerLevel {
    49  			numSuffixBitsPerLevel[i] = builder.suffixCounts[int(ls.startLevel)+i] * suffixLen
    50  		}
    51  		ls.suffixes.Init(hashLen, realLen, builder.suffixes[ls.startLevel:], numSuffixBitsPerLevel)
    52  	}
    53  
    54  	ls.values.Init(builder.values[ls.startLevel:], builder.valueSize)
    55  	ls.prefixVec.Init(builder.hasPrefix[ls.startLevel:], builder.nodeCounts[ls.startLevel:], builder.prefixes[ls.startLevel:])
    56  
    57  	return ls
    58  }
    59  
    60  func (ls *loudsSparse) Get(key []byte, startDepth, nodeID uint32) (value []byte, ok bool) {
    61  	var (
    62  		pos       = ls.firstLabelPos(nodeID)
    63  		depth     uint32
    64  		prefixLen uint32
    65  	)
    66  	for depth = startDepth; depth < uint32(len(key)); depth++ {
    67  		prefixLen, ok = ls.prefixVec.CheckPrefix(key, depth, ls.prefixID(nodeID))
    68  		if !ok {
    69  			return nil, false
    70  		}
    71  		depth += prefixLen
    72  
    73  		if depth >= uint32(len(key)) {
    74  			break
    75  		}
    76  
    77  		if pos, ok = ls.labelVec.Search(key[depth], pos, ls.nodeSize(pos)); !ok {
    78  			return nil, false
    79  		}
    80  
    81  		if !ls.hasChildVec.IsSet(pos) {
    82  			valPos := ls.suffixPos(pos)
    83  			if ok = ls.suffixes.CheckEquality(valPos, key, depth+1); ok {
    84  				value = ls.values.Get(valPos)
    85  			}
    86  			return value, ok
    87  		}
    88  
    89  		nodeID = ls.childNodeID(pos)
    90  		pos = ls.firstLabelPos(nodeID)
    91  	}
    92  
    93  	if ls.labelVec.GetLabel(pos) == labelTerminator && !ls.hasChildVec.IsSet(pos) {
    94  		valPos := ls.suffixPos(pos)
    95  		if ok = ls.suffixes.CheckEquality(valPos, key, depth+1); ok {
    96  			value = ls.values.Get(valPos)
    97  		}
    98  		return value, ok
    99  	}
   100  
   101  	return nil, false
   102  }
   103  
   104  func (ls *loudsSparse) MarshalSize() int64 {
   105  	return align(ls.rawMarshalSize())
   106  }
   107  
   108  func (ls *loudsSparse) rawMarshalSize() int64 {
   109  	return 4*4 + ls.labelVec.MarshalSize() + ls.hasChildVec.MarshalSize() + ls.loudsVec.MarshalSize() +
   110  		ls.suffixes.MarshalSize() + ls.prefixVec.MarshalSize()
   111  }
   112  
   113  func (ls *loudsSparse) WriteTo(w io.Writer) error {
   114  	var bs [4]byte
   115  	endian.PutUint32(bs[:], ls.height)
   116  	if _, err := w.Write(bs[:]); err != nil {
   117  		return err
   118  	}
   119  	endian.PutUint32(bs[:], ls.startLevel)
   120  	if _, err := w.Write(bs[:]); err != nil {
   121  		return err
   122  	}
   123  	endian.PutUint32(bs[:], ls.denseNodeCount)
   124  	if _, err := w.Write(bs[:]); err != nil {
   125  		return err
   126  	}
   127  	endian.PutUint32(bs[:], ls.denseChildCount)
   128  	if _, err := w.Write(bs[:]); err != nil {
   129  		return err
   130  	}
   131  	if err := ls.labelVec.WriteTo(w); err != nil {
   132  		return err
   133  	}
   134  	if err := ls.hasChildVec.WriteTo(w); err != nil {
   135  		return err
   136  	}
   137  	if err := ls.loudsVec.WriteTo(w); err != nil {
   138  		return err
   139  	}
   140  	if err := ls.suffixes.WriteTo(w); err != nil {
   141  		return err
   142  	}
   143  	if err := ls.prefixVec.WriteTo(w); err != nil {
   144  		return err
   145  	}
   146  
   147  	padding := ls.MarshalSize() - ls.rawMarshalSize()
   148  	var zeros [8]byte
   149  	_, err := w.Write(zeros[:padding])
   150  	return err
   151  }
   152  
   153  func (ls *loudsSparse) Unmarshal(buf []byte) []byte {
   154  	buf1 := buf
   155  	ls.height = endian.Uint32(buf1)
   156  	buf1 = buf1[4:]
   157  	ls.startLevel = endian.Uint32(buf1)
   158  	buf1 = buf1[4:]
   159  	ls.denseNodeCount = endian.Uint32(buf1)
   160  	buf1 = buf1[4:]
   161  	ls.denseChildCount = endian.Uint32(buf1)
   162  	buf1 = buf1[4:]
   163  
   164  	buf1 = ls.labelVec.Unmarshal(buf1)
   165  	buf1 = ls.hasChildVec.Unmarshal(buf1)
   166  	buf1 = ls.loudsVec.Unmarshal(buf1)
   167  	buf1 = ls.suffixes.Unmarshal(buf1)
   168  	buf1 = ls.prefixVec.Unmarshal(buf1)
   169  
   170  	sz := align(int64(len(buf) - len(buf1)))
   171  	return buf[sz:]
   172  }
   173  
   174  func (ls *loudsSparse) suffixPos(pos uint32) uint32 {
   175  	return pos - ls.hasChildVec.Rank(pos)
   176  }
   177  
   178  func (ls *loudsSparse) firstLabelPos(nodeID uint32) uint32 {
   179  	return ls.loudsVec.Select(nodeID + 1 - ls.denseNodeCount)
   180  }
   181  
   182  func (ls *loudsSparse) sparseLevels() uint32 {
   183  	return ls.height - ls.startLevel
   184  }
   185  func (ls *loudsSparse) prefixID(nodeID uint32) uint32 {
   186  	return nodeID - ls.denseNodeCount
   187  }
   188  
   189  func (ls *loudsSparse) lastLabelPos(nodeID uint32) uint32 {
   190  	nextRank := nodeID + 2 - ls.denseNodeCount
   191  	if nextRank > ls.loudsVec.numOnes {
   192  		return ls.loudsVec.numBits - 1
   193  	}
   194  	return ls.loudsVec.Select(nextRank) - 1
   195  }
   196  
   197  func (ls *loudsSparse) childNodeID(pos uint32) uint32 {
   198  	return ls.hasChildVec.Rank(pos) + ls.denseChildCount
   199  }
   200  
   201  func (ls *loudsSparse) nodeSize(pos uint32) uint32 {
   202  	return ls.loudsVec.DistanceToNextSetBit(pos)
   203  }
   204  
   205  func (ls *loudsSparse) isEndOfNode(pos uint32) bool {
   206  	return pos == ls.loudsVec.numBits-1 || ls.loudsVec.IsSet(pos+1)
   207  }
   208  
// sparseIter walks the keys stored in a loudsSparse. It keeps one entry per
// sparse level describing the path from the start node to the current label.
type sparseIter struct {
	// valid is true while the iterator is positioned on a key.
	valid        bool
	// atTerminator is set when the current label is labelTerminator and is not
	// the last label of its node; Key then drops the trailing label byte.
	atTerminator bool
	// ls is the trie being iterated.
	ls           *loudsSparse
	// startLevel is copied from ls.startLevel by Init.
	startLevel   uint32
	// startNodeID is the node Seek and the MoveTo* methods start from. It is
	// not assigned anywhere in this file — presumably set by the owning iterator.
	startNodeID  uint32
	// startDepth is the key offset at which Seek and Compare begin. It is not
	// assigned anywhere in this file — presumably set by the owning iterator.
	startDepth   uint32
	// level is the current index into posInTrie, nodeID and prefixLen.
	level        uint32
	// keyBuf accumulates node prefixes and labels along the current path.
	keyBuf       []byte
	// posInTrie[i] is the label position chosen at level i.
	posInTrie    []uint32
	// nodeID[i] is the node visited at level i.
	nodeID       []uint32
	// prefixLen[i] is the cumulative keyBuf length through level i
	// (prefix bytes plus one label byte per level).
	prefixLen    []uint32
}
   222  
   223  func (it *sparseIter) Init(ls *loudsSparse) {
   224  	it.ls = ls
   225  	it.startLevel = ls.startLevel
   226  	it.posInTrie = make([]uint32, ls.height-ls.startLevel)
   227  	it.prefixLen = make([]uint32, ls.height-ls.startLevel)
   228  	it.nodeID = make([]uint32, ls.height-ls.startLevel)
   229  }
   230  
   231  func (it *sparseIter) Next() {
   232  	it.atTerminator = false
   233  	pos := it.posInTrie[it.level] + 1
   234  	nodeID := it.nodeID[it.level]
   235  
   236  	for pos >= it.ls.loudsVec.numBits || it.ls.loudsVec.IsSet(pos) {
   237  		if it.level == 0 {
   238  			it.valid = false
   239  			it.keyBuf = it.keyBuf[:0]
   240  			return
   241  		}
   242  		it.level--
   243  		pos = it.posInTrie[it.level] + 1
   244  		nodeID = it.nodeID[it.level]
   245  	}
   246  	it.setAt(it.level, pos, nodeID)
   247  	it.MoveToLeftMostKey()
   248  }
   249  
   250  func (it *sparseIter) Prev() {
   251  	it.atTerminator = false
   252  	pos := it.posInTrie[it.level]
   253  	nodeID := it.nodeID[it.level]
   254  
   255  	if pos == 0 {
   256  		it.valid = false
   257  		return
   258  	}
   259  	for it.ls.loudsVec.IsSet(pos) {
   260  		if it.level == 0 {
   261  			it.valid = false
   262  			it.keyBuf = it.keyBuf[:0]
   263  			return
   264  		}
   265  		it.level--
   266  		pos = it.posInTrie[it.level]
   267  		nodeID = it.nodeID[it.level]
   268  	}
   269  	it.setAt(it.level, pos-1, nodeID)
   270  	it.MoveToRightMostKey()
   271  }
   272  
// Seek positions the iterator by descending from startNodeID along key,
// beginning at byte offset startDepth.
//
// The result is false on every path except two: the leaf path, where it is
// whatever compareSuffixGreaterThan reports, and the final fall-through after
// all sparse levels have been consumed.
// NOTE(review): the result presumably tells the caller that the position
// found may be a false positive — confirm against the caller.
func (it *sparseIter) Seek(key []byte) bool {
	nodeID := it.startNodeID
	pos := it.ls.firstLabelPos(nodeID)
	var ok bool
	depth := it.startDepth

	for it.level = 0; it.level < it.ls.sparseLevels(); it.level++ {
		// Compare the node's prefix with the matching slice of key, clamped
		// to the end of key.
		prefix := it.ls.prefixVec.GetPrefix(it.ls.prefixID(nodeID))
		var prefixCmp int
		if len(prefix) != 0 {
			end := int(depth) + len(prefix)
			if end > len(key) {
				end = len(key)
			}
			prefixCmp = bytes.Compare(prefix, key[depth:end])
		}

		// The prefix sorts before key: nothing in this node qualifies, so
		// step back to the parent level and advance from there.
		if prefixCmp < 0 {
			if it.level == 0 {
				it.valid = false
				return false
			}
			it.level--
			it.Next()
			return false
		}

		// Key is exhausted or the prefix sorts after it: take the left-most
		// key under this node.
		depth += uint32(len(prefix))
		if depth >= uint32(len(key)) || prefixCmp > 0 {
			it.append(it.ls.labelVec.GetLabel(pos), pos, nodeID)
			it.MoveToLeftMostKey()
			return false
		}

		nodeSize := it.ls.nodeSize(pos)
		pos, ok = it.ls.labelVec.Search(key[depth], pos, nodeSize)
		if !ok {
			// No exact label: continue from the next greater label, if any.
			it.moveToLeftInNextSubTrie(pos, nodeID, nodeSize, key[depth])
			return false
		}

		it.append(key[depth], pos, nodeID)

		// Leaf reached: the stored suffix decides the outcome.
		if !it.ls.hasChildVec.IsSet(pos) {
			return it.compareSuffixGreaterThan(key, pos, depth+1)
		}

		nodeID = it.ls.childNodeID(pos)
		pos = it.ls.firstLabelPos(nodeID)
		depth++
	}

	// All sparse levels consumed. A childless terminator that is not the last
	// label of its node becomes the current position.
	if it.ls.labelVec.GetLabel(pos) == labelTerminator && !it.ls.hasChildVec.IsSet(pos) && !it.ls.isEndOfNode(pos) {
		it.append(labelTerminator, pos, nodeID)
		it.atTerminator = true
		it.valid = true
		return false
	}

	if uint32(len(key)) <= depth {
		it.MoveToLeftMostKey()
		return false
	}

	it.valid = true
	return true
}
   340  
   341  func (it *sparseIter) Key() []byte {
   342  	if it.atTerminator {
   343  		return it.keyBuf[:len(it.keyBuf)-1]
   344  	}
   345  	return it.keyBuf
   346  }
   347  
   348  func (it *sparseIter) Value() []byte {
   349  	valPos := it.ls.suffixPos(it.posInTrie[it.level])
   350  	return it.ls.values.Get(valPos)
   351  }
   352  
   353  func (it *sparseIter) Compare(key []byte) int {
   354  	itKey := it.Key()
   355  	startDepth := int(it.startDepth)
   356  	if startDepth > len(key) {
   357  		panic("dense compare have bug")
   358  	}
   359  	if startDepth == len(key) {
   360  		if len(itKey) == 0 {
   361  			return 0
   362  		}
   363  		return 1
   364  	}
   365  	cmpKey := key[startDepth:]
   366  
   367  	cmpLen := len(itKey)
   368  	if cmpLen > len(cmpKey) {
   369  		cmpLen = len(cmpKey)
   370  	}
   371  	cmp := bytes.Compare(itKey[:cmpLen], cmpKey[:cmpLen])
   372  	if cmp != 0 {
   373  		return cmp
   374  	}
   375  	if len(itKey) > len(cmpKey) {
   376  		return 1
   377  	}
   378  	if len(itKey) == len(cmpKey) && it.atTerminator {
   379  		return 0
   380  	}
   381  	suffixPos := it.ls.suffixPos(it.posInTrie[it.level])
   382  	return it.ls.suffixes.Compare(key, suffixPos, uint32(len(itKey)+startDepth))
   383  }
   384  
   385  func (it *sparseIter) Reset() {
   386  	it.valid = false
   387  	it.level = 0
   388  	it.atTerminator = false
   389  	it.keyBuf = it.keyBuf[:0]
   390  }
   391  
   392  func (it *sparseIter) MoveToLeftMostKey() {
   393  	if len(it.keyBuf) == 0 {
   394  		pos := it.ls.firstLabelPos(it.startNodeID)
   395  		label := it.ls.labelVec.GetLabel(pos)
   396  		it.append(label, pos, it.startNodeID)
   397  	}
   398  
   399  	pos := it.posInTrie[it.level]
   400  	label := it.ls.labelVec.GetLabel(pos)
   401  
   402  	if !it.ls.hasChildVec.IsSet(pos) {
   403  		if label == labelTerminator && !it.ls.isEndOfNode(pos) {
   404  			it.atTerminator = true
   405  		}
   406  		it.valid = true
   407  		return
   408  	}
   409  
   410  	for it.level < it.ls.sparseLevels() {
   411  		it.level++
   412  		nodeID := it.ls.childNodeID(pos)
   413  		pos = it.ls.firstLabelPos(nodeID)
   414  		label = it.ls.labelVec.GetLabel(pos)
   415  
   416  		if !it.ls.hasChildVec.IsSet(pos) {
   417  			it.append(label, pos, nodeID)
   418  			if label == labelTerminator && !it.ls.isEndOfNode(pos) {
   419  				it.atTerminator = true
   420  			}
   421  			it.valid = true
   422  			return
   423  		}
   424  		it.append(label, pos, nodeID)
   425  	}
   426  	panic("unreachable")
   427  }
   428  
   429  func (it *sparseIter) MoveToRightMostKey() {
   430  	if len(it.keyBuf) == 0 {
   431  		pos := it.ls.lastLabelPos(it.startNodeID)
   432  		label := it.ls.labelVec.GetLabel(pos)
   433  		it.append(label, pos, it.startNodeID)
   434  	}
   435  
   436  	pos := it.posInTrie[it.level]
   437  	label := it.ls.labelVec.GetLabel(pos)
   438  
   439  	if !it.ls.hasChildVec.IsSet(pos) {
   440  		if label == labelTerminator && !it.ls.isEndOfNode(pos) {
   441  			it.atTerminator = true
   442  		}
   443  		it.valid = true
   444  		return
   445  	}
   446  
   447  	for it.level < it.ls.sparseLevels() {
   448  		it.level++
   449  		nodeID := it.ls.childNodeID(pos)
   450  		pos = it.ls.lastLabelPos(nodeID)
   451  		label = it.ls.labelVec.GetLabel(pos)
   452  
   453  		if !it.ls.hasChildVec.IsSet(pos) {
   454  			it.append(label, pos, nodeID)
   455  			if label == labelTerminator && !it.ls.isEndOfNode(pos) {
   456  				it.atTerminator = true
   457  			}
   458  			it.valid = true
   459  			return
   460  		}
   461  		it.append(label, pos, nodeID)
   462  	}
   463  	panic("unreachable")
   464  }
   465  
   466  func (it *sparseIter) SetToFirstInRoot() {
   467  	it.append(it.ls.labelVec.GetLabel(0), 0, it.startNodeID)
   468  }
   469  
   470  func (it *sparseIter) SetToLastInRoot() {
   471  	pos := it.ls.lastLabelPos(0)
   472  	it.append(it.ls.labelVec.GetLabel(pos), pos, it.startNodeID)
   473  }
   474  
   475  func (it *sparseIter) append(label byte, pos, nodeID uint32) {
   476  	prefix := it.ls.prefixVec.GetPrefix(it.ls.prefixID(nodeID))
   477  	it.keyBuf = append(it.keyBuf, prefix...)
   478  	it.keyBuf = append(it.keyBuf, label)
   479  	it.posInTrie[it.level] = pos
   480  	it.prefixLen[it.level] = uint32(len(prefix)) + 1
   481  	if it.level != 0 {
   482  		it.prefixLen[it.level] += it.prefixLen[it.level-1]
   483  	}
   484  	it.nodeID[it.level] = nodeID
   485  }
   486  
   487  func (it *sparseIter) setAt(level, pos, nodeID uint32) {
   488  	it.keyBuf = append(it.keyBuf[:it.prefixLen[level]-1], it.ls.labelVec.GetLabel(pos))
   489  	it.posInTrie[it.level] = pos
   490  }
   491  
   492  func (it *sparseIter) truncate(level uint32) {
   493  	it.keyBuf = it.keyBuf[:it.prefixLen[level]]
   494  }
   495  
   496  func (it *sparseIter) moveToLeftInNextSubTrie(pos, nodeID, nodeSize uint32, label byte) {
   497  	pos, ok := it.ls.labelVec.SearchGreaterThan(label, pos, nodeSize)
   498  	it.append(it.ls.labelVec.GetLabel(pos), pos, nodeID)
   499  	if ok {
   500  		it.MoveToLeftMostKey()
   501  	} else {
   502  		it.Next()
   503  	}
   504  }
   505  
   506  func (it *sparseIter) compareSuffixGreaterThan(key []byte, pos, level uint32) bool {
   507  	cmp := it.ls.suffixes.Compare(key, it.ls.suffixPos(pos), level)
   508  	if cmp < 0 {
   509  		it.Next()
   510  		return false
   511  	}
   512  	it.valid = true
   513  	return cmp == couldBePositive
   514  }