github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/table/sstable/iterator.go (about)

     1  /*
     2   * Copyright 2017 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package sstable
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"io"
    23  	"math"
    24  	"sort"
    25  
    26  	"github.com/pingcap/badger/surf"
    27  	"github.com/pingcap/badger/y"
    28  )
    29  
    30  type singleKeyIterator struct {
    31  	oldOffset uint32
    32  	loaded    bool
    33  	latestVal []byte
    34  	oldVals   entrySlice
    35  	idx       int
    36  	oldBlock  []byte
    37  }
    38  
    39  func (ski *singleKeyIterator) set(oldOffset uint32, latestVal []byte) {
    40  	ski.oldOffset = oldOffset
    41  	ski.latestVal = latestVal
    42  	ski.loaded = false
    43  	ski.idx = 0
    44  }
    45  
    46  func (ski *singleKeyIterator) getVal() (val []byte) {
    47  	if ski.idx == 0 {
    48  		return ski.latestVal
    49  	}
    50  	oldEntry := ski.oldVals.getEntry(ski.idx - 1)
    51  	return oldEntry
    52  }
    53  
    54  func (ski *singleKeyIterator) loadOld() {
    55  	numEntries := bytesToU32(ski.oldBlock[ski.oldOffset:])
    56  	endOffsStartIdx := ski.oldOffset + 4
    57  	endOffsEndIdx := endOffsStartIdx + 4*numEntries
    58  	ski.oldVals.endOffs = bytesToU32Slice(ski.oldBlock[endOffsStartIdx:endOffsEndIdx])
    59  	valueEndOff := endOffsEndIdx + ski.oldVals.endOffs[numEntries-1]
    60  	ski.oldVals.data = ski.oldBlock[endOffsEndIdx:valueEndOff]
    61  	ski.loaded = true
    62  }
    63  
    64  func (ski *singleKeyIterator) length() int {
    65  	return ski.oldVals.length() + 1
    66  }
    67  
    68  type blockIterator struct {
    69  	entries entrySlice
    70  	idx     int
    71  	err     error
    72  
    73  	globalTsBytes [8]byte
    74  	globalTs      uint64
    75  	key           y.Key
    76  	val           []byte
    77  
    78  	baseLen uint16
    79  	ski     singleKeyIterator
    80  
    81  	block *block
    82  }
    83  
    84  func (itr *blockIterator) setBlock(b *block) {
    85  	itr.block.done()
    86  	itr.block = b
    87  	itr.err = nil
    88  	itr.idx = 0
    89  	itr.key.Reset()
    90  	itr.val = itr.val[:0]
    91  	itr.loadEntries(b.data)
    92  	itr.key.UserKey = append(itr.key.UserKey[:0], b.baseKey[:itr.baseLen]...)
    93  }
    94  
    95  func (itr *blockIterator) valid() bool {
    96  	return itr != nil && itr.err == nil
    97  }
    98  
    99  func (itr *blockIterator) Error() error {
   100  	return itr.err
   101  }
   102  
   103  // loadEntries loads the entryEndOffsets for binary searching for a key.
   104  func (itr *blockIterator) loadEntries(data []byte) {
   105  	// Get the number of entries from the end of `data` (and remove it).
   106  	dataLen := len(data)
   107  	itr.baseLen = binary.LittleEndian.Uint16(data[dataLen-2:])
   108  	entriesNum := int(bytesToU32(data[dataLen-6:]))
   109  	entriesEnd := dataLen - 6
   110  	entriesStart := entriesEnd - entriesNum*4
   111  	itr.entries.endOffs = bytesToU32Slice(data[entriesStart:entriesEnd])
   112  	itr.entries.data = data[:entriesStart]
   113  }
   114  
   115  // Seek brings us to the first block element that is >= input key.
   116  // The binary search will begin at `start`, you can use it to skip some items.
   117  func (itr *blockIterator) seek(key []byte) {
   118  	foundEntryIdx := sort.Search(itr.entries.length(), func(idx int) bool {
   119  		itr.setIdx(idx)
   120  		return bytes.Compare(itr.key.UserKey, key) >= 0
   121  	})
   122  	itr.setIdx(foundEntryIdx)
   123  }
   124  
   125  // seekToFirst brings us to the first element. Valid should return true.
   126  func (itr *blockIterator) seekToFirst() {
   127  	itr.setIdx(0)
   128  }
   129  
   130  // seekToLast brings us to the last element. Valid should return true.
   131  func (itr *blockIterator) seekToLast() {
   132  	itr.setIdx(itr.entries.length() - 1)
   133  }
   134  
   135  // setIdx sets the iterator to the entry index and set the current key and value.
   136  func (itr *blockIterator) setIdx(i int) {
   137  	itr.idx = i
   138  	if i >= itr.entries.length() || i < 0 {
   139  		itr.err = io.EOF
   140  		return
   141  	}
   142  	itr.err = nil
   143  	entryData := itr.entries.getEntry(i)
   144  	diffKeyLen := binary.LittleEndian.Uint16(entryData)
   145  	entryData = entryData[2:]
   146  	itr.key.UserKey = append(itr.key.UserKey[:itr.baseLen], entryData[:diffKeyLen]...)
   147  	entryData = entryData[diffKeyLen:]
   148  	hasOld := entryData[0] != 0
   149  	entryData = entryData[1:]
   150  	var oldOffset uint32
   151  	if hasOld {
   152  		oldOffset = bytesToU32(entryData)
   153  		entryData = entryData[4:]
   154  	}
   155  	if itr.globalTs != 0 {
   156  		itr.key.Version = itr.globalTs
   157  	} else {
   158  		itr.key.Version = bytesToU64(entryData)
   159  	}
   160  	itr.val = entryData
   161  	itr.ski.set(oldOffset, itr.val)
   162  }
   163  
   164  func (itr *blockIterator) hasOldVersion() bool {
   165  	return itr.ski.oldOffset != 0
   166  }
   167  
   168  func (itr *blockIterator) next() {
   169  	itr.setIdx(itr.idx + 1)
   170  }
   171  
   172  func (itr *blockIterator) prev() {
   173  	itr.setIdx(itr.idx - 1)
   174  }
   175  
   176  func (itr *blockIterator) close() {
   177  	itr.block.done()
   178  }
   179  
   180  // Iterator is an iterator for a Table.
   181  type Iterator struct {
   182  	t    *Table
   183  	tIdx *tableIndex
   184  	surf *surf.Iterator
   185  	bpos int
   186  	bi   blockIterator
   187  	err  error
   188  
   189  	// Internally, Iterator is bidirectional. However, we only expose the
   190  	// unidirectional functionality for now.
   191  	reversed bool
   192  }
   193  
   194  // NewIterator returns a new iterator of the Table
   195  func (t *Table) newIterator(reversed bool) *Iterator {
   196  	idx, err := t.getIndex()
   197  	if err != nil {
   198  		return &Iterator{err: err}
   199  	}
   200  	return t.newIteratorWithIdx(reversed, idx)
   201  }
   202  
   203  func (t *Table) newIteratorWithIdx(reversed bool, index *tableIndex) *Iterator {
   204  	it := &Iterator{t: t, reversed: reversed, tIdx: index}
   205  	it.bi.globalTs = t.globalTs
   206  	if t.oldBlockLen > 0 {
   207  		y.Assert(len(t.oldBlock) > 0)
   208  	}
   209  	it.bi.ski.oldBlock = t.oldBlock
   210  	binary.BigEndian.PutUint64(it.bi.globalTsBytes[:], math.MaxUint64-t.globalTs)
   211  	if index.surf != nil {
   212  		it.surf = index.surf.NewIterator()
   213  	}
   214  	return it
   215  }
   216  
   217  func (itr *Iterator) reset() {
   218  	itr.bpos = 0
   219  	itr.err = nil
   220  }
   221  
   222  // Valid follows the y.Iterator interface
   223  func (itr *Iterator) Valid() bool {
   224  	return itr.err == nil
   225  }
   226  
   227  func (itr *Iterator) Error() error {
   228  	if itr.err == io.EOF {
   229  		return nil
   230  	}
   231  	return itr.err
   232  }
   233  
   234  func (itr *Iterator) seekToFirst() {
   235  	numBlocks := len(itr.tIdx.blockEndOffsets)
   236  	if numBlocks == 0 {
   237  		itr.err = io.EOF
   238  		return
   239  	}
   240  	itr.bpos = 0
   241  	block, err := itr.t.block(itr.bpos, itr.tIdx)
   242  	if err != nil {
   243  		itr.err = err
   244  		return
   245  	}
   246  	itr.bi.setBlock(block)
   247  	itr.bi.seekToFirst()
   248  	itr.err = itr.bi.Error()
   249  }
   250  
   251  func (itr *Iterator) seekToLast() {
   252  	numBlocks := len(itr.tIdx.blockEndOffsets)
   253  	if numBlocks == 0 {
   254  		itr.err = io.EOF
   255  		return
   256  	}
   257  	itr.bpos = numBlocks - 1
   258  	block, err := itr.t.block(itr.bpos, itr.tIdx)
   259  	if err != nil {
   260  		itr.err = err
   261  		return
   262  	}
   263  	itr.bi.setBlock(block)
   264  	itr.bi.seekToLast()
   265  	itr.err = itr.bi.Error()
   266  }
   267  
   268  func (itr *Iterator) seekInBlock(blockIdx int, key []byte) {
   269  	itr.bpos = blockIdx
   270  	block, err := itr.t.block(blockIdx, itr.tIdx)
   271  	if err != nil {
   272  		itr.err = err
   273  		return
   274  	}
   275  	itr.bi.setBlock(block)
   276  	itr.bi.seek(key)
   277  	itr.err = itr.bi.Error()
   278  }
   279  
   280  func (itr *Iterator) seekFromOffset(blockIdx int, offset int, key []byte) {
   281  	itr.bpos = blockIdx
   282  	block, err := itr.t.block(blockIdx, itr.tIdx)
   283  	if err != nil {
   284  		itr.err = err
   285  		return
   286  	}
   287  	itr.bi.setBlock(block)
   288  	itr.bi.setIdx(offset)
   289  	if bytes.Compare(itr.bi.key.UserKey, key) >= 0 {
   290  		return
   291  	}
   292  	itr.bi.seek(key)
   293  	itr.err = itr.bi.err
   294  }
   295  
   296  func (itr *Iterator) seekBlock(key []byte) int {
   297  	return sort.Search(len(itr.tIdx.blockEndOffsets), func(idx int) bool {
   298  		blockBaseKey := itr.tIdx.baseKeys.getEntry(idx)
   299  		return bytes.Compare(blockBaseKey, key) > 0
   300  	})
   301  }
   302  
   303  // seekFrom brings us to a key that is >= input key.
   304  func (itr *Iterator) seekFrom(key []byte) {
   305  	itr.err = nil
   306  	itr.reset()
   307  
   308  	idx := itr.seekBlock(key)
   309  	if itr.err != nil {
   310  		return
   311  	}
   312  	if idx == 0 {
   313  		// The smallest key in our table is already strictly > key. We can return that.
   314  		// This is like a SeekToFirst.
   315  		itr.seekInBlock(0, key)
   316  		return
   317  	}
   318  
   319  	// block[idx].smallest is > key.
   320  	// Since idx>0, we know block[idx-1].smallest is <= key.
   321  	// There are two cases.
   322  	// 1) Everything in block[idx-1] is strictly < key. In this case, we should go to the first
   323  	//    element of block[idx].
   324  	// 2) Some element in block[idx-1] is >= key. We should go to that element.
   325  	itr.seekInBlock(idx-1, key)
   326  	if itr.err == io.EOF {
   327  		// Case 1. Need to visit block[idx].
   328  		if idx == len(itr.tIdx.blockEndOffsets) {
   329  			// If idx == len(itr.t.blockEndOffsets), then input key is greater than ANY element of table.
   330  			// There's nothing we can do. Valid() should return false as we seek to end of table.
   331  			return
   332  		}
   333  		itr.err = nil
   334  		// Since block[idx].smallest is > key. This is essentially a block[idx].SeekToFirst.
   335  		itr.seekFromOffset(idx, 0, key)
   336  	}
   337  	// Case 2: No need to do anything. We already did the seek in block[idx-1].
   338  }
   339  
   340  // seek will reset iterator and seek to >= key.
   341  func (itr *Iterator) seek(key []byte) {
   342  	itr.err = nil
   343  	itr.reset()
   344  	if itr.surf == nil {
   345  		itr.seekFrom(key)
   346  		return
   347  	}
   348  
   349  	sit := itr.surf
   350  	sit.Seek(key)
   351  	if !sit.Valid() {
   352  		itr.err = io.EOF
   353  		return
   354  	}
   355  
   356  	var pos entryPosition
   357  	pos.decode(sit.Value())
   358  	itr.seekFromOffset(int(pos.blockIdx), int(pos.offset), key)
   359  }
   360  
   361  // seekForPrev will reset iterator and seek to <= key.
   362  func (itr *Iterator) seekForPrev(key []byte) {
   363  	// TODO: Optimize this. We shouldn't have to take a Prev step.
   364  	itr.seekFrom(key)
   365  	if !bytes.Equal(itr.Key().UserKey, key) {
   366  		itr.prev()
   367  	}
   368  }
   369  
   370  func (itr *Iterator) next() {
   371  	itr.err = nil
   372  
   373  	if itr.bpos >= len(itr.tIdx.blockEndOffsets) {
   374  		itr.err = io.EOF
   375  		return
   376  	}
   377  
   378  	if itr.bi.entries.length() == 0 {
   379  		block, err := itr.t.block(itr.bpos, itr.tIdx)
   380  		if err != nil {
   381  			itr.err = err
   382  			return
   383  		}
   384  		itr.bi.setBlock(block)
   385  		itr.bi.seekToFirst()
   386  		itr.err = itr.bi.Error()
   387  		return
   388  	}
   389  
   390  	itr.bi.next()
   391  	if !itr.bi.valid() {
   392  		itr.bpos++
   393  		itr.bi.entries.reset()
   394  		itr.next()
   395  		return
   396  	}
   397  }
   398  
   399  func (itr *Iterator) prev() {
   400  	itr.err = nil
   401  	if itr.bpos < 0 {
   402  		itr.err = io.EOF
   403  		return
   404  	}
   405  
   406  	if itr.bi.entries.length() == 0 {
   407  		block, err := itr.t.block(itr.bpos, itr.tIdx)
   408  		if err != nil {
   409  			itr.err = err
   410  			return
   411  		}
   412  		itr.bi.setBlock(block)
   413  		itr.bi.seekToLast()
   414  		itr.err = itr.bi.Error()
   415  		return
   416  	}
   417  
   418  	itr.bi.prev()
   419  	if !itr.bi.valid() {
   420  		itr.bpos--
   421  		itr.bi.entries.reset()
   422  		itr.prev()
   423  		return
   424  	}
   425  }
   426  
   427  // Key follows the y.Iterator interface
   428  func (itr *Iterator) Key() y.Key {
   429  	return itr.bi.key
   430  }
   431  
   432  // Value follows the y.Iterator interface
   433  func (itr *Iterator) Value() (ret y.ValueStruct) {
   434  	ret.Decode(itr.bi.val)
   435  	return
   436  }
   437  
   438  // FillValue fill the value struct.
   439  func (itr *Iterator) FillValue(vs *y.ValueStruct) {
   440  	vs.Decode(itr.bi.val)
   441  }
   442  
   443  // Next follows the y.Iterator interface
   444  func (itr *Iterator) Next() {
   445  	if !itr.reversed {
   446  		itr.next()
   447  	} else {
   448  		itr.prev()
   449  	}
   450  }
   451  
   452  func (itr *Iterator) NextVersion() bool {
   453  	if itr.bi.ski.oldOffset == 0 {
   454  		return false
   455  	}
   456  	if !itr.bi.ski.loaded {
   457  		itr.bi.ski.loadOld()
   458  	}
   459  	if itr.bi.ski.idx+1 < itr.bi.ski.length() {
   460  		itr.bi.ski.idx++
   461  		itr.bi.val = itr.bi.ski.getVal()
   462  		itr.bi.key.Version = bytesToU64(itr.bi.val)
   463  		return true
   464  	}
   465  	return false
   466  }
   467  
   468  // Rewind follows the y.Iterator interface
   469  func (itr *Iterator) Rewind() {
   470  	if !itr.reversed {
   471  		itr.seekToFirst()
   472  	} else {
   473  		itr.seekToLast()
   474  	}
   475  }
   476  
   477  // Seek follows the y.Iterator interface
   478  func (itr *Iterator) Seek(key []byte) {
   479  	if !itr.reversed {
   480  		itr.seek(key)
   481  	} else {
   482  		itr.seekForPrev(key)
   483  	}
   484  }
   485  
   486  // Close closes the iterator (and it must be called).
   487  func (itr *Iterator) Close() error {
   488  	itr.bi.close()
   489  	return nil
   490  }