github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/table/sstable/iterator.go (about)

     1  /*
     2   * Copyright 2017 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package sstable
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"io"
    23  	"math"
    24  	"sort"
    25  
    26  	"github.com/coocood/badger/surf"
    27  	"github.com/coocood/badger/y"
    28  )
    29  
// singleKeyIterator walks the versions stored for a single key: the latest
// value first, followed by the older values kept in oldBlock.
//
// Fields:
//   - oldOffset: offset into oldBlock at which this key's old versions start.
//   - loaded:    set by loadOld once oldVals has been decoded; cleared by set.
//   - latestVal: the newest value, returned by getVal while idx == 0.
//   - oldVals:   the decoded old values; getVal reads entry idx-1 from it.
//   - idx:       current version position, 0 being the latest value.
//   - oldBlock:  raw buffer that loadOld decodes old versions from.
type singleKeyIterator struct {
	oldOffset uint32
	loaded    bool
	latestVal []byte
	oldVals   entrySlice
	idx       int
	oldBlock  []byte
}
    38  
    39  func (ski *singleKeyIterator) set(oldOffset uint32, latestVal []byte) {
    40  	ski.oldOffset = oldOffset
    41  	ski.latestVal = latestVal
    42  	ski.loaded = false
    43  	ski.idx = 0
    44  }
    45  
    46  func (ski *singleKeyIterator) getVal() (val []byte) {
    47  	if ski.idx == 0 {
    48  		return ski.latestVal
    49  	}
    50  	oldEntry := ski.oldVals.getEntry(ski.idx - 1)
    51  	return oldEntry
    52  }
    53  
    54  func (ski *singleKeyIterator) loadOld() {
    55  	numEntries := bytesToU32(ski.oldBlock[ski.oldOffset:])
    56  	endOffsStartIdx := ski.oldOffset + 4
    57  	endOffsEndIdx := endOffsStartIdx + 4*numEntries
    58  	ski.oldVals.endOffs = bytesToU32Slice(ski.oldBlock[endOffsStartIdx:endOffsEndIdx])
    59  	valueEndOff := endOffsEndIdx + ski.oldVals.endOffs[numEntries-1]
    60  	ski.oldVals.data = ski.oldBlock[endOffsEndIdx:valueEndOff]
    61  	ski.loaded = true
    62  }
    63  
    64  func (ski *singleKeyIterator) length() int {
    65  	return ski.oldVals.length() + 1
    66  }
    67  
// blockIterator iterates over the entries of a single block.
//
// Fields:
//   - entries:       entry data and end offsets of the current block, filled
//     in by loadEntries.
//   - idx:           index of the current entry; may be out of range, in
//     which case err is io.EOF.
//   - err:           nil while positioned on an entry, io.EOF otherwise.
//   - globalTsBytes: big-endian encoding of math.MaxUint64-globalTs, written
//     by newIteratorWithIdx.
//   - globalTs:      when non-zero, used as the version of every key instead
//     of the version decoded from the entry.
//   - key, val:      key and raw value bytes of the current entry.
//   - baseLen:       length of the base-key prefix shared by the block's
//     entries; decoded from the block trailer.
//   - ski:           iterator over the versions of the current key.
type blockIterator struct {
	entries entrySlice
	idx     int
	err     error

	globalTsBytes [8]byte
	globalTs      uint64
	key           y.Key
	val           []byte

	baseLen uint16
	ski     singleKeyIterator
}
    81  
    82  func (itr *blockIterator) setBlock(b block) {
    83  	itr.err = nil
    84  	itr.idx = 0
    85  	itr.key.Reset()
    86  	itr.val = itr.val[:0]
    87  	itr.loadEntries(b.data)
    88  	itr.key.UserKey = append(itr.key.UserKey[:0], b.baseKey[:itr.baseLen]...)
    89  }
    90  
    91  func (itr *blockIterator) valid() bool {
    92  	return itr != nil && itr.err == nil
    93  }
    94  
    95  func (itr *blockIterator) Error() error {
    96  	return itr.err
    97  }
    98  
    99  // loadEntries loads the entryEndOffsets for binary searching for a key.
   100  func (itr *blockIterator) loadEntries(data []byte) {
   101  	// Get the number of entries from the end of `data` (and remove it).
   102  	dataLen := len(data)
   103  	itr.baseLen = binary.LittleEndian.Uint16(data[dataLen-2:])
   104  	entriesNum := int(bytesToU32(data[dataLen-6:]))
   105  	entriesEnd := dataLen - 6
   106  	entriesStart := entriesEnd - entriesNum*4
   107  	itr.entries.endOffs = bytesToU32Slice(data[entriesStart:entriesEnd])
   108  	itr.entries.data = data[:entriesStart]
   109  }
   110  
   111  // Seek brings us to the first block element that is >= input key.
   112  // The binary search will begin at `start`, you can use it to skip some items.
   113  func (itr *blockIterator) seek(key []byte) {
   114  	foundEntryIdx := sort.Search(itr.entries.length(), func(idx int) bool {
   115  		itr.setIdx(idx)
   116  		return bytes.Compare(itr.key.UserKey, key) >= 0
   117  	})
   118  	itr.setIdx(foundEntryIdx)
   119  }
   120  
   121  // seekToFirst brings us to the first element. Valid should return true.
   122  func (itr *blockIterator) seekToFirst() {
   123  	itr.setIdx(0)
   124  }
   125  
   126  // seekToLast brings us to the last element. Valid should return true.
   127  func (itr *blockIterator) seekToLast() {
   128  	itr.setIdx(itr.entries.length() - 1)
   129  }
   130  
   131  // setIdx sets the iterator to the entry index and set the current key and value.
   132  func (itr *blockIterator) setIdx(i int) {
   133  	itr.idx = i
   134  	if i >= itr.entries.length() || i < 0 {
   135  		itr.err = io.EOF
   136  		return
   137  	}
   138  	itr.err = nil
   139  	entryData := itr.entries.getEntry(i)
   140  	diffKeyLen := binary.LittleEndian.Uint16(entryData)
   141  	entryData = entryData[2:]
   142  	itr.key.UserKey = append(itr.key.UserKey[:itr.baseLen], entryData[:diffKeyLen]...)
   143  	entryData = entryData[diffKeyLen:]
   144  	hasOld := entryData[0] != 0
   145  	entryData = entryData[1:]
   146  	var oldOffset uint32
   147  	if hasOld {
   148  		oldOffset = bytesToU32(entryData)
   149  		entryData = entryData[4:]
   150  	}
   151  	if itr.globalTs != 0 {
   152  		itr.key.Version = itr.globalTs
   153  	} else {
   154  		itr.key.Version = bytesToU64(entryData)
   155  	}
   156  	itr.val = entryData
   157  	itr.ski.idx = 0
   158  	if hasOld {
   159  		itr.ski.set(oldOffset, itr.val)
   160  	}
   161  }
   162  
   163  func (itr *blockIterator) hasOldVersion() bool {
   164  	return itr.ski.oldOffset != 0
   165  }
   166  
   167  func (itr *blockIterator) next() {
   168  	itr.setIdx(itr.idx + 1)
   169  }
   170  
   171  func (itr *blockIterator) prev() {
   172  	itr.setIdx(itr.idx - 1)
   173  }
   174  
// Iterator is an iterator for a Table.
//
// Fields:
//   - t:        the table being iterated.
//   - tIdx:     the table index used to locate and load blocks.
//   - surf:     optional iterator created from tIdx.surf; when non-nil, seek
//     uses it to find the block and entry position of a key.
//   - bpos:     index of the block the iterator is currently in.
//   - bi:       iterator over the entries of the current block.
//   - err:      nil while the iterator is valid; io.EOF at either end of the
//     table, or the error returned while loading the index or a block.
//   - reversed: selects the direction used by Next, Rewind and Seek.
type Iterator struct {
	t    *Table
	tIdx *tableIndex
	surf *surf.Iterator
	bpos int
	bi   blockIterator
	err  error

	// Internally, Iterator is bidirectional. However, we only expose the
	// unidirectional functionality for now.
	reversed bool
}
   188  
   189  // NewIterator returns a new iterator of the Table
   190  func (t *Table) newIterator(reversed bool) *Iterator {
   191  	idx, err := t.getIndex()
   192  	if err != nil {
   193  		return &Iterator{err: err}
   194  	}
   195  	return t.newIteratorWithIdx(reversed, idx)
   196  }
   197  
   198  func (t *Table) newIteratorWithIdx(reversed bool, index *tableIndex) *Iterator {
   199  	it := &Iterator{t: t, reversed: reversed, tIdx: index}
   200  	it.bi.globalTs = t.globalTs
   201  	if t.oldBlockLen > 0 {
   202  		y.Assert(len(t.oldBlock) > 0)
   203  	}
   204  	it.bi.ski.oldBlock = t.oldBlock
   205  	binary.BigEndian.PutUint64(it.bi.globalTsBytes[:], math.MaxUint64-t.globalTs)
   206  	if index.surf != nil {
   207  		it.surf = index.surf.NewIterator()
   208  	}
   209  	return it
   210  }
   211  
   212  func (itr *Iterator) reset() {
   213  	itr.bpos = 0
   214  	itr.err = nil
   215  }
   216  
   217  // Valid follows the y.Iterator interface
   218  func (itr *Iterator) Valid() bool {
   219  	return itr.err == nil
   220  }
   221  
   222  func (itr *Iterator) Error() error {
   223  	if itr.err == io.EOF {
   224  		return nil
   225  	}
   226  	return itr.err
   227  }
   228  
   229  func (itr *Iterator) seekToFirst() {
   230  	numBlocks := len(itr.tIdx.blockEndOffsets)
   231  	if numBlocks == 0 {
   232  		itr.err = io.EOF
   233  		return
   234  	}
   235  	itr.bpos = 0
   236  	block, err := itr.t.block(itr.bpos, itr.tIdx)
   237  	if err != nil {
   238  		itr.err = err
   239  		return
   240  	}
   241  	itr.bi.setBlock(block)
   242  	itr.bi.seekToFirst()
   243  	itr.err = itr.bi.Error()
   244  }
   245  
   246  func (itr *Iterator) seekToLast() {
   247  	numBlocks := len(itr.tIdx.blockEndOffsets)
   248  	if numBlocks == 0 {
   249  		itr.err = io.EOF
   250  		return
   251  	}
   252  	itr.bpos = numBlocks - 1
   253  	block, err := itr.t.block(itr.bpos, itr.tIdx)
   254  	if err != nil {
   255  		itr.err = err
   256  		return
   257  	}
   258  	itr.bi.setBlock(block)
   259  	itr.bi.seekToLast()
   260  	itr.err = itr.bi.Error()
   261  }
   262  
   263  func (itr *Iterator) seekInBlock(blockIdx int, key []byte) {
   264  	itr.bpos = blockIdx
   265  	block, err := itr.t.block(blockIdx, itr.tIdx)
   266  	if err != nil {
   267  		itr.err = err
   268  		return
   269  	}
   270  	itr.bi.setBlock(block)
   271  	itr.bi.seek(key)
   272  	itr.err = itr.bi.Error()
   273  }
   274  
   275  func (itr *Iterator) seekFromOffset(blockIdx int, offset int, key []byte) {
   276  	itr.bpos = blockIdx
   277  	block, err := itr.t.block(blockIdx, itr.tIdx)
   278  	if err != nil {
   279  		itr.err = err
   280  		return
   281  	}
   282  	itr.bi.setBlock(block)
   283  	itr.bi.setIdx(offset)
   284  	if bytes.Compare(itr.bi.key.UserKey, key) >= 0 {
   285  		return
   286  	}
   287  	itr.bi.seek(key)
   288  	itr.err = itr.bi.err
   289  }
   290  
   291  func (itr *Iterator) seekBlock(key []byte) int {
   292  	return sort.Search(len(itr.tIdx.blockEndOffsets), func(idx int) bool {
   293  		blockBaseKey := itr.tIdx.baseKeys.getEntry(idx)
   294  		return bytes.Compare(blockBaseKey, key) > 0
   295  	})
   296  }
   297  
   298  // seekFrom brings us to a key that is >= input key.
   299  func (itr *Iterator) seekFrom(key []byte) {
   300  	itr.err = nil
   301  	itr.reset()
   302  
   303  	idx := itr.seekBlock(key)
   304  	if itr.err != nil {
   305  		return
   306  	}
   307  	if idx == 0 {
   308  		// The smallest key in our table is already strictly > key. We can return that.
   309  		// This is like a SeekToFirst.
   310  		itr.seekInBlock(0, key)
   311  		return
   312  	}
   313  
   314  	// block[idx].smallest is > key.
   315  	// Since idx>0, we know block[idx-1].smallest is <= key.
   316  	// There are two cases.
   317  	// 1) Everything in block[idx-1] is strictly < key. In this case, we should go to the first
   318  	//    element of block[idx].
   319  	// 2) Some element in block[idx-1] is >= key. We should go to that element.
   320  	itr.seekInBlock(idx-1, key)
   321  	if itr.err == io.EOF {
   322  		// Case 1. Need to visit block[idx].
   323  		if idx == len(itr.tIdx.blockEndOffsets) {
   324  			// If idx == len(itr.t.blockEndOffsets), then input key is greater than ANY element of table.
   325  			// There's nothing we can do. Valid() should return false as we seek to end of table.
   326  			return
   327  		}
   328  		// Since block[idx].smallest is > key. This is essentially a block[idx].SeekToFirst.
   329  		itr.seekFromOffset(idx, 0, key)
   330  	}
   331  	// Case 2: No need to do anything. We already did the seek in block[idx-1].
   332  }
   333  
   334  // seek will reset iterator and seek to >= key.
   335  func (itr *Iterator) seek(key []byte) {
   336  	itr.err = nil
   337  	itr.reset()
   338  	if itr.surf == nil {
   339  		itr.seekFrom(key)
   340  		return
   341  	}
   342  
   343  	sit := itr.surf
   344  	sit.Seek(key)
   345  	if !sit.Valid() {
   346  		itr.err = io.EOF
   347  		return
   348  	}
   349  
   350  	var pos entryPosition
   351  	pos.decode(sit.Value())
   352  	itr.seekFromOffset(int(pos.blockIdx), int(pos.offset), key)
   353  }
   354  
   355  // seekForPrev will reset iterator and seek to <= key.
   356  func (itr *Iterator) seekForPrev(key []byte) {
   357  	// TODO: Optimize this. We shouldn't have to take a Prev step.
   358  	itr.seekFrom(key)
   359  	if !bytes.Equal(itr.Key().UserKey, key) {
   360  		itr.prev()
   361  	}
   362  }
   363  
   364  func (itr *Iterator) next() {
   365  	itr.err = nil
   366  
   367  	if itr.bpos >= len(itr.tIdx.blockEndOffsets) {
   368  		itr.err = io.EOF
   369  		return
   370  	}
   371  
   372  	if itr.bi.entries.length() == 0 {
   373  		block, err := itr.t.block(itr.bpos, itr.tIdx)
   374  		if err != nil {
   375  			itr.err = err
   376  			return
   377  		}
   378  		itr.bi.setBlock(block)
   379  		itr.bi.seekToFirst()
   380  		itr.err = itr.bi.Error()
   381  		return
   382  	}
   383  
   384  	itr.bi.next()
   385  	if !itr.bi.valid() {
   386  		itr.bpos++
   387  		itr.bi.entries.reset()
   388  		itr.next()
   389  		return
   390  	}
   391  }
   392  
   393  func (itr *Iterator) prev() {
   394  	itr.err = nil
   395  	if itr.bpos < 0 {
   396  		itr.err = io.EOF
   397  		return
   398  	}
   399  
   400  	if itr.bi.entries.length() == 0 {
   401  		block, err := itr.t.block(itr.bpos, itr.tIdx)
   402  		if err != nil {
   403  			itr.err = err
   404  			return
   405  		}
   406  		itr.bi.setBlock(block)
   407  		itr.bi.seekToLast()
   408  		itr.err = itr.bi.Error()
   409  		return
   410  	}
   411  
   412  	itr.bi.prev()
   413  	if !itr.bi.valid() {
   414  		itr.bpos--
   415  		itr.bi.entries.reset()
   416  		itr.prev()
   417  		return
   418  	}
   419  }
   420  
   421  // Key follows the y.Iterator interface
   422  func (itr *Iterator) Key() y.Key {
   423  	return itr.bi.key
   424  }
   425  
   426  // Value follows the y.Iterator interface
   427  func (itr *Iterator) Value() (ret y.ValueStruct) {
   428  	ret.Decode(itr.bi.val)
   429  	return
   430  }
   431  
   432  // FillValue fill the value struct.
   433  func (itr *Iterator) FillValue(vs *y.ValueStruct) {
   434  	vs.Decode(itr.bi.val)
   435  }
   436  
   437  // Next follows the y.Iterator interface
   438  func (itr *Iterator) Next() {
   439  	if !itr.reversed {
   440  		itr.next()
   441  	} else {
   442  		itr.prev()
   443  	}
   444  }
   445  
   446  func (itr *Iterator) NextVersion() bool {
   447  	if itr.bi.ski.oldOffset == 0 {
   448  		return false
   449  	}
   450  	if !itr.bi.ski.loaded {
   451  		itr.bi.ski.loadOld()
   452  	}
   453  	if itr.bi.ski.idx+1 < itr.bi.ski.length() {
   454  		itr.bi.ski.idx++
   455  		itr.bi.val = itr.bi.ski.getVal()
   456  		itr.bi.key.Version = bytesToU64(itr.bi.val)
   457  		return true
   458  	}
   459  	return false
   460  }
   461  
   462  // Rewind follows the y.Iterator interface
   463  func (itr *Iterator) Rewind() {
   464  	if !itr.reversed {
   465  		itr.seekToFirst()
   466  	} else {
   467  		itr.seekToLast()
   468  	}
   469  }
   470  
   471  // Seek follows the y.Iterator interface
   472  func (itr *Iterator) Seek(key []byte) {
   473  	if !itr.reversed {
   474  		itr.seek(key)
   475  	} else {
   476  		itr.seekForPrev(key)
   477  	}
   478  }