github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/sstable/block.go

// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"encoding/binary"
	"errors"
	"unsafe"

	"github.com/petermattis/pebble/cache"
	"github.com/petermattis/pebble/internal/base"
)

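// uvarintLen returns the number of bytes needed to encode v as an unsigned
// varint.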
func uvarintLen(v uint32) int {
	i := 0
	for v >= 0x80 {
		v >>= 7
		i++
	}
	return i + 1
}

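// blockWriter builds the prefix-compressed block format used for sstable
// blocks. Each entry is encoded as:
//
//	shared key length   (uvarint)
//	unshared key length (uvarint)
//	value length        (uvarint)
//	unshared key bytes
//	value bytes
//
// where the shared length is the number of bytes the key has in common with
// the previous entry's key. Every restartInterval entries the shared length
// is forced to zero and the entry's offset is recorded as a restart point.
// finish appends the restart point offsets and their count as the block
// trailer.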
type blockWriter struct {
	restartInterval int
	nEntries        int
	buf             []byte
	restarts        []uint32
	curKey          []byte
	curValue        []byte
	prevKey         []byte
	tmp             [50]byte
}

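// store appends an entry for the key currently held in curKey (whose encoded
// length is keySize) and value. The key is prefix-compressed against prevKey,
// except at restart points, where a new restart offset is recorded instead.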
func (w *blockWriter) store(keySize int, value []byte) {
	shared := 0
	if w.nEntries%w.restartInterval == 0 {
		w.restarts = append(w.restarts, uint32(len(w.buf)))
	} else {
		shared = base.SharedPrefixLen(w.curKey, w.prevKey)
	}

	n := binary.PutUvarint(w.tmp[0:], uint64(shared))
	n += binary.PutUvarint(w.tmp[n:], uint64(keySize-shared))
	n += binary.PutUvarint(w.tmp[n:], uint64(len(value)))
	w.buf = append(w.buf, w.tmp[:n]...)
	w.buf = append(w.buf, w.curKey[shared:]...)
	w.buf = append(w.buf, value...)
	w.curValue = w.buf[len(w.buf)-len(value):]

	w.nEntries++
}

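// add appends an entry for the internal key and value to the block. The
// previous key is retained in prevKey so that store can prefix-compress the
// new entry against it.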
func (w *blockWriter) add(key InternalKey, value []byte) {
	w.curKey, w.prevKey = w.prevKey, w.curKey

	size := key.Size()
	if cap(w.curKey) < size {
		w.curKey = make([]byte, 0, size*2)
	}
	w.curKey = w.curKey[:size]
	key.Encode(w.curKey)

	w.store(size, value)
}

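// finish appends the restart point trailer (each restart offset as a
// little-endian uint32, followed by the number of restart points) and returns
// the completed block.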
func (w *blockWriter) finish() []byte {
	// Write the restart points to the buffer.
	if w.nEntries == 0 {
		// Every block must have at least one restart point.
		if cap(w.restarts) > 0 {
			w.restarts = w.restarts[:1]
			w.restarts[0] = 0
		} else {
			w.restarts = append(w.restarts, 0)
		}
	}
	tmp4 := w.tmp[:4]
	for _, x := range w.restarts {
		binary.LittleEndian.PutUint32(tmp4, x)
		w.buf = append(w.buf, tmp4...)
	}
	binary.LittleEndian.PutUint32(tmp4, uint32(len(w.restarts)))
	w.buf = append(w.buf, tmp4...)
	return w.buf
}

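// reset clears the writer's entry count, buffer and restart points so it can
// be reused for a new block.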
func (w *blockWriter) reset() {
	w.nEntries = 0
	w.buf = w.buf[:0]
	w.restarts = w.restarts[:0]
}

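// estimatedSize returns the size the block would occupy if finished now: the
// current entry data plus 4 bytes per restart point and a 4-byte restart
// count.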
func (w *blockWriter) estimatedSize() int {
	return len(w.buf) + 4*(len(w.restarts)+1)
}

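// blockEntry records the location of a decoded entry: the entry's offset
// within the block, the bounds of its key within blockIter.cachedBuf, and the
// offset and size of its value within the block. Entries are cached this way
// to support reverse iteration.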
type blockEntry struct {
	offset   int32
	keyStart int32
	keyEnd   int32
	valStart int32
	valSize  int32
}

// blockIter is an iterator over a single block of data.
type blockIter struct {
	cmp          Compare
	offset       int32
	nextOffset   int32
	restarts     int32
	numRestarts  int32
	globalSeqNum uint64
	ptr          unsafe.Pointer
	data         []byte
	key, val     []byte
	fullKey      []byte
	keyBuf       [256]byte
	ikey         InternalKey
	cached       []blockEntry
	cachedBuf    []byte
	cacheHandle  cache.Handle
	err          error
}

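// newBlockIter constructs a new blockIter over the given block using the
// given comparer.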
func newBlockIter(cmp Compare, block block) (*blockIter, error) {
	i := &blockIter{}
	return i, i.init(cmp, block, 0)
}

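// init (re)initializes the iterator to read from block. It decodes the
// restart point count from the block trailer and, if globalSeqNum is
// non-zero, substitutes that sequence number for the sequence number of every
// key the iterator returns.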
func (i *blockIter) init(cmp Compare, block block, globalSeqNum uint64) error {
	numRestarts := int32(binary.LittleEndian.Uint32(block[len(block)-4:]))
	if numRestarts == 0 {
		return errors.New("pebble/table: invalid table (block has no restart points)")
	}
	i.cmp = cmp
	i.restarts = int32(len(block)) - 4*(1+numRestarts)
	i.numRestarts = numRestarts
	i.globalSeqNum = globalSeqNum
	i.ptr = unsafe.Pointer(&block[0])
	i.data = block
	if i.fullKey == nil {
		i.fullKey = i.keyBuf[:0]
	} else {
		i.fullKey = i.fullKey[:0]
	}
	i.val = nil
	i.clearCache()
	return nil
}

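// setCacheHandle releases any cache handle the iterator currently holds and
// retains h; the new handle is released when the iterator is closed.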
func (i *blockIter) setCacheHandle(h cache.Handle) {
	i.cacheHandle.Release()
	i.cacheHandle = h
}

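// readEntry decodes the entry at i.offset: the shared key length, unshared
// key length and value length varints, followed by the unshared key bytes and
// the value. It sets i.key, i.val and i.nextOffset.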
func (i *blockIter) readEntry() {
	ptr := unsafe.Pointer(uintptr(i.ptr) + uintptr(i.offset))

	// This is an ugly performance hack. Reading entries from blocks is one of
	// the innermost routines and decoding the 3 varints per entry takes
	// significant time. Neither go1.11 nor go1.12 will inline decodeVarint for
	// us, so we do it manually. This provides a 10-15% performance improvement
	// on blockIter benchmarks on both go1.11 and go1.12.
	//
	// TODO(peter): remove this hack if go:inline is ever supported.
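	//
	// As a reminder of the uvarint layout decoded below: each byte carries 7
	// bits of the value, least-significant group first, and the high bit is
	// set on every byte except the last. For example, 300 (0b1_0010_1100)
	// encodes as 0xac 0x02.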

	var shared uint32
	src := (*[5]uint8)(ptr)
	if a := (*src)[0]; a < 128 {
		shared = uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 1)
	} else if a, b := a&0x7f, (*src)[1]; b < 128 {
		shared = uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 2)
	} else if b, c := b&0x7f, (*src)[2]; c < 128 {
		shared = uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 3)
	} else if c, d := c&0x7f, (*src)[3]; d < 128 {
		shared = uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 4)
	} else {
		d, e := d&0x7f, (*src)[4]
		shared = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 5)
	}

	var unshared uint32
	src = (*[5]uint8)(ptr)
	if a := (*src)[0]; a < 128 {
		unshared = uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 1)
	} else if a, b := a&0x7f, (*src)[1]; b < 128 {
		unshared = uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 2)
	} else if b, c := b&0x7f, (*src)[2]; c < 128 {
		unshared = uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 3)
	} else if c, d := c&0x7f, (*src)[3]; d < 128 {
		unshared = uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 4)
	} else {
		d, e := d&0x7f, (*src)[4]
		unshared = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 5)
	}

	var value uint32
	src = (*[5]uint8)(ptr)
	if a := (*src)[0]; a < 128 {
		value = uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 1)
	} else if a, b := a&0x7f, (*src)[1]; b < 128 {
		value = uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 2)
	} else if b, c := b&0x7f, (*src)[2]; c < 128 {
		value = uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 3)
	} else if c, d := c&0x7f, (*src)[3]; d < 128 {
		value = uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 4)
	} else {
		d, e := d&0x7f, (*src)[4]
		value = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
		ptr = unsafe.Pointer(uintptr(ptr) + 5)
	}

	unsharedKey := getBytes(ptr, int(unshared))
	i.fullKey = append(i.fullKey[:shared], unsharedKey...)
	if shared == 0 {
		// Provide stability for the key across positioning calls if the key
		// doesn't share a prefix with the previous key. This avoids having to
		// copy the key if the caller knows the block has a restart interval of
		// 1. An important example of this is range-del blocks.
		i.key = unsharedKey
	} else {
		i.key = i.fullKey
	}
	ptr = unsafe.Pointer(uintptr(ptr) + uintptr(unshared))
	i.val = getBytes(ptr, int(value))
	i.nextOffset = int32(uintptr(ptr)-uintptr(i.ptr)) + int32(value)
}

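// decodeInternalKey splits key into its user key and trailer, storing the
// result in i.ikey and applying globalSeqNum if it is set. Keys shorter than
// the 8-byte trailer are marked invalid.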
func (i *blockIter) decodeInternalKey(key []byte) {
	// Manually inlining base.DecodeInternalKey provides a 5-10% speedup on
	// BlockIter benchmarks.
	if n := len(key) - 8; n >= 0 {
		i.ikey.Trailer = binary.LittleEndian.Uint64(key[n:])
		i.ikey.UserKey = key[:n:n]
		if i.globalSeqNum != 0 {
			i.ikey.SetSeqNum(i.globalSeqNum)
		}
	} else {
		i.ikey.Trailer = uint64(InternalKeyKindInvalid)
		i.ikey.UserKey = nil
	}
}

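// clearCache discards the entries cached for reverse iteration.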
func (i *blockIter) clearCache() {
	i.cached = i.cached[:0]
	i.cachedBuf = i.cachedBuf[:0]
}

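// cacheEntry records the current entry's offset, key and value location so
// that Prev can step backwards without re-decoding the block from the
// preceding restart point.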
func (i *blockIter) cacheEntry() {
	var valStart int32
	valSize := int32(len(i.val))
	if valSize > 0 {
		valStart = int32(uintptr(unsafe.Pointer(&i.val[0])) - uintptr(i.ptr))
	}

	i.cached = append(i.cached, blockEntry{
		offset:   i.offset,
		keyStart: int32(len(i.cachedBuf)),
		keyEnd:   int32(len(i.cachedBuf) + len(i.key)),
		valStart: valStart,
		valSize:  valSize,
	})
	i.cachedBuf = append(i.cachedBuf, i.key...)
}

// SeekGE implements internalIterator.SeekGE, as documented in the pebble
// package.
func (i *blockIter) SeekGE(key []byte) (*InternalKey, []byte) {
	ikey := base.MakeSearchKey(key)

	// Find the index of the smallest restart point whose key is > the key
	// sought; index will be numRestarts if there is no such restart point.
	i.offset = 0
	var index int32

	{
		// NB: manually inlined sort.Search is ~5% faster.
		//
		// Define f(-1) == false and f(n) == true.
		// Invariant: f(index-1) == false, f(upper) == true.
		upper := i.numRestarts
		for index < upper {
			h := int32(uint(index+upper) >> 1) // avoid overflow when computing h
			// index ≤ h < upper
			offset := int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*h:]))
			// For a restart point, there are 0 bytes shared with the previous key.
			// The varint encoding of 0 occupies 1 byte.
			ptr := unsafe.Pointer(uintptr(i.ptr) + uintptr(offset+1))

			// Decode the key at that restart point, and compare it to the key
			// sought. See the comment in readEntry for why we manually inline the
			// varint decoding.
			var v1 uint32
			src := (*[5]uint8)(ptr)
			if a := (*src)[0]; a < 128 {
				v1 = uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 1)
			} else if a, b := a&0x7f, (*src)[1]; b < 128 {
				v1 = uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 2)
			} else if b, c := b&0x7f, (*src)[2]; c < 128 {
				v1 = uint32(c)<<14 | uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 3)
			} else if c, d := c&0x7f, (*src)[3]; d < 128 {
				v1 = uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 4)
			} else {
				d, e := d&0x7f, (*src)[4]
				v1 = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 5)
			}

			if src := (*[5]uint8)(ptr); (*src)[0] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 1)
			} else if (*src)[1] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 2)
			} else if (*src)[2] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 3)
			} else if (*src)[3] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 4)
			} else {
				ptr = unsafe.Pointer(uintptr(ptr) + 5)
			}

			// Manually inlining base.DecodeInternalKey provides a 5-10% speedup on
			// BlockIter benchmarks.
			s := getBytes(ptr, int(v1))
			var k InternalKey
			if n := len(s) - 8; n >= 0 {
				k.Trailer = binary.LittleEndian.Uint64(s[n:])
				k.UserKey = s[:n:n]
				// NB: We can't have duplicate keys if the globalSeqNum != 0, so we
				// leave the seqnum on this key as 0 as it won't affect our search
				// since ikey has the maximum seqnum.
			} else {
				k.Trailer = uint64(InternalKeyKindInvalid)
			}

			if base.InternalCompare(i.cmp, ikey, k) >= 0 {
				index = h + 1 // preserves f(i-1) == false
			} else {
				upper = h // preserves f(j) == true
			}
		}
		// index == upper, f(index-1) == false, and f(upper) (= f(index)) == true
		// => answer is index.
	}

	// Since keys are strictly increasing, if index > 0 then the restart point at
	// index-1 will be the largest whose key is <= the key sought. If index ==
	// 0, then all keys in this block are larger than the key sought, and offset
	// remains at zero.
	if index > 0 {
		i.offset = int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*(index-1):]))
	}
	i.readEntry()
	i.decodeInternalKey(i.key)

	// Iterate from that restart point to somewhere >= the key sought.
	for ; i.Valid(); i.Next() {
		if base.InternalCompare(i.cmp, i.ikey, ikey) >= 0 {
			return &i.ikey, i.val
		}
	}

	return nil, nil
}

// SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
// pebble package.
func (i *blockIter) SeekPrefixGE(prefix, key []byte) (*InternalKey, []byte) {
	// This should never be called as prefix iteration is handled by sstable.Iterator.
	panic("pebble: SeekPrefixGE unimplemented")
}

// SeekLT implements internalIterator.SeekLT, as documented in the pebble
// package.
func (i *blockIter) SeekLT(key []byte) (*InternalKey, []byte) {
	ikey := base.MakeSearchKey(key)

	// Find the index of the smallest restart point whose key is >= the key
	// sought; index will be numRestarts if there is no such restart point.
	i.offset = 0
	var index int32

	{
		// NB: manually inlined sort.Search is ~5% faster.
		//
		// Define f(-1) == false and f(n) == true.
		// Invariant: f(index-1) == false, f(upper) == true.
		upper := i.numRestarts
		for index < upper {
			h := int32(uint(index+upper) >> 1) // avoid overflow when computing h
			// index ≤ h < upper
			offset := int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*h:]))
			// For a restart point, there are 0 bytes shared with the previous key.
			// The varint encoding of 0 occupies 1 byte.
			ptr := unsafe.Pointer(uintptr(i.ptr) + uintptr(offset+1))

			// Decode the key at that restart point, and compare it to the key
			// sought. See the comment in readEntry for why we manually inline the
			// varint decoding.
			var v1 uint32
			src := (*[5]uint8)(ptr)
			if a := (*src)[0]; a < 128 {
				v1 = uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 1)
			} else if a, b := a&0x7f, (*src)[1]; b < 128 {
				v1 = uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 2)
			} else if b, c := b&0x7f, (*src)[2]; c < 128 {
				v1 = uint32(c)<<14 | uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 3)
			} else if c, d := c&0x7f, (*src)[3]; d < 128 {
				v1 = uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 4)
			} else {
				d, e := d&0x7f, (*src)[4]
				v1 = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
				ptr = unsafe.Pointer(uintptr(ptr) + 5)
			}

			if src := (*[5]uint8)(ptr); (*src)[0] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 1)
			} else if (*src)[1] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 2)
			} else if (*src)[2] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 3)
			} else if (*src)[3] < 128 {
				ptr = unsafe.Pointer(uintptr(ptr) + 4)
			} else {
				ptr = unsafe.Pointer(uintptr(ptr) + 5)
			}

			// Manually inlining base.DecodeInternalKey provides a 5-10% speedup on
			// BlockIter benchmarks.
			s := getBytes(ptr, int(v1))
			var k InternalKey
			if n := len(s) - 8; n >= 0 {
				k.Trailer = binary.LittleEndian.Uint64(s[n:])
				k.UserKey = s[:n:n]
				// NB: We can't have duplicate keys if the globalSeqNum != 0, so we
				// leave the seqnum on this key as 0 as it won't affect our search
				// since ikey has the maximum seqnum.
			} else {
				k.Trailer = uint64(InternalKeyKindInvalid)
			}

			if base.InternalCompare(i.cmp, ikey, k) > 0 {
				index = h + 1 // preserves f(i-1) == false
			} else {
				upper = h // preserves f(j) == true
			}
		}
		// index == upper, f(index-1) == false, and f(upper) (= f(index)) == true
		// => answer is index.
	}

	// Since keys are strictly increasing, if index > 0 then the restart point at
	// index-1 will be the largest whose key is < the key sought.
	if index > 0 {
		i.offset = int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*(index-1):]))
	} else if index == 0 {
		// If index == 0 then all keys in this block are larger than the key
		// sought.
		i.offset = -1
		i.nextOffset = 0
		return nil, nil
	}

	// Iterate from that restart point to somewhere >= the key sought, then back
	// up to the previous entry. The expectation is that we'll be performing
	// reverse iteration, so we cache the entries as we advance forward.
	i.clearCache()
	i.nextOffset = i.offset

	for {
		i.offset = i.nextOffset
		i.readEntry()
		i.decodeInternalKey(i.key)
		i.cacheEntry()

		if i.cmp(i.ikey.UserKey, ikey.UserKey) >= 0 {
			// The current key is greater than or equal to our search key. Back up to
			// the previous key which was less than our search key.
			i.Prev()
			return &i.ikey, i.val
		}

		if i.nextOffset >= i.restarts {
			// We've reached the end of the block. Return the current key.
			break
		}
	}

	if !i.Valid() {
		return nil, nil
	}
	return &i.ikey, i.val
}

// First implements internalIterator.First, as documented in the pebble
// package.
func (i *blockIter) First() (*InternalKey, []byte) {
	i.offset = 0
	if !i.Valid() {
		return nil, nil
	}
	i.readEntry()
	i.decodeInternalKey(i.key)
	return &i.ikey, i.val
}

// Last implements internalIterator.Last, as documented in the pebble package.
func (i *blockIter) Last() (*InternalKey, []byte) {
	// Seek forward from the last restart point.
	i.offset = int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*(i.numRestarts-1):]))
	if !i.Valid() {
		return nil, nil
	}

	i.readEntry()
	i.clearCache()
	i.cacheEntry()

	for i.nextOffset < i.restarts {
		i.offset = i.nextOffset
		i.readEntry()
		i.cacheEntry()
	}

	i.decodeInternalKey(i.key)
	return &i.ikey, i.val
}

// Next implements internalIterator.Next, as documented in the pebble
// package.
func (i *blockIter) Next() (*InternalKey, []byte) {
	i.offset = i.nextOffset
	if !i.Valid() {
		return nil, nil
	}
	i.readEntry()
	// Manually inlined version of i.decodeInternalKey(i.key).
	if n := len(i.key) - 8; n >= 0 {
		i.ikey.Trailer = binary.LittleEndian.Uint64(i.key[n:])
		i.ikey.UserKey = i.key[:n:n]
		if i.globalSeqNum != 0 {
			i.ikey.SetSeqNum(i.globalSeqNum)
		}
	} else {
		i.ikey.Trailer = uint64(InternalKeyKindInvalid)
		i.ikey.UserKey = nil
	}
	return &i.ikey, i.val
}

// Prev implements internalIterator.Prev, as documented in the pebble
// package.
func (i *blockIter) Prev() (*InternalKey, []byte) {
	if n := len(i.cached) - 1; n > 0 && i.cached[n].offset == i.offset {
		i.nextOffset = i.offset
		e := &i.cached[n-1]
		i.offset = e.offset
		i.val = getBytes(unsafe.Pointer(uintptr(i.ptr)+uintptr(e.valStart)), int(e.valSize))
		// Manually inlined version of i.decodeInternalKey(key).
		key := i.cachedBuf[e.keyStart:e.keyEnd]
		if n := len(key) - 8; n >= 0 {
			i.ikey.Trailer = binary.LittleEndian.Uint64(key[n:])
			i.ikey.UserKey = key[:n:n]
			if i.globalSeqNum != 0 {
				i.ikey.SetSeqNum(i.globalSeqNum)
			}
		} else {
			i.ikey.Trailer = uint64(InternalKeyKindInvalid)
			i.ikey.UserKey = nil
		}
		i.cached = i.cached[:n]
		return &i.ikey, i.val
	}

	if i.offset == 0 {
		i.offset = -1
		i.nextOffset = 0
		return nil, nil
	}

	targetOffset := i.offset
	var index int32

	{
		// NB: manually inlined sort.Search is ~5% faster.
		//
		// Define f(-1) == false and f(n) == true.
		// Invariant: f(index-1) == false, f(upper) == true.
		upper := i.numRestarts
		for index < upper {
			h := int32(uint(index+upper) >> 1) // avoid overflow when computing h
			// index ≤ h < upper
			offset := int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*h:]))
			if offset < targetOffset {
				index = h + 1 // preserves f(i-1) == false
			} else {
				upper = h // preserves f(j) == true
			}
		}
		// index == upper, f(index-1) == false, and f(upper) (= f(index)) == true
		// => answer is index.
	}

	i.offset = 0
	if index > 0 {
		i.offset = int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*(index-1):]))
	}

	i.readEntry()
	i.clearCache()
	i.cacheEntry()

	for i.nextOffset < targetOffset {
		i.offset = i.nextOffset
		i.readEntry()
		i.cacheEntry()
	}

	i.decodeInternalKey(i.key)
	return &i.ikey, i.val
}

// Key implements internalIterator.Key, as documented in the pebble package.
func (i *blockIter) Key() *InternalKey {
	return &i.ikey
}

// Value implements internalIterator.Value, as documented in the pebble
// package.
func (i *blockIter) Value() []byte {
	return i.val
}

// Valid implements internalIterator.Valid, as documented in the pebble
// package.
func (i *blockIter) Valid() bool {
	return i.offset >= 0 && i.offset < i.restarts
}

// Error implements internalIterator.Error, as documented in the pebble
// package.
func (i *blockIter) Error() error {
	return i.err
}

// Close implements internalIterator.Close, as documented in the pebble
// package.
func (i *blockIter) Close() error {
	i.cacheHandle.Release()
	i.val = nil
	return i.err
}

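// SetBounds implements internalIterator.SetBounds, as documented in the
// pebble package.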
func (i *blockIter) SetBounds(lower, upper []byte) {
	// This should never be called as bounds are handled by sstable.Iterator.
	panic("pebble: SetBounds unimplemented")
}

// invalidate the iterator, positioning it below the first entry.
func (i *blockIter) invalidateLower() {
	i.offset = -1
}

// invalidate the iterator, positioning it after the last entry.
func (i *blockIter) invalidateUpper() {
	i.offset = i.restarts
}