github.com/df-mc/goleveldb@v1.1.9/leveldb/table/reader.go (about)

     1  // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
     2  // All rights reserved.
     3  //
     4  // Use of this source code is governed by a BSD-style license that can be
     5  // found in the LICENSE file.
     6  
     7  package table
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/binary"
    12  	"fmt"
    13  	"github.com/klauspost/compress/flate"
    14  	"io"
    15  	"sort"
    16  	"strings"
    17  	"sync"
    18  
    19  	"github.com/golang/snappy"
    20  
    21  	"github.com/df-mc/goleveldb/leveldb/cache"
    22  	"github.com/df-mc/goleveldb/leveldb/comparer"
    23  	"github.com/df-mc/goleveldb/leveldb/errors"
    24  	"github.com/df-mc/goleveldb/leveldb/filter"
    25  	"github.com/df-mc/goleveldb/leveldb/iterator"
    26  	"github.com/df-mc/goleveldb/leveldb/opt"
    27  	"github.com/df-mc/goleveldb/leveldb/storage"
    28  	"github.com/df-mc/goleveldb/leveldb/util"
    29  )
    30  
// Reader errors.
var (
	// ErrNotFound is an alias of errors.ErrNotFound; the lookup methods in
	// this file return it when no matching entry exists.
	ErrNotFound       = errors.ErrNotFound
	// ErrReaderReleased is the error stored on a Reader by Release.
	ErrReaderReleased = errors.New("leveldb/table: reader released")
	// ErrIterReleased is set on a block iterator that is used after Release.
	ErrIterReleased   = errors.New("leveldb/table: iterator released")
)
    37  
    38  // ErrCorrupted describes error due to corruption. This error will be wrapped
    39  // with errors.ErrCorrupted.
    40  type ErrCorrupted struct {
    41  	Pos    int64
    42  	Size   int64
    43  	Kind   string
    44  	Reason string
    45  }
    46  
    47  func (e *ErrCorrupted) Error() string {
    48  	return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason)
    49  }
    50  
    51  func max(x, y int) int {
    52  	if x > y {
    53  		return x
    54  	}
    55  	return y
    56  }
    57  
// block is an in-memory table block: the raw bytes together with the
// location of the restart-point array stored at its tail.
type block struct {
	// bpool is the pool that data is handed back to by Release.
	bpool          *util.BufferPool
	// bh is the handle the block was read from; it is used when reporting
	// corruption.
	bh             blockHandle
	// data holds the block content (entries followed by the restart array).
	data           []byte
	// restartsLen is the number of restart points, decoded from the last
	// four bytes of data.
	restartsLen    int
	// restartsOffset is the offset in data where the restart array starts,
	// which is also where the entries end.
	restartsOffset int
}
    65  
    66  func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) {
    67  	index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
    68  		offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):]))
    69  		offset++                                    // shared always zero, since this is a restart point
    70  		v1, n1 := binary.Uvarint(b.data[offset:])   // key length
    71  		_, n2 := binary.Uvarint(b.data[offset+n1:]) // value length
    72  		m := offset + n1 + n2
    73  		return cmp.Compare(b.data[m:m+int(v1)], key) > 0
    74  	}) + rstart - 1
    75  	if index < rstart {
    76  		// The smallest key is greater-than key sought.
    77  		index = rstart
    78  	}
    79  	offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
    80  	return
    81  }
    82  
    83  func (b *block) restartIndex(rstart, rlimit, offset int) int {
    84  	return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool {
    85  		return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) > offset
    86  	}) + rstart - 1
    87  }
    88  
    89  func (b *block) restartOffset(index int) int {
    90  	return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:]))
    91  }
    92  
    93  func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) {
    94  	if offset >= b.restartsOffset {
    95  		if offset != b.restartsOffset {
    96  			err = &ErrCorrupted{Reason: "entries offset not aligned"}
    97  		}
    98  		return
    99  	}
   100  	v0, n0 := binary.Uvarint(b.data[offset:])       // Shared prefix length
   101  	v1, n1 := binary.Uvarint(b.data[offset+n0:])    // Key length
   102  	v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length
   103  	m := n0 + n1 + n2
   104  	n = m + int(v1) + int(v2)
   105  	if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset {
   106  		err = &ErrCorrupted{Reason: "entries corrupted"}
   107  		return
   108  	}
   109  	key = b.data[offset+m : offset+m+int(v1)]
   110  	value = b.data[offset+m+int(v1) : offset+n]
   111  	nShared = int(v0)
   112  	return
   113  }
   114  
   115  func (b *block) Release() {
   116  	b.bpool.Put(b.data)
   117  	b.bpool = nil
   118  	b.data = nil
   119  }
   120  
// dir is the state of a blockIter: either a position marker (start/end of
// input, released) or the direction of the last successful move.
type dir int

// The order of these constants matters: Key and Value treat every state
// less than or equal to dirEOI as "no current entry".
const (
	// dirReleased (-1) marks an iterator that has been released.
	dirReleased dir = iota - 1
	// dirSOI: positioned before the first entry (start of input).
	dirSOI
	// dirEOI: positioned after the last entry (end of input).
	dirEOI
	// dirBackward: the last move was made by Prev.
	dirBackward
	// dirForward: the last move was made by Next.
	dirForward
)
   130  
// blockIter iterates over the entries of a single block, optionally
// restricted to a sub-range of it.
type blockIter struct {
	// tr is the Reader the block belongs to; it supplies the comparer and
	// builds corruption errors.
	tr            *Reader
	block         *block
	// blockReleaser and releaser are both released (in that order) by
	// Release; releaser is installed through SetReleaser.
	blockReleaser util.Releaser
	releaser      util.Releaser
	key, value    []byte
	// offset is the block offset just past the current entry.
	offset        int
	// Previous offset, only filled by Next.
	prevOffset   int
	// prevNode and prevKeys are the cache built by Prev. prevNode[0] is the
	// offset the cached range starts at; it is followed by one triple
	// (offset into prevKeys, value offset in block data, value length) per
	// cached entry. prevKeys holds the concatenated cached keys.
	prevNode     []int
	prevKeys     []byte
	restartIndex int
	// Iterator direction.
	dir dir
	// Restart index slice range.
	riStart int
	riLimit int
	// Offset slice range. offsetStart is the restart-point offset decoding
	// begins at; offsetRealStart is the offset of the first entry that is
	// actually exposed; offsetLimit is where iteration ends.
	offsetStart     int
	offsetRealStart int
	offsetLimit     int
	// Error.
	err error
}
   155  
   156  func (i *blockIter) sErr(err error) {
   157  	i.err = err
   158  	i.key = nil
   159  	i.value = nil
   160  	i.prevNode = nil
   161  	i.prevKeys = nil
   162  }
   163  
   164  func (i *blockIter) reset() {
   165  	if i.dir == dirBackward {
   166  		i.prevNode = i.prevNode[:0]
   167  		i.prevKeys = i.prevKeys[:0]
   168  	}
   169  	i.restartIndex = i.riStart
   170  	i.offset = i.offsetStart
   171  	i.dir = dirSOI
   172  	i.key = i.key[:0]
   173  	i.value = nil
   174  }
   175  
   176  func (i *blockIter) isFirst() bool {
   177  	switch i.dir {
   178  	case dirForward:
   179  		return i.prevOffset == i.offsetRealStart
   180  	case dirBackward:
   181  		return len(i.prevNode) == 1 && i.restartIndex == i.riStart
   182  	}
   183  	return false
   184  }
   185  
   186  func (i *blockIter) isLast() bool {
   187  	switch i.dir {
   188  	case dirForward, dirBackward:
   189  		return i.offset == i.offsetLimit
   190  	}
   191  	return false
   192  }
   193  
   194  func (i *blockIter) First() bool {
   195  	if i.err != nil {
   196  		return false
   197  	} else if i.dir == dirReleased {
   198  		i.err = ErrIterReleased
   199  		return false
   200  	}
   201  
   202  	if i.dir == dirBackward {
   203  		i.prevNode = i.prevNode[:0]
   204  		i.prevKeys = i.prevKeys[:0]
   205  	}
   206  	i.dir = dirSOI
   207  	return i.Next()
   208  }
   209  
   210  func (i *blockIter) Last() bool {
   211  	if i.err != nil {
   212  		return false
   213  	} else if i.dir == dirReleased {
   214  		i.err = ErrIterReleased
   215  		return false
   216  	}
   217  
   218  	if i.dir == dirBackward {
   219  		i.prevNode = i.prevNode[:0]
   220  		i.prevKeys = i.prevKeys[:0]
   221  	}
   222  	i.dir = dirEOI
   223  	return i.Prev()
   224  }
   225  
   226  func (i *blockIter) Seek(key []byte) bool {
   227  	if i.err != nil {
   228  		return false
   229  	} else if i.dir == dirReleased {
   230  		i.err = ErrIterReleased
   231  		return false
   232  	}
   233  
   234  	ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key)
   235  	if err != nil {
   236  		i.sErr(err)
   237  		return false
   238  	}
   239  	i.restartIndex = ri
   240  	i.offset = max(i.offsetStart, offset)
   241  	if i.dir == dirSOI || i.dir == dirEOI {
   242  		i.dir = dirForward
   243  	}
   244  	for i.Next() {
   245  		if i.tr.cmp.Compare(i.key, key) >= 0 {
   246  			return true
   247  		}
   248  	}
   249  	return false
   250  }
   251  
// Next advances the iterator to the following entry and reports whether one
// exists. At the end of the range it switches to dirEOI and returns false;
// decoding problems are stored via sErr as corruption errors.
func (i *blockIter) Next() bool {
	if i.dir == dirEOI || i.err != nil {
		return false
	} else if i.dir == dirReleased {
		i.err = ErrIterReleased
		return false
	}

	if i.dir == dirSOI {
		// Start decoding from the beginning of the range.
		i.restartIndex = i.riStart
		i.offset = i.offsetStart
	} else if i.dir == dirBackward {
		// Changing direction: the backward cache is no longer valid.
		i.prevNode = i.prevNode[:0]
		i.prevKeys = i.prevKeys[:0]
	}
	// Entries between offsetStart and offsetRealStart are decoded only to
	// rebuild the prefix-compressed key; they are never exposed.
	for i.offset < i.offsetRealStart {
		key, value, nShared, n, err := i.block.entry(i.offset)
		if err != nil {
			i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
			return false
		}
		if n == 0 {
			// entry returns n == 0 at the end of the entries area.
			i.dir = dirEOI
			return false
		}
		i.key = append(i.key[:nShared], key...)
		i.value = value
		i.offset += n
	}
	if i.offset >= i.offsetLimit {
		i.dir = dirEOI
		if i.offset != i.offsetLimit {
			// The previous entry ran past the limit.
			i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
		}
		return false
	}
	key, value, nShared, n, err := i.block.entry(i.offset)
	if err != nil {
		i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
		return false
	}
	if n == 0 {
		i.dir = dirEOI
		return false
	}
	// Rebuild the full key: keep the shared prefix of the previous key and
	// append the non-shared suffix.
	i.key = append(i.key[:nShared], key...)
	i.value = value
	// Remember where this entry starts (used by Prev, isFirst and
	// newBlockIter), then move offset past it.
	i.prevOffset = i.offset
	i.offset += n
	i.dir = dirForward
	return true
}
   304  
// Prev moves the iterator to the preceding entry and reports whether one
// exists. Entries can only be decoded forward, so Prev decodes a whole
// restart range up to the target offset, caches the entries it passes in
// prevNode/prevKeys, and serves subsequent calls from that cache until the
// range is exhausted.
func (i *blockIter) Prev() bool {
	if i.dir == dirSOI || i.err != nil {
		return false
	} else if i.dir == dirReleased {
		i.err = ErrIterReleased
		return false
	}

	// ri is the restart index from which the entries cache is rebuilt.
	var ri int
	if i.dir == dirForward {
		// Change direction.
		i.offset = i.prevOffset
		if i.offset == i.offsetRealStart {
			i.dir = dirSOI
			return false
		}
		ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset)
		i.dir = dirBackward
	} else if i.dir == dirEOI {
		// At the end of iterator.
		i.restartIndex = i.riLimit
		i.offset = i.offsetLimit
		if i.offset == i.offsetRealStart {
			i.dir = dirSOI
			return false
		}
		ri = i.riLimit - 1
		i.dir = dirBackward
	} else if len(i.prevNode) == 1 {
		// This is the end of a restart range.
		// Only the range's start offset is left in the cache; continue
		// with the previous restart range.
		i.offset = i.prevNode[0]
		i.prevNode = i.prevNode[:0]
		if i.restartIndex == i.riStart {
			i.dir = dirSOI
			return false
		}
		i.restartIndex--
		ri = i.restartIndex
	} else {
		// In the middle of restart range, get from cache.
		// Pop the last (key offset, value offset, value length) triple.
		n := len(i.prevNode) - 3
		node := i.prevNode[n:]
		i.prevNode = i.prevNode[:n]
		// Get the key.
		ko := node[0]
		i.key = append(i.key[:0], i.prevKeys[ko:]...)
		i.prevKeys = i.prevKeys[:ko]
		// Get the value.
		vo := node[1]
		vl := vo + node[2]
		i.value = i.block.data[vo:vl]
		i.offset = vl
		return true
	}
	// Build entries cache.
	i.key = i.key[:0]
	i.value = nil
	offset := i.block.restartOffset(ri)
	if offset == i.offset {
		// The target is itself a restart point, so the entry before it
		// lives in the previous restart range.
		ri--
		if ri < 0 {
			i.dir = dirSOI
			return false
		}
		offset = i.block.restartOffset(ri)
	}
	// prevNode[0] records where the cached range starts.
	i.prevNode = append(i.prevNode, offset)
	for {
		key, value, nShared, n, err := i.block.entry(offset)
		if err != nil {
			i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err))
			return false
		}
		if offset >= i.offsetRealStart {
			if i.value != nil {
				// Appends 3 variables:
				// 1. Previous keys offset
				// 2. Value offset in the data block
				// 3. Value length
				i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value))
				i.prevKeys = append(i.prevKeys, i.key...)
			}
			i.value = value
		}
		i.key = append(i.key[:nShared], key...)
		offset += n
		// Stop if target offset reached.
		if offset >= i.offset {
			if offset != i.offset {
				i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned"))
				return false
			}

			break
		}
	}
	i.restartIndex = ri
	i.offset = offset
	return true
}
   405  
   406  func (i *blockIter) Key() []byte {
   407  	if i.err != nil || i.dir <= dirEOI {
   408  		return nil
   409  	}
   410  	return i.key
   411  }
   412  
   413  func (i *blockIter) Value() []byte {
   414  	if i.err != nil || i.dir <= dirEOI {
   415  		return nil
   416  	}
   417  	return i.value
   418  }
   419  
   420  func (i *blockIter) Release() {
   421  	if i.dir != dirReleased {
   422  		i.tr = nil
   423  		i.block = nil
   424  		i.prevNode = nil
   425  		i.prevKeys = nil
   426  		i.key = nil
   427  		i.value = nil
   428  		i.dir = dirReleased
   429  		if i.blockReleaser != nil {
   430  			i.blockReleaser.Release()
   431  			i.blockReleaser = nil
   432  		}
   433  		if i.releaser != nil {
   434  			i.releaser.Release()
   435  			i.releaser = nil
   436  		}
   437  	}
   438  }
   439  
   440  func (i *blockIter) SetReleaser(releaser util.Releaser) {
   441  	if i.dir == dirReleased {
   442  		panic(util.ErrReleased)
   443  	}
   444  	if i.releaser != nil && releaser != nil {
   445  		panic(util.ErrHasReleaser)
   446  	}
   447  	i.releaser = releaser
   448  }
   449  
   450  func (i *blockIter) Valid() bool {
   451  	return i.err == nil && (i.dir == dirBackward || i.dir == dirForward)
   452  }
   453  
   454  func (i *blockIter) Error() error {
   455  	return i.err
   456  }
   457  
// filterBlock is a decoded filter block: the filter data followed by a table
// of 4-byte offsets into it.
type filterBlock struct {
	// bpool is the pool that data is handed back to by Release.
	bpool      *util.BufferPool
	data       []byte
	// oOffset is the position in data where the offsets table starts.
	oOffset    int
	// baseLg is the shift applied to a data block offset to obtain its
	// filter index; it is read from the last byte of the block.
	baseLg     uint
	// filtersNum is the number of entries in the offsets table.
	filtersNum int
}
   465  
   466  func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool {
   467  	i := int(offset >> b.baseLg)
   468  	if i < b.filtersNum {
   469  		o := b.data[b.oOffset+i*4:]
   470  		n := int(binary.LittleEndian.Uint32(o))
   471  		m := int(binary.LittleEndian.Uint32(o[4:]))
   472  		if n < m && m <= b.oOffset {
   473  			return filter.Contains(b.data[n:m], key)
   474  		} else if n == m {
   475  			return false
   476  		}
   477  	}
   478  	return true
   479  }
   480  
   481  func (b *filterBlock) Release() {
   482  	b.bpool.Put(b.data)
   483  	b.bpool = nil
   484  	b.data = nil
   485  }
   486  
// indexIter iterates over the index block and, through Get, opens an
// iterator on the data block the current index entry points to.
type indexIter struct {
	*blockIter
	tr    *Reader
	// slice is the key range requested by the caller; Get forwards it only
	// to the first and last data blocks of the range.
	slice *util.Range
	// Options
	fillCache bool
}
   494  
   495  func (i *indexIter) Get() iterator.Iterator {
   496  	value := i.Value()
   497  	if value == nil {
   498  		return nil
   499  	}
   500  	dataBH, n := decodeBlockHandle(value)
   501  	if n == 0 {
   502  		return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle"))
   503  	}
   504  
   505  	var slice *util.Range
   506  	if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) {
   507  		slice = i.slice
   508  	}
   509  	return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache)
   510  }
   511  
// Reader is a table reader.
type Reader struct {
	// mu is read-locked by the lookup and iterator methods and write-locked
	// by Release.
	mu     sync.RWMutex
	// fd identifies the table file in corruption errors.
	fd     storage.FileDesc
	reader io.ReaderAt
	// cache, when non-nil, holds decoded blocks keyed by block offset.
	cache  *cache.NamespaceGetter
	// err, once set, is returned by every subsequent operation.
	err    error
	bpool  *util.BufferPool
	// Options
	o              *opt.Options
	cmp            comparer.Comparer
	filter         filter.Filter
	verifyChecksum bool

	// dataEnd is the offset OffsetOf reports for keys past the last entry.
	dataEnd                   int64
	metaBH, indexBH, filterBH blockHandle
	// indexBlock and filterBlock, when non-nil, are used directly by
	// getIndexBlock/getFilterBlock instead of reading the blocks again.
	indexBlock                *block
	filterBlock               *filterBlock
}
   531  
   532  func (r *Reader) blockKind(bh blockHandle) string {
   533  	switch bh.offset {
   534  	case r.metaBH.offset:
   535  		return "meta-block"
   536  	case r.indexBH.offset:
   537  		return "index-block"
   538  	case r.filterBH.offset:
   539  		if r.filterBH.length > 0 {
   540  			return "filter-block"
   541  		}
   542  	}
   543  	return "data-block"
   544  }
   545  
   546  func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error {
   547  	return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}}
   548  }
   549  
   550  func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error {
   551  	return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason)
   552  }
   553  
   554  func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error {
   555  	if cerr, ok := err.(*ErrCorrupted); ok {
   556  		cerr.Pos = int64(bh.offset)
   557  		cerr.Size = int64(bh.length)
   558  		cerr.Kind = r.blockKind(bh)
   559  		return &errors.ErrCorrupted{Fd: r.fd, Err: cerr}
   560  	}
   561  	return err
   562  }
   563  
// bufferPool recycles the scratch buffers that readRawBlock decompresses
// flate blocks into. New buffers start with 4096 bytes of capacity.
var bufferPool = sync.Pool{
	New: func() interface{} {
		return bytes.NewBuffer(make([]byte, 0, 4096))
	},
}
// reader is the package-wide flate decompressor. readRawBlock creates it on
// first use and resets it for every following block.
var reader io.ReadCloser
// rmu is held by readRawBlock while it uses reader.
var rmu sync.Mutex
   571  
   572  func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) {
   573  	data := r.bpool.Get(int(bh.length + blockTrailerLen))
   574  	if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF {
   575  		return nil, err
   576  	}
   577  
   578  	if verifyChecksum {
   579  		n := bh.length + 1
   580  		checksum0 := binary.LittleEndian.Uint32(data[n:])
   581  		checksum1 := util.NewCRC(data[:n]).Value()
   582  		if checksum0 != checksum1 {
   583  			r.bpool.Put(data)
   584  			return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1))
   585  		}
   586  	}
   587  
   588  	switch data[bh.length] {
   589  	case blockTypeNoCompression:
   590  		data = data[:bh.length]
   591  	case blockTypeSnappyCompression:
   592  		decLen, err := snappy.DecodedLen(data[:bh.length])
   593  		if err != nil {
   594  			r.bpool.Put(data)
   595  			return nil, r.newErrCorruptedBH(bh, err.Error())
   596  		}
   597  		decData := r.bpool.Get(decLen)
   598  		decData, err = snappy.Decode(decData, data[:bh.length])
   599  		r.bpool.Put(data)
   600  		if err != nil {
   601  			r.bpool.Put(decData)
   602  			return nil, r.newErrCorruptedBH(bh, err.Error())
   603  		}
   604  		data = decData
   605  	case blockTypeFlateCompression:
   606  		buf := bufferPool.Get().(*bytes.Buffer)
   607  
   608  		rmu.Lock()
   609  		if reader == nil {
   610  			reader = flate.NewReader(bytes.NewBuffer(data[:bh.length]))
   611  		} else {
   612  			_ = reader.(flate.Resetter).Reset(bytes.NewBuffer(data[:bh.length]), nil)
   613  		}
   614  		_, _ = buf.ReadFrom(reader)
   615  
   616  		if err := reader.Close(); err != nil {
   617  			buf.Reset()
   618  			bufferPool.Put(buf)
   619  			rmu.Unlock()
   620  			return nil, r.newErrCorruptedBH(bh, err.Error())
   621  		}
   622  		data = append([]byte(nil), buf.Bytes()...)
   623  		buf.Reset()
   624  		rmu.Unlock()
   625  
   626  		bufferPool.Put(buf)
   627  	default:
   628  		r.bpool.Put(data)
   629  		return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length]))
   630  	}
   631  	return data, nil
   632  }
   633  
   634  func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) {
   635  	data, err := r.readRawBlock(bh, verifyChecksum)
   636  	if err != nil {
   637  		return nil, err
   638  	}
   639  	restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:]))
   640  	b := &block{
   641  		bpool:          r.bpool,
   642  		bh:             bh,
   643  		data:           data,
   644  		restartsLen:    restartsLen,
   645  		restartsOffset: len(data) - (restartsLen+1)*4,
   646  	}
   647  	return b, nil
   648  }
   649  
   650  func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) {
   651  	if r.cache != nil {
   652  		var (
   653  			err error
   654  			ch  *cache.Handle
   655  		)
   656  		if fillCache {
   657  			ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
   658  				var b *block
   659  				b, err = r.readBlock(bh, verifyChecksum)
   660  				if err != nil {
   661  					return 0, nil
   662  				}
   663  				return cap(b.data), b
   664  			})
   665  		} else {
   666  			ch = r.cache.Get(bh.offset, nil)
   667  		}
   668  		if ch != nil {
   669  			b, ok := ch.Value().(*block)
   670  			if !ok {
   671  				ch.Release()
   672  				return nil, nil, errors.New("leveldb/table: inconsistent block type")
   673  			}
   674  			return b, ch, err
   675  		} else if err != nil {
   676  			return nil, nil, err
   677  		}
   678  	}
   679  
   680  	b, err := r.readBlock(bh, verifyChecksum)
   681  	return b, b, err
   682  }
   683  
   684  func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) {
   685  	data, err := r.readRawBlock(bh, true)
   686  	if err != nil {
   687  		return nil, err
   688  	}
   689  	n := len(data)
   690  	if n < 5 {
   691  		return nil, r.newErrCorruptedBH(bh, "too short")
   692  	}
   693  	m := n - 5
   694  	oOffset := int(binary.LittleEndian.Uint32(data[m:]))
   695  	if oOffset > m {
   696  		return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset")
   697  	}
   698  	b := &filterBlock{
   699  		bpool:      r.bpool,
   700  		data:       data,
   701  		oOffset:    oOffset,
   702  		baseLg:     uint(data[n-1]),
   703  		filtersNum: (m - oOffset) / 4,
   704  	}
   705  	return b, nil
   706  }
   707  
   708  func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) {
   709  	if r.cache != nil {
   710  		var (
   711  			err error
   712  			ch  *cache.Handle
   713  		)
   714  		if fillCache {
   715  			ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) {
   716  				var b *filterBlock
   717  				b, err = r.readFilterBlock(bh)
   718  				if err != nil {
   719  					return 0, nil
   720  				}
   721  				return cap(b.data), b
   722  			})
   723  		} else {
   724  			ch = r.cache.Get(bh.offset, nil)
   725  		}
   726  		if ch != nil {
   727  			b, ok := ch.Value().(*filterBlock)
   728  			if !ok {
   729  				ch.Release()
   730  				return nil, nil, errors.New("leveldb/table: inconsistent block type")
   731  			}
   732  			return b, ch, err
   733  		} else if err != nil {
   734  			return nil, nil, err
   735  		}
   736  	}
   737  
   738  	b, err := r.readFilterBlock(bh)
   739  	return b, b, err
   740  }
   741  
   742  func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) {
   743  	if r.indexBlock == nil {
   744  		return r.readBlockCached(r.indexBH, true, fillCache)
   745  	}
   746  	return r.indexBlock, util.NoopReleaser{}, nil
   747  }
   748  
   749  func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) {
   750  	if r.filterBlock == nil {
   751  		return r.readFilterBlockCached(r.filterBH, fillCache)
   752  	}
   753  	return r.filterBlock, util.NoopReleaser{}, nil
   754  }
   755  
// newBlockIter creates an iterator over block b. bReleaser, if not nil, is
// released together with the iterator. When slice is not nil the iterator
// is restricted to the entries in that range; inclLimit additionally keeps
// the first entry whose key is greater than or equal to slice.Limit.
func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter {
	bi := &blockIter{
		tr:            r,
		block:         b,
		blockReleaser: bReleaser,
		// Valid key should never be nil.
		key:             make([]byte, 0),
		dir:             dirSOI,
		riStart:         0,
		riLimit:         b.restartsLen,
		offsetStart:     0,
		offsetRealStart: 0,
		offsetLimit:     b.restartsOffset,
	}
	if slice != nil {
		if slice.Start != nil {
			if bi.Seek(slice.Start) {
				// prevOffset is where the entry found by Seek starts;
				// decoding has to begin at the restart point before it.
				bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
				bi.offsetStart = b.restartOffset(bi.riStart)
				bi.offsetRealStart = bi.prevOffset
			} else {
				// No entry at or after Start: the range is empty.
				bi.riStart = b.restartsLen
				bi.offsetStart = b.restartsOffset
				bi.offsetRealStart = b.restartsOffset
			}
		}
		if slice.Limit != nil {
			// With inclLimit the extra Next moves the limit one entry
			// further. If that Next fails the limit stays at the end of
			// the block.
			if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
				bi.offsetLimit = bi.prevOffset
				bi.riLimit = bi.restartIndex + 1
			}
		}
		// The seeks above moved the iterator; rewind it to the start.
		bi.reset()
		if bi.offsetStart > bi.offsetLimit {
			bi.sErr(errors.New("leveldb/table: invalid slice range"))
		}
	}
	return bi
}
   795  
   796  func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
   797  	b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache)
   798  	if err != nil {
   799  		return iterator.NewEmptyIterator(err)
   800  	}
   801  	return r.newBlockIter(b, rel, slice, false)
   802  }
   803  
   804  func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
   805  	r.mu.RLock()
   806  	defer r.mu.RUnlock()
   807  
   808  	if r.err != nil {
   809  		return iterator.NewEmptyIterator(r.err)
   810  	}
   811  
   812  	return r.getDataIter(dataBH, slice, verifyChecksum, fillCache)
   813  }
   814  
   815  // NewIterator creates an iterator from the table.
   816  //
   817  // Slice allows slicing the iterator to only contains keys in the given
   818  // range. A nil Range.Start is treated as a key before all keys in the
   819  // table. And a nil Range.Limit is treated as a key after all keys in
   820  // the table.
   821  //
   822  // WARNING: Any slice returned by interator (e.g. slice returned by calling
   823  // Iterator.Key() or Iterator.Key() methods), its content should not be modified
   824  // unless noted otherwise.
   825  //
   826  // The returned iterator is not safe for concurrent use and should be released
   827  // after use.
   828  //
   829  // Also read Iterator documentation of the leveldb/iterator package.
   830  func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
   831  	r.mu.RLock()
   832  	defer r.mu.RUnlock()
   833  
   834  	if r.err != nil {
   835  		return iterator.NewEmptyIterator(r.err)
   836  	}
   837  
   838  	fillCache := !ro.GetDontFillCache()
   839  	indexBlock, rel, err := r.getIndexBlock(fillCache)
   840  	if err != nil {
   841  		return iterator.NewEmptyIterator(err)
   842  	}
   843  	index := &indexIter{
   844  		blockIter: r.newBlockIter(indexBlock, rel, slice, true),
   845  		tr:        r,
   846  		slice:     slice,
   847  		fillCache: !ro.GetDontFillCache(),
   848  	}
   849  	return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader))
   850  }
   851  
// find looks up the first entry whose key is greater than or equal to key
// and returns that entry's key and, unless noValue is set, its value. It
// returns ErrNotFound when there is no such entry. When filtered is true and
// the reader has a filter, the filter block is consulted first and a
// negative answer short-circuits to ErrNotFound.
//
// find takes the reader's read lock for its whole duration.
func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) {
	r.mu.RLock()
	defer r.mu.RUnlock()

	if r.err != nil {
		err = r.err
		return
	}

	indexBlock, rel, err := r.getIndexBlock(true)
	if err != nil {
		return
	}
	defer rel.Release()

	// The block is released through rel above, so the index iterator gets
	// no block releaser of its own.
	index := r.newBlockIter(indexBlock, nil, nil, true)
	defer index.Release()

	if !index.Seek(key) {
		if err = index.Error(); err == nil {
			err = ErrNotFound
		}
		return
	}

	dataBH, n := decodeBlockHandle(index.Value())
	if n == 0 {
		// NOTE(review): r.err is written while only the read lock is
		// held — confirm this cannot race with concurrent readers.
		r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
		return nil, nil, r.err
	}

	// The filter should only used for exact match.
	if filtered && r.filter != nil {
		filterBlock, frel, ferr := r.getFilterBlock(true)
		if ferr == nil {
			if !filterBlock.contains(r.filter, dataBH.offset, key) {
				frel.Release()
				return nil, nil, ErrNotFound
			}
			frel.Release()
		} else if !errors.IsCorrupted(ferr) {
			// A corrupted filter block is ignored and the lookup falls
			// back to the data block; any other error is returned.
			return nil, nil, ferr
		}
	}

	data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
	if !data.Seek(key) {
		data.Release()
		if err = data.Error(); err != nil {
			return
		}

		// The nearest greater-than key is the first key of the next block.
		if !index.Next() {
			if err = index.Error(); err == nil {
				err = ErrNotFound
			}
			return
		}

		dataBH, n = decodeBlockHandle(index.Value())
		if n == 0 {
			r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
			return nil, nil, r.err
		}

		data = r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache())
		if !data.Next() {
			data.Release()
			if err = data.Error(); err == nil {
				err = ErrNotFound
			}
			return
		}
	}

	// Key doesn't use block buffer, no need to copy the buffer.
	rkey = data.Key()
	if !noValue {
		if r.bpool == nil {
			value = data.Value()
		} else {
			// Value does use block buffer, and since the buffer will be
			// recycled, it need to be copied.
			value = append([]byte{}, data.Value()...)
		}
	}
	data.Release()
	return
}
   942  
   943  // Find finds key/value pair whose key is greater than or equal to the
   944  // given key. It returns ErrNotFound if the table doesn't contain
   945  // such pair.
   946  // If filtered is true then the nearest 'block' will be checked against
   947  // 'filter data' (if present) and will immediately return ErrNotFound if
   948  // 'filter data' indicates that such pair doesn't exist.
   949  //
   950  // The caller may modify the contents of the returned slice as it is its
   951  // own copy.
   952  // It is safe to modify the contents of the argument after Find returns.
   953  func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) {
   954  	return r.find(key, filtered, ro, false)
   955  }
   956  
   957  // FindKey finds key that is greater than or equal to the given key.
   958  // It returns ErrNotFound if the table doesn't contain such key.
   959  // If filtered is true then the nearest 'block' will be checked against
   960  // 'filter data' (if present) and will immediately return ErrNotFound if
   961  // 'filter data' indicates that such key doesn't exist.
   962  //
   963  // The caller may modify the contents of the returned slice as it is its
   964  // own copy.
   965  // It is safe to modify the contents of the argument after Find returns.
   966  func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) {
   967  	rkey, _, err = r.find(key, filtered, ro, true)
   968  	return
   969  }
   970  
   971  // Get gets the value for the given key. It returns errors.ErrNotFound
   972  // if the table does not contain the key.
   973  //
   974  // The caller may modify the contents of the returned slice as it is its
   975  // own copy.
   976  // It is safe to modify the contents of the argument after Find returns.
   977  func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
   978  	r.mu.RLock()
   979  	defer r.mu.RUnlock()
   980  
   981  	if r.err != nil {
   982  		err = r.err
   983  		return
   984  	}
   985  
   986  	rkey, value, err := r.find(key, false, ro, false)
   987  	if err == nil && r.cmp.Compare(rkey, key) != 0 {
   988  		value = nil
   989  		err = ErrNotFound
   990  	}
   991  	return
   992  }
   993  
   994  // OffsetOf returns approximate offset for the given key.
   995  //
   996  // It is safe to modify the contents of the argument after Get returns.
   997  func (r *Reader) OffsetOf(key []byte) (offset int64, err error) {
   998  	r.mu.RLock()
   999  	defer r.mu.RUnlock()
  1000  
  1001  	if r.err != nil {
  1002  		err = r.err
  1003  		return
  1004  	}
  1005  
  1006  	indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true)
  1007  	if err != nil {
  1008  		return
  1009  	}
  1010  	defer rel.Release()
  1011  
  1012  	index := r.newBlockIter(indexBlock, nil, nil, true)
  1013  	defer index.Release()
  1014  	if index.Seek(key) {
  1015  		dataBH, n := decodeBlockHandle(index.Value())
  1016  		if n == 0 {
  1017  			r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle")
  1018  			return
  1019  		}
  1020  		offset = int64(dataBH.offset)
  1021  		return
  1022  	}
  1023  	err = index.Error()
  1024  	if err == nil {
  1025  		offset = r.dataEnd
  1026  	}
  1027  	return
  1028  }
  1029  
  1030  // Release implements util.Releaser.
  1031  // It also close the file if it is an io.Closer.
  1032  func (r *Reader) Release() {
  1033  	r.mu.Lock()
  1034  	defer r.mu.Unlock()
  1035  
  1036  	if closer, ok := r.reader.(io.Closer); ok {
  1037  		closer.Close()
  1038  	}
  1039  	if r.indexBlock != nil {
  1040  		r.indexBlock.Release()
  1041  		r.indexBlock = nil
  1042  	}
  1043  	if r.filterBlock != nil {
  1044  		r.filterBlock.Release()
  1045  		r.filterBlock = nil
  1046  	}
  1047  	r.reader = nil
  1048  	r.cache = nil
  1049  	r.bpool = nil
  1050  	r.err = ErrReaderReleased
  1051  }
  1052  
  1053  // NewReader creates a new initialized table reader for the file.
  1054  // The fi, cache and bpool is optional and can be nil.
  1055  //
  1056  // The returned table reader instance is safe for concurrent use.
  1057  func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) {
  1058  	if f == nil {
  1059  		return nil, errors.New("leveldb/table: nil file")
  1060  	}
  1061  
  1062  	r := &Reader{
  1063  		fd:             fd,
  1064  		reader:         f,
  1065  		cache:          cache,
  1066  		bpool:          bpool,
  1067  		o:              o,
  1068  		cmp:            o.GetComparer(),
  1069  		verifyChecksum: o.GetStrict(opt.StrictBlockChecksum),
  1070  	}
  1071  
  1072  	if size < footerLen {
  1073  		r.err = r.newErrCorrupted(0, size, "table", "too small")
  1074  		return r, nil
  1075  	}
  1076  
  1077  	footerPos := size - footerLen
  1078  	var footer [footerLen]byte
  1079  	if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF {
  1080  		return nil, err
  1081  	}
  1082  	if string(footer[footerLen-len(magic):footerLen]) != magic {
  1083  		r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number")
  1084  		return r, nil
  1085  	}
  1086  
  1087  	var n int
  1088  	// Decode the metaindex block handle.
  1089  	r.metaBH, n = decodeBlockHandle(footer[:])
  1090  	if n == 0 {
  1091  		r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle")
  1092  		return r, nil
  1093  	}
  1094  
  1095  	// Decode the index block handle.
  1096  	r.indexBH, n = decodeBlockHandle(footer[n:])
  1097  	if n == 0 {
  1098  		r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle")
  1099  		return r, nil
  1100  	}
  1101  
  1102  	// Read metaindex block.
  1103  	metaBlock, err := r.readBlock(r.metaBH, true)
  1104  	if err != nil {
  1105  		if errors.IsCorrupted(err) {
  1106  			r.err = err
  1107  			return r, nil
  1108  		}
  1109  		return nil, err
  1110  	}
  1111  
  1112  	// Set data end.
  1113  	r.dataEnd = int64(r.metaBH.offset)
  1114  
  1115  	// Read metaindex.
  1116  	metaIter := r.newBlockIter(metaBlock, nil, nil, true)
  1117  	for metaIter.Next() {
  1118  		key := string(metaIter.Key())
  1119  		if !strings.HasPrefix(key, "filter.") {
  1120  			continue
  1121  		}
  1122  		fn := key[7:]
  1123  		if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn {
  1124  			r.filter = f0
  1125  		} else {
  1126  			for _, f0 := range o.GetAltFilters() {
  1127  				if f0.Name() == fn {
  1128  					r.filter = f0
  1129  					break
  1130  				}
  1131  			}
  1132  		}
  1133  		if r.filter != nil {
  1134  			filterBH, n := decodeBlockHandle(metaIter.Value())
  1135  			if n == 0 {
  1136  				continue
  1137  			}
  1138  			r.filterBH = filterBH
  1139  			// Update data end.
  1140  			r.dataEnd = int64(filterBH.offset)
  1141  			break
  1142  		}
  1143  	}
  1144  	metaIter.Release()
  1145  	metaBlock.Release()
  1146  
  1147  	// Cache index and filter block locally, since we don't have global cache.
  1148  	if cache == nil {
  1149  		r.indexBlock, err = r.readBlock(r.indexBH, true)
  1150  		if err != nil {
  1151  			if errors.IsCorrupted(err) {
  1152  				r.err = err
  1153  				return r, nil
  1154  			}
  1155  			return nil, err
  1156  		}
  1157  		if r.filter != nil {
  1158  			r.filterBlock, err = r.readFilterBlock(r.filterBH)
  1159  			if err != nil {
  1160  				if !errors.IsCorrupted(err) {
  1161  					return nil, err
  1162  				}
  1163  
  1164  				// Don't use filter then.
  1165  				r.filter = nil
  1166  			}
  1167  		}
  1168  	}
  1169  
  1170  	return r, nil
  1171  }