github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/sstable/reader_test.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"math"
    14  	"os"
    15  	"path"
    16  	"path/filepath"
    17  	"strconv"
    18  	"strings"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/cockroachdb/errors"
    23  	"github.com/stretchr/testify/require"
    24  	"github.com/zuoyebang/bitalostable/bloom"
    25  	"github.com/zuoyebang/bitalostable/internal/base"
    26  	"github.com/zuoyebang/bitalostable/internal/cache"
    27  	"github.com/zuoyebang/bitalostable/internal/datadriven"
    28  	"github.com/zuoyebang/bitalostable/internal/errorfs"
    29  	"github.com/zuoyebang/bitalostable/vfs"
    30  	"golang.org/x/exp/rand"
    31  )
    32  
    33  // get is a testing helper that simulates a read and helps verify bloom filters
    34  // until they are available through iterators.
    35  func (r *Reader) get(key []byte) (value []byte, err error) {
    36  	if r.err != nil {
    37  		return nil, r.err
    38  	}
    39  
    40  	if r.tableFilter != nil {
    41  		dataH, err := r.readFilter()
    42  		if err != nil {
    43  			return nil, err
    44  		}
    45  		var lookupKey []byte
    46  		if r.Split != nil {
    47  			lookupKey = key[:r.Split(key)]
    48  		} else {
    49  			lookupKey = key
    50  		}
    51  		mayContain := r.tableFilter.mayContain(dataH.Get(), lookupKey)
    52  		dataH.Release()
    53  		if !mayContain {
    54  			return nil, base.ErrNotFound
    55  		}
    56  	}
    57  
    58  	i, err := r.NewIter(nil /* lower */, nil /* upper */)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  	ikey, value := i.SeekGE(key, base.SeekGEFlagsNone)
    63  
    64  	if ikey == nil || r.Compare(key, ikey.UserKey) != 0 {
    65  		err := i.Close()
    66  		if err == nil {
    67  			err = base.ErrNotFound
    68  		}
    69  		return nil, err
    70  	}
    71  
    72  	// The value will be "freed" when the iterator is closed, so make a copy
    73  	// which will outlast the lifetime of the iterator.
    74  	newValue := make([]byte, len(value))
    75  	copy(newValue, value)
    76  	if err := i.Close(); err != nil {
    77  		return nil, err
    78  	}
    79  	return newValue, nil
    80  }
    81  
// iterAdapter adapts the new Iterator API which returns the key and value from
// positioning methods (Seek*, First, Last, Next, Prev) to the old API which
// returned a boolean corresponding to Valid. Only used by test code.
type iterAdapter struct {
	Iterator
	key *InternalKey // result of the last positioning call; nil when invalid
	val []byte       // value paired with key; nil when invalid
}
    90  
    91  func newIterAdapter(iter Iterator) *iterAdapter {
    92  	return &iterAdapter{
    93  		Iterator: iter,
    94  	}
    95  }
    96  
    97  func (i *iterAdapter) update(key *InternalKey, val []byte) bool {
    98  	i.key = key
    99  	i.val = val
   100  	return i.key != nil
   101  }
   102  
   103  func (i *iterAdapter) String() string {
   104  	return "iter-adapter"
   105  }
   106  
   107  func (i *iterAdapter) SeekGE(key []byte, flags base.SeekGEFlags) bool {
   108  	return i.update(i.Iterator.SeekGE(key, flags))
   109  }
   110  
   111  func (i *iterAdapter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) bool {
   112  	return i.update(i.Iterator.SeekPrefixGE(prefix, key, flags))
   113  }
   114  
   115  func (i *iterAdapter) SeekLT(key []byte, flags base.SeekLTFlags) bool {
   116  	return i.update(i.Iterator.SeekLT(key, flags))
   117  }
   118  
   119  func (i *iterAdapter) First() bool {
   120  	return i.update(i.Iterator.First())
   121  }
   122  
   123  func (i *iterAdapter) Last() bool {
   124  	return i.update(i.Iterator.Last())
   125  }
   126  
   127  func (i *iterAdapter) Next() bool {
   128  	return i.update(i.Iterator.Next())
   129  }
   130  
   131  func (i *iterAdapter) NextIgnoreResult() {
   132  	i.Iterator.Next()
   133  	i.update(nil, nil)
   134  }
   135  
   136  func (i *iterAdapter) Prev() bool {
   137  	return i.update(i.Iterator.Prev())
   138  }
   139  
   140  func (i *iterAdapter) Key() *InternalKey {
   141  	return i.key
   142  }
   143  
   144  func (i *iterAdapter) Value() []byte {
   145  	return i.val
   146  }
   147  
   148  func (i *iterAdapter) Valid() bool {
   149  	return i.key != nil
   150  }
   151  
   152  func (i *iterAdapter) SetBounds(lower, upper []byte) {
   153  	i.Iterator.SetBounds(lower, upper)
   154  	i.key = nil
   155  }
   156  
   157  func TestReader(t *testing.T) {
   158  	writerOpts := map[string]WriterOptions{
   159  		// No bloom filters.
   160  		"default": {},
   161  		"bloom10bit": {
   162  			// The standard policy.
   163  			FilterPolicy: bloom.FilterPolicy(10),
   164  			FilterType:   base.TableFilter,
   165  		},
   166  		"bloom1bit": {
   167  			// A policy with many false positives.
   168  			FilterPolicy: bloom.FilterPolicy(1),
   169  			FilterType:   base.TableFilter,
   170  		},
   171  		"bloom100bit": {
   172  			// A policy unlikely to have false positives.
   173  			FilterPolicy: bloom.FilterPolicy(100),
   174  			FilterType:   base.TableFilter,
   175  		},
   176  	}
   177  
   178  	blockSizes := map[string]int{
   179  		"1bytes":   1,
   180  		"5bytes":   5,
   181  		"10bytes":  10,
   182  		"25bytes":  25,
   183  		"Maxbytes": math.MaxInt32,
   184  	}
   185  
   186  	opts := map[string]*Comparer{
   187  		"default":      nil,
   188  		"prefixFilter": fixtureComparer,
   189  	}
   190  
   191  	testDirs := map[string]string{
   192  		"default":      "testdata/reader",
   193  		"prefixFilter": "testdata/prefixreader",
   194  	}
   195  
   196  	for dName, blockSize := range blockSizes {
   197  		for iName, indexBlockSize := range blockSizes {
   198  			for lName, tableOpt := range writerOpts {
   199  				for oName, cmp := range opts {
   200  					tableOpt.BlockSize = blockSize
   201  					tableOpt.Comparer = cmp
   202  					tableOpt.IndexBlockSize = indexBlockSize
   203  
   204  					t.Run(
   205  						fmt.Sprintf("opts=%s,writerOpts=%s,blockSize=%s,indexSize=%s",
   206  							oName, lName, dName, iName),
   207  						func(t *testing.T) {
   208  							runTestReader(
   209  								t, tableOpt, testDirs[oName], nil /* Reader */, 0)
   210  						})
   211  				}
   212  			}
   213  		}
   214  	}
   215  }
   216  
   217  func TestHamletReader(t *testing.T) {
   218  	prebuiltSSTs := []string{
   219  		"testdata/h.ldb",
   220  		"testdata/h.sst",
   221  		"testdata/h.no-compression.sst",
   222  		"testdata/h.no-compression.two_level_index.sst",
   223  		"testdata/h.block-bloom.no-compression.sst",
   224  		"testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
   225  		"testdata/h.table-bloom.no-compression.sst",
   226  	}
   227  
   228  	for _, prebuiltSST := range prebuiltSSTs {
   229  		f, err := os.Open(filepath.FromSlash(prebuiltSST))
   230  		require.NoError(t, err)
   231  
   232  		r, err := NewReader(f, ReaderOptions{})
   233  		require.NoError(t, err)
   234  
   235  		t.Run(
   236  			fmt.Sprintf("sst=%s", prebuiltSST),
   237  			func(t *testing.T) { runTestReader(t, WriterOptions{}, "testdata/hamletreader", r, 0) },
   238  		)
   239  	}
   240  }
   241  
   242  func TestReaderStats(t *testing.T) {
   243  	tableOpt := WriterOptions{
   244  		BlockSize:      30,
   245  		IndexBlockSize: 30,
   246  	}
   247  	runTestReader(t, tableOpt, "testdata/readerstats", nil, 10000)
   248  }
   249  
// TestInjectedErrors verifies that errors injected at arbitrary read
// operations against prebuilt sstables are propagated to the caller rather
// than swallowed or transformed. For each file, the injection index is
// advanced until a full pass completes with no error.
func TestInjectedErrors(t *testing.T) {
	prebuiltSSTs := []string{
		"testdata/h.ldb",
		"testdata/h.sst",
		"testdata/h.no-compression.sst",
		"testdata/h.no-compression.two_level_index.sst",
		"testdata/h.block-bloom.no-compression.sst",
		"testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
		"testdata/h.table-bloom.no-compression.sst",
	}

	for _, prebuiltSST := range prebuiltSSTs {
		// run opens the table with an error injected on the i'th file
		// operation and performs a representative set of reads. Close errors
		// are folded into the return value via the deferred firstError calls.
		run := func(i int) (reterr error) {
			f, err := os.Open(filepath.FromSlash(prebuiltSST))
			require.NoError(t, err)
			r, err := NewReader(errorfs.WrapFile(f, errorfs.OnIndex(int32(i))), ReaderOptions{})
			if err != nil {
				return firstError(err, f.Close())
			}
			defer func() { reterr = firstError(reterr, r.Close()) }()

			_, err = r.EstimateDiskUsage([]byte("borrower"), []byte("lender"))
			if err != nil {
				return err
			}

			iter, err := r.NewIter(nil, nil)
			if err != nil {
				return err
			}
			defer func() { reterr = firstError(reterr, iter.Close()) }()
			// Drain the iterator; any injected error surfaces via iter.Error.
			for k, v := iter.First(); k != nil && v != nil; k, v = iter.Next() {
			}
			if err = iter.Error(); err != nil {
				return err
			}
			return nil
		}
		for i := 0; ; i++ {
			err := run(i)
			if errors.Is(err, errorfs.ErrInjected) {
				t.Logf("%q, index %d: %s", prebuiltSST, i, err)
				continue
			}
			// A non-injected error is a bug; an error-free run means the
			// injection index is past the final read and we can stop.
			if err != nil {
				t.Errorf("%q, index %d: non-injected error: %+v", prebuiltSST, i, err)
				break
			}
			t.Logf("%q: no error at index %d", prebuiltSST, i)
			break
		}
	}
}
   303  
   304  func TestInvalidReader(t *testing.T) {
   305  	testCases := []struct {
   306  		file     vfs.File
   307  		expected string
   308  	}{
   309  		{nil, "nil file"},
   310  		{vfs.NewMemFile([]byte("invalid sst bytes")), "invalid table"},
   311  	}
   312  	for _, tc := range testCases {
   313  		r, err := NewReader(tc.file, ReaderOptions{})
   314  		if !strings.Contains(err.Error(), tc.expected) {
   315  			t.Fatalf("expected %q, but found %q", tc.expected, err.Error())
   316  		}
   317  		if r != nil {
   318  			t.Fatalf("found non-nil reader returned with non-nil error %q", err.Error())
   319  		}
   320  	}
   321  }
   322  
// runTestReader walks the datadriven test files in dir and executes the
// build/iter/get commands against a Reader. If r is non-nil it is used as the
// initial table (and closed when the walk of each file completes); otherwise
// a table must first be constructed with the "build" command. cacheSize is
// forwarded to runBuildCmd for tables built by the test.
func runTestReader(t *testing.T, o WriterOptions, dir string, r *Reader, cacheSize int) {
	datadriven.Walk(t, dir, func(t *testing.T, path string) {
		// Close whatever reader is live once this test file finishes; the
		// closure mutates the captured r as "build" commands replace it.
		defer func() {
			if r != nil {
				r.Close()
				r = nil
			}
		}()

		datadriven.RunTest(t, path, func(d *datadriven.TestData) string {
			switch d.Cmd {
			case "build":
				// Replace any previous table before building a new one.
				if r != nil {
					r.Close()
					r = nil
				}
				var err error
				_, r, err = runBuildCmd(d, &o, cacheSize)
				if err != nil {
					return err.Error()
				}
				return ""

			case "iter":
				// An optional globalSeqNum argument rewrites the sequence
				// number applied to all keys in the table.
				seqNum, err := scanGlobalSeqNum(d)
				if err != nil {
					return err.Error()
				}
				var stats base.InternalIteratorStats
				r.Properties.GlobalSeqNum = seqNum
				iter, err := r.NewIterWithBlockPropertyFilters(
					nil,  /* lower */
					nil,  /* upper */
					nil,  /* filterer */
					true, /* use filter block */
					&stats,
				)
				if err != nil {
					return err.Error()
				}
				return runIterCmd(d, iter, runIterCmdStats(&stats))

			case "get":
				// One key per input line; emit the value or the lookup error.
				var b bytes.Buffer
				for _, k := range strings.Split(d.Input, "\n") {
					v, err := r.get([]byte(k))
					if err != nil {
						fmt.Fprintf(&b, "<err: %s>\n", err)
					} else {
						fmt.Fprintln(&b, string(v))
					}
				}
				return b.String()
			default:
				return fmt.Sprintf("unknown command: %s", d.Cmd)
			}
		})
	})
}
   382  
// TestReaderCheckComparerMerger verifies that NewReader rejects a table whose
// recorded comparer or merger name is not present in the Comparers/Mergers
// sets supplied by the caller, and accepts it when both names resolve.
func TestReaderCheckComparerMerger(t *testing.T) {
	const testTable = "test"

	// A comparer and merger with non-default names, recorded into the table's
	// properties at write time.
	testComparer := &base.Comparer{
		Name:      "test.comparer",
		Compare:   base.DefaultComparer.Compare,
		Equal:     base.DefaultComparer.Equal,
		Separator: base.DefaultComparer.Separator,
		Successor: base.DefaultComparer.Successor,
	}
	testMerger := &base.Merger{
		Name:  "test.merger",
		Merge: base.DefaultMerger.Merge,
	}
	writerOpts := WriterOptions{
		Comparer:   testComparer,
		MergerName: "test.merger",
	}

	// Write a one-key table that names the custom comparer and merger.
	mem := vfs.NewMem()
	f0, err := mem.Create(testTable)
	require.NoError(t, err)

	w := NewWriter(f0, writerOpts)
	require.NoError(t, w.Set([]byte("test"), nil))
	require.NoError(t, w.Close())

	// Each case lists the comparers/mergers made available to NewReader and
	// the expected error suffix ("" means open must succeed).
	testCases := []struct {
		comparers []*base.Comparer
		mergers   []*base.Merger
		expected  string
	}{
		{
			[]*base.Comparer{testComparer},
			[]*base.Merger{testMerger},
			"",
		},
		{
			[]*base.Comparer{testComparer, base.DefaultComparer},
			[]*base.Merger{testMerger, base.DefaultMerger},
			"",
		},
		{
			[]*base.Comparer{},
			[]*base.Merger{testMerger},
			"unknown comparer test.comparer",
		},
		{
			[]*base.Comparer{base.DefaultComparer},
			[]*base.Merger{testMerger},
			"unknown comparer test.comparer",
		},
		{
			[]*base.Comparer{testComparer},
			[]*base.Merger{},
			"unknown merger test.merger",
		},
		{
			[]*base.Comparer{testComparer},
			[]*base.Merger{base.DefaultMerger},
			"unknown merger test.merger",
		},
	}

	for _, c := range testCases {
		t.Run("", func(t *testing.T) {
			f1, err := mem.Open(testTable)
			require.NoError(t, err)

			comparers := make(Comparers)
			for _, comparer := range c.comparers {
				comparers[comparer.Name] = comparer
			}
			mergers := make(Mergers)
			for _, merger := range c.mergers {
				mergers[merger.Name] = merger
			}
			r, err := NewReader(f1, ReaderOptions{}, comparers, mergers)
			if err != nil {
				// Failure path: reader must be nil and the error must end
				// with the expected message.
				if r != nil {
					t.Fatalf("found non-nil reader returned with non-nil error %q", err.Error())
				}
				if !strings.HasSuffix(err.Error(), c.expected) {
					t.Fatalf("expected %q, but found %q", c.expected, err.Error())
				}
			} else if c.expected != "" {
				t.Fatalf("expected %q, but found success", c.expected)
			}
			if r != nil {
				_ = r.Close()
			}
		})
	}
}
   477  func checkValidPrefix(prefix, key []byte) bool {
   478  	return prefix == nil || bytes.HasPrefix(key, prefix)
   479  }
   480  
// testBytesIteratedWithCompression builds tables across the given block
// sizes and entry counts, iterates them with a compaction iterator, and
// checks that the bytesIterated estimate is monotonic and lands within
// allowedSizeDeviationPercent of the table's actual data size.
// maxNumEntries[i] pairs with blockSizes[i].
func testBytesIteratedWithCompression(
	t *testing.T,
	compression Compression,
	allowedSizeDeviationPercent uint64,
	blockSizes []int,
	maxNumEntries []uint64,
) {
	for i, blockSize := range blockSizes {
		for _, indexBlockSize := range blockSizes {
			for _, numEntries := range []uint64{0, 1, maxNumEntries[i]} {
				r := buildTestTable(t, numEntries, blockSize, indexBlockSize, compression)
				var bytesIterated, prevIterated uint64
				citer, err := r.NewCompactionIter(&bytesIterated)
				require.NoError(t, err)

				// bytesIterated must never decrease while draining.
				for key, _ := citer.First(); key != nil; key, _ = citer.Next() {
					if bytesIterated < prevIterated {
						t.Fatalf("bytesIterated moved backward: %d < %d", bytesIterated, prevIterated)
					}
					prevIterated = bytesIterated
				}

				expected := r.Properties.DataSize
				allowedSizeDeviation := expected * allowedSizeDeviationPercent / 100
				// There is some inaccuracy due to compression estimation.
				if bytesIterated < expected-allowedSizeDeviation || bytesIterated > expected+allowedSizeDeviation {
					t.Fatalf("bytesIterated: got %d, want %d", bytesIterated, expected)
				}

				require.NoError(t, citer.Close())
				require.NoError(t, r.Close())
			}
		}
	}
}
   516  
   517  func TestBytesIterated(t *testing.T) {
   518  	blockSizes := []int{10, 100, 1000, 4096, math.MaxInt32}
   519  	t.Run("Compressed", func(t *testing.T) {
   520  		testBytesIteratedWithCompression(t, SnappyCompression, 1, blockSizes, []uint64{1e5, 1e5, 1e5, 1e5, 1e5})
   521  	})
   522  	t.Run("Uncompressed", func(t *testing.T) {
   523  		testBytesIteratedWithCompression(t, NoCompression, 0, blockSizes, []uint64{1e5, 1e5, 1e5, 1e5, 1e5})
   524  	})
   525  	t.Run("Zstd", func(t *testing.T) {
   526  		// compression with zstd is extremely slow with small block size (esp the nocgo version).
   527  		// use less numEntries to make the test run at reasonable speed (under 10 seconds).
   528  		maxNumEntries := []uint64{1e2, 1e2, 1e3, 4e3, 1e5}
   529  		if useStandardZstdLib {
   530  			maxNumEntries = []uint64{1e3, 1e3, 1e4, 4e4, 1e5}
   531  		}
   532  		testBytesIteratedWithCompression(t, ZstdCompression, 1, blockSizes, maxNumEntries)
   533  	})
   534  }
   535  
// TestCompactionIteratorSetupForCompaction verifies that compaction
// iterators (both single- and two-level) are configured with a sequential
// read handle on the data blocks, for a variety of table shapes.
func TestCompactionIteratorSetupForCompaction(t *testing.T) {
	blockSizes := []int{10, 100, 1000, 4096, math.MaxInt32}
	for _, blockSize := range blockSizes {
		for _, indexBlockSize := range blockSizes {
			for _, numEntries := range []uint64{0, 1, 1e5} {
				r := buildTestTable(t, numEntries, blockSize, indexBlockSize, DefaultCompression)
				var bytesIterated uint64
				citer, err := r.NewCompactionIter(&bytesIterated)
				require.NoError(t, err)
				// Either concrete iterator type must carry a sequential file.
				switch i := citer.(type) {
				case *compactionIterator:
					require.NotNil(t, i.dataRS.sequentialFile)
				case *twoLevelCompactionIterator:
					require.NotNil(t, i.dataRS.sequentialFile)
				default:
					require.Failf(t, fmt.Sprintf("unknown compaction iterator type: %T", citer), "")
				}
				require.NoError(t, citer.Close())
				require.NoError(t, r.Close())
			}
		}
	}
}
   559  
// TestMaybeReadahead drives readaheadState through the datadriven commands
// reset, read, and cache-read, printing the internal state after each
// (simulated) read so the heuristics can be verified against golden output.
func TestMaybeReadahead(t *testing.T) {
	var rs readaheadState
	datadriven.RunTest(t, "testdata/readahead", func(d *datadriven.TestData) string {
		cacheHit := false
		switch d.Cmd {
		case "reset":
			rs.size = initialReadaheadSize
			rs.limit = 0
			rs.numReads = 0
			return ""

		case "cache-read":
			// Same input parsing as "read", but recorded as a cache hit.
			cacheHit = true
			fallthrough
		case "read":
			args := strings.Split(d.Input, ",")
			if len(args) != 2 {
				return "expected 2 args: offset, size"
			}

			offset, err := strconv.ParseInt(strings.TrimSpace(args[0]), 10, 64)
			require.NoError(t, err)
			size, err := strconv.ParseInt(strings.TrimSpace(args[1]), 10, 64)
			require.NoError(t, err)
			var raSize int64
			if cacheHit {
				rs.recordCacheHit(offset, size)
			} else {
				raSize = rs.maybeReadahead(offset, size)
			}

			// Dump the resulting state for comparison with the golden file.
			var buf strings.Builder
			fmt.Fprintf(&buf, "readahead:  %d\n", raSize)
			fmt.Fprintf(&buf, "numReads:   %d\n", rs.numReads)
			fmt.Fprintf(&buf, "size:       %d\n", rs.size)
			fmt.Fprintf(&buf, "prevSize:   %d\n", rs.prevSize)
			fmt.Fprintf(&buf, "limit:      %d", rs.limit)
			return buf.String()
		default:
			return fmt.Sprintf("unknown command: %s", d.Cmd)
		}
	})
}
   603  
// TestReaderChecksumErrors corrupts each data block of a small sstable in
// turn and verifies that forward and reverse iteration both surface a
// "checksum mismatch" error, for both checksum types and both index shapes.
func TestReaderChecksumErrors(t *testing.T) {
	for _, checksumType := range []ChecksumType{ChecksumTypeCRC32c, ChecksumTypeXXHash64} {
		t.Run(fmt.Sprintf("checksum-type=%d", checksumType), func(t *testing.T) {
			for _, twoLevelIndex := range []bool{false, true} {
				t.Run(fmt.Sprintf("two-level-index=%t", twoLevelIndex), func(t *testing.T) {
					mem := vfs.NewMem()

					{
						// Create an sstable with 3 data blocks.
						f, err := mem.Create("test")
						require.NoError(t, err)

						const blockSize = 32
						// A 1-byte index block forces a two-level index.
						indexBlockSize := 4096
						if twoLevelIndex {
							indexBlockSize = 1
						}

						w := NewWriter(f, WriterOptions{
							BlockSize:      blockSize,
							IndexBlockSize: indexBlockSize,
							Checksum:       checksumType,
						})
						require.NoError(t, w.Set(bytes.Repeat([]byte("a"), blockSize), nil))
						require.NoError(t, w.Set(bytes.Repeat([]byte("b"), blockSize), nil))
						require.NoError(t, w.Set(bytes.Repeat([]byte("c"), blockSize), nil))
						require.NoError(t, w.Close())
					}

					// Load the layout so that we know the location of the data blocks.
					var layout *Layout
					{
						f, err := mem.Open("test")
						require.NoError(t, err)

						r, err := NewReader(f, ReaderOptions{})
						require.NoError(t, err)
						layout, err = r.Layout()
						require.NoError(t, err)
						require.EqualValues(t, len(layout.Data), 3)
						require.NoError(t, r.Close())
					}

					for _, bh := range layout.Data {
						// Read the sstable and corrupt the first byte in the target data
						// block.
						orig, err := mem.Open("test")
						require.NoError(t, err)
						data, err := ioutil.ReadAll(orig)
						require.NoError(t, err)
						require.NoError(t, orig.Close())

						// Corrupt the first byte in the block.
						data[bh.Offset] ^= 0xff

						corrupted, err := mem.Create("corrupted")
						require.NoError(t, err)
						_, err = corrupted.Write(data)
						require.NoError(t, err)
						require.NoError(t, corrupted.Close())

						// Verify that we encounter a checksum mismatch error while iterating
						// over the sstable.
						corrupted, err = mem.Open("corrupted")
						require.NoError(t, err)

						r, err := NewReader(corrupted, ReaderOptions{})
						require.NoError(t, err)

						// Forward iteration must hit the corruption.
						iter, err := r.NewIter(nil, nil)
						require.NoError(t, err)
						for k, _ := iter.First(); k != nil; k, _ = iter.Next() {
						}
						require.Regexp(t, `checksum mismatch`, iter.Error())
						require.Regexp(t, `checksum mismatch`, iter.Close())

						// Reverse iteration must hit it too.
						iter, err = r.NewIter(nil, nil)
						require.NoError(t, err)
						for k, _ := iter.Last(); k != nil; k, _ = iter.Prev() {
						}
						require.Regexp(t, `checksum mismatch`, iter.Error())
						require.Regexp(t, `checksum mismatch`, iter.Close())

						require.NoError(t, r.Close())
					}
				})
			}
		})
	}
}
   694  
// TestValidateBlockChecksums verifies Reader.ValidateBlockChecksums: it
// passes on pristine prebuilt sstables and reports "checksum mismatch" after
// a byte is flipped in any of the table's block types (data, index, top
// index, filter, range-del, properties, metaindex). Corruption offsets are
// chosen randomly from a logged seed for reproducibility.
func TestValidateBlockChecksums(t *testing.T) {
	seed := uint64(time.Now().UnixNano())
	rng := rand.New(rand.NewSource(seed))
	t.Logf("using seed = %d", seed)

	allFiles := []string{
		"testdata/h.no-compression.sst",
		"testdata/h.no-compression.two_level_index.sst",
		"testdata/h.sst",
		"testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
		"testdata/h.table-bloom.no-compression.sst",
		"testdata/h.table-bloom.sst",
		"testdata/h.zstd-compression.sst",
	}

	// corruptionLocation selects which block of the table to corrupt.
	type corruptionLocation int
	const (
		corruptionLocationData corruptionLocation = iota
		corruptionLocationIndex
		corruptionLocationTopIndex
		corruptionLocationFilter
		corruptionLocationRangeDel
		corruptionLocationProperties
		corruptionLocationMetaIndex
	)

	// Cases with a nil files list run against allFiles; others restrict to
	// files that actually contain the targeted block type.
	testCases := []struct {
		name                string
		files               []string
		corruptionLocations []corruptionLocation
	}{
		{
			name:                "no corruption",
			corruptionLocations: []corruptionLocation{},
		},
		{
			name: "data block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationData,
			},
		},
		{
			name: "index block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationIndex,
			},
		},
		{
			name: "top index block corruption",
			files: []string{
				"testdata/h.no-compression.two_level_index.sst",
			},
			corruptionLocations: []corruptionLocation{
				corruptionLocationTopIndex,
			},
		},
		{
			name: "filter block corruption",
			files: []string{
				"testdata/h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst",
				"testdata/h.table-bloom.no-compression.sst",
				"testdata/h.table-bloom.sst",
			},
			corruptionLocations: []corruptionLocation{
				corruptionLocationFilter,
			},
		},
		{
			name: "range deletion block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationRangeDel,
			},
		},
		{
			name: "properties block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationProperties,
			},
		},
		{
			name: "metaindex block corruption",
			corruptionLocations: []corruptionLocation{
				corruptionLocationMetaIndex,
			},
		},
		{
			name: "multiple blocks corrupted",
			corruptionLocations: []corruptionLocation{
				corruptionLocationData,
				corruptionLocationIndex,
				corruptionLocationRangeDel,
				corruptionLocationProperties,
				corruptionLocationMetaIndex,
			},
		},
	}

	testFn := func(t *testing.T, file string, corruptionLocations []corruptionLocation) {
		// Create a copy of the SSTable that we can freely corrupt.
		f, err := os.Open(filepath.FromSlash(file))
		require.NoError(t, err)

		pathCopy := path.Join(t.TempDir(), path.Base(file))
		fCopy, err := os.OpenFile(pathCopy, os.O_CREATE|os.O_RDWR, 0600)
		require.NoError(t, err)
		defer fCopy.Close()

		_, err = io.Copy(fCopy, f)
		require.NoError(t, err)
		err = fCopy.Sync()
		require.NoError(t, err)
		require.NoError(t, f.Close())

		filter := bloom.FilterPolicy(10)
		r, err := NewReader(fCopy, ReaderOptions{
			Filters: map[string]FilterPolicy{
				filter.Name(): filter,
			},
		})
		require.NoError(t, err)
		defer func() { require.NoError(t, r.Close()) }()

		// Prior to corruption, validation is successful.
		require.NoError(t, r.ValidateBlockChecksums())

		// If we are not testing for corruption, we can stop here.
		if len(corruptionLocations) == 0 {
			return
		}

		// Perform bit flips in various corruption locations.
		layout, err := r.Layout()
		require.NoError(t, err)
		for _, location := range corruptionLocations {
			var bh BlockHandle
			switch location {
			case corruptionLocationData:
				bh = layout.Data[rng.Intn(len(layout.Data))].BlockHandle
			case corruptionLocationIndex:
				bh = layout.Index[rng.Intn(len(layout.Index))]
			case corruptionLocationTopIndex:
				bh = layout.TopIndex
			case corruptionLocationFilter:
				bh = layout.Filter
			case corruptionLocationRangeDel:
				bh = layout.RangeDel
			case corruptionLocationProperties:
				bh = layout.Properties
			case corruptionLocationMetaIndex:
				bh = layout.MetaIndex
			default:
				t.Fatalf("unknown location")
			}

			// Corrupt a random byte within the selected block.
			pos := int64(bh.Offset) + rng.Int63n(int64(bh.Length))
			t.Logf("altering file=%s @ offset = %d", file, pos)

			b := make([]byte, 1)
			n, err := fCopy.ReadAt(b, pos)
			require.NoError(t, err)
			require.Equal(t, 1, n)
			t.Logf("data (before) = %08b", b)

			b[0] ^= 0xff
			t.Logf("data (after) = %08b", b)

			_, err = fCopy.WriteAt(b, pos)
			require.NoError(t, err)
		}

		// Write back to the file.
		err = fCopy.Sync()
		require.NoError(t, err)

		// Confirm that checksum validation fails.
		err = r.ValidateBlockChecksums()
		require.Error(t, err)
		require.Regexp(t, `checksum mismatch`, err.Error())
	}

	for _, tc := range testCases {
		// By default, test across all files, unless overridden.
		files := tc.files
		if files == nil {
			files = allFiles
		}
		for _, file := range files {
			t.Run(tc.name+" "+path.Base(file), func(t *testing.T) {
				testFn(t, file, tc.corruptionLocations)
			})
		}
	}
}
   889  
   890  func TestReader_TableFormat(t *testing.T) {
   891  	test := func(t *testing.T, want TableFormat) {
   892  		fs := vfs.NewMem()
   893  		f, err := fs.Create("test")
   894  		require.NoError(t, err)
   895  
   896  		opts := WriterOptions{TableFormat: want}
   897  		w := NewWriter(f, opts)
   898  		err = w.Close()
   899  		require.NoError(t, err)
   900  
   901  		f, err = fs.Open("test")
   902  		require.NoError(t, err)
   903  		r, err := NewReader(f, ReaderOptions{})
   904  		require.NoError(t, err)
   905  		defer r.Close()
   906  
   907  		got, err := r.TableFormat()
   908  		require.NoError(t, err)
   909  		require.Equal(t, want, got)
   910  	}
   911  
   912  	for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ {
   913  		t.Run(tf.String(), func(t *testing.T) {
   914  			test(t, tf)
   915  		})
   916  	}
   917  }
   918  
// buildTestTable writes numEntries sequential keys (8-10 byte big-endian
// keys, values up to 99 bytes) into an in-memory sstable with the given
// block sizes and compression, then returns an open Reader over it backed by
// a block cache and a reopenable file.
func buildTestTable(
	t *testing.T, numEntries uint64, blockSize, indexBlockSize int, compression Compression,
) *Reader {
	mem := vfs.NewMem()
	f0, err := mem.Create("test")
	require.NoError(t, err)

	w := NewWriter(f0, WriterOptions{
		BlockSize:      blockSize,
		IndexBlockSize: indexBlockSize,
		Compression:    compression,
		FilterPolicy:   nil,
	})

	var ikey InternalKey
	for i := uint64(0); i < numEntries; i++ {
		// Key length cycles through 8, 9, 10 bytes; value length through 0-99.
		key := make([]byte, 8+i%3)
		value := make([]byte, i%100)
		binary.BigEndian.PutUint64(key, i)
		ikey.UserKey = key
		w.Add(ikey, value)
	}

	require.NoError(t, w.Close())

	// Re-open that filename for reading.
	f1, err := mem.Open("test")
	require.NoError(t, err)

	// The reader takes its own reference on the cache; dropping ours here
	// leaves the reader as the sole owner.
	c := cache.New(128 << 20)
	defer c.Unref()
	r, err := NewReader(f1, ReaderOptions{
		Cache: c,
	}, FileReopenOpt{
		FS:       mem,
		Filename: "test",
	})
	require.NoError(t, err)
	return r
}
   959  
   960  func buildBenchmarkTable(b *testing.B, options WriterOptions) (*Reader, [][]byte) {
   961  	mem := vfs.NewMem()
   962  	f0, err := mem.Create("bench")
   963  	if err != nil {
   964  		b.Fatal(err)
   965  	}
   966  
   967  	w := NewWriter(f0, options)
   968  
   969  	var keys [][]byte
   970  	var ikey InternalKey
   971  	for i := uint64(0); i < 1e6; i++ {
   972  		key := make([]byte, 8)
   973  		binary.BigEndian.PutUint64(key, i)
   974  		keys = append(keys, key)
   975  		ikey.UserKey = key
   976  		w.Add(ikey, nil)
   977  	}
   978  
   979  	if err := w.Close(); err != nil {
   980  		b.Fatal(err)
   981  	}
   982  
   983  	// Re-open that filename for reading.
   984  	f1, err := mem.Open("bench")
   985  	if err != nil {
   986  		b.Fatal(err)
   987  	}
   988  	c := cache.New(128 << 20)
   989  	defer c.Unref()
   990  	r, err := NewReader(f1, ReaderOptions{
   991  		Cache: c,
   992  	})
   993  	if err != nil {
   994  		b.Fatal(err)
   995  	}
   996  	return r, keys
   997  }
   998  
// basicBenchmarks enumerates the writer configurations exercised by the
// table-iterator benchmarks below: identical 32 KiB blocks and restart
// interval, differing only in block compression (Snappy vs ZSTD).
var basicBenchmarks = []struct {
	name    string
	options WriterOptions
}{
	{
		name: "restart=16,compression=Snappy",
		options: WriterOptions{
			BlockSize:            32 << 10,
			BlockRestartInterval: 16,
			FilterPolicy:         nil,
			Compression:          SnappyCompression,
		},
	},
	{
		name: "restart=16,compression=ZSTD",
		options: WriterOptions{
			BlockSize:            32 << 10,
			BlockRestartInterval: 16,
			FilterPolicy:         nil,
			Compression:          ZstdCompression,
		},
	},
}
  1022  
  1023  func BenchmarkTableIterSeekGE(b *testing.B) {
  1024  	for _, bm := range basicBenchmarks {
  1025  		b.Run(bm.name,
  1026  			func(b *testing.B) {
  1027  				r, keys := buildBenchmarkTable(b, bm.options)
  1028  				it, err := r.NewIter(nil /* lower */, nil /* upper */)
  1029  				require.NoError(b, err)
  1030  				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  1031  
  1032  				b.ResetTimer()
  1033  				for i := 0; i < b.N; i++ {
  1034  					it.SeekGE(keys[rng.Intn(len(keys))], base.SeekGEFlagsNone)
  1035  				}
  1036  
  1037  				b.StopTimer()
  1038  				it.Close()
  1039  				r.Close()
  1040  			})
  1041  	}
  1042  }
  1043  
  1044  func BenchmarkTableIterSeekLT(b *testing.B) {
  1045  	for _, bm := range basicBenchmarks {
  1046  		b.Run(bm.name,
  1047  			func(b *testing.B) {
  1048  				r, keys := buildBenchmarkTable(b, bm.options)
  1049  				it, err := r.NewIter(nil /* lower */, nil /* upper */)
  1050  				require.NoError(b, err)
  1051  				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
  1052  
  1053  				b.ResetTimer()
  1054  				for i := 0; i < b.N; i++ {
  1055  					it.SeekLT(keys[rng.Intn(len(keys))], base.SeekLTFlagsNone)
  1056  				}
  1057  
  1058  				b.StopTimer()
  1059  				it.Close()
  1060  				r.Close()
  1061  			})
  1062  	}
  1063  }
  1064  
  1065  func BenchmarkTableIterNext(b *testing.B) {
  1066  	for _, bm := range basicBenchmarks {
  1067  		b.Run(bm.name,
  1068  			func(b *testing.B) {
  1069  				r, _ := buildBenchmarkTable(b, bm.options)
  1070  				it, err := r.NewIter(nil /* lower */, nil /* upper */)
  1071  				require.NoError(b, err)
  1072  
  1073  				b.ResetTimer()
  1074  				var sum int64
  1075  				var key *InternalKey
  1076  				for i := 0; i < b.N; i++ {
  1077  					if key == nil {
  1078  						key, _ = it.First()
  1079  					}
  1080  					sum += int64(binary.BigEndian.Uint64(key.UserKey))
  1081  					key, _ = it.Next()
  1082  				}
  1083  				if testing.Verbose() {
  1084  					fmt.Fprint(ioutil.Discard, sum)
  1085  				}
  1086  
  1087  				b.StopTimer()
  1088  				it.Close()
  1089  				r.Close()
  1090  			})
  1091  	}
  1092  }
  1093  
  1094  func BenchmarkTableIterPrev(b *testing.B) {
  1095  	for _, bm := range basicBenchmarks {
  1096  		b.Run(bm.name,
  1097  			func(b *testing.B) {
  1098  				r, _ := buildBenchmarkTable(b, bm.options)
  1099  				it, err := r.NewIter(nil /* lower */, nil /* upper */)
  1100  				require.NoError(b, err)
  1101  
  1102  				b.ResetTimer()
  1103  				var sum int64
  1104  				var key *InternalKey
  1105  				for i := 0; i < b.N; i++ {
  1106  					if key == nil {
  1107  						key, _ = it.Last()
  1108  					}
  1109  					sum += int64(binary.BigEndian.Uint64(key.UserKey))
  1110  					key, _ = it.Prev()
  1111  				}
  1112  				if testing.Verbose() {
  1113  					fmt.Fprint(ioutil.Discard, sum)
  1114  				}
  1115  
  1116  				b.StopTimer()
  1117  				it.Close()
  1118  				r.Close()
  1119  			})
  1120  	}
  1121  }
  1122  
  1123  func BenchmarkLayout(b *testing.B) {
  1124  	r, _ := buildBenchmarkTable(b, WriterOptions{})
  1125  	b.ResetTimer()
  1126  	for i := 0; i < b.N; i++ {
  1127  		r.Layout()
  1128  	}
  1129  	b.StopTimer()
  1130  	r.Close()
  1131  }