github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/sstable/table_test.go (about)

     1  // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"encoding/binary"
    11  	"fmt"
    12  	"io"
    13  	"math"
    14  	"os"
    15  	"path/filepath"
    16  	"sort"
    17  	"strings"
    18  	"testing"
    19  	"time"
    20  
    21  	"github.com/cockroachdb/errors"
    22  	"github.com/kr/pretty"
    23  	"github.com/stretchr/testify/require"
    24  	"github.com/zuoyebang/bitalostable/bloom"
    25  	"github.com/zuoyebang/bitalostable/internal/base"
    26  	"github.com/zuoyebang/bitalostable/vfs"
    27  	"golang.org/x/exp/rand"
    28  )
    29  
// nonsenseWords are words that aren't in testdata/h.txt. The package's init
// function panics if any of them turns out to be a key of wordCount, so tests
// can rely on every entry being absent from tables built from that file.
var nonsenseWords = []string{
	// Edge cases.
	"",
	"\x00",
	"\xff",
	"`",
	"a\x00",
	"aaaaaa",
	"pol\x00nius",
	"youth\x00",
	"youti",
	"zzzzzz",
	// Capitalized versions of actual words in testdata/h.txt.
	"A",
	"Hamlet",
	"thEE",
	"YOUTH",
	// The following were generated by http://soybomb.com/tricks/words/
	"pectures",
	"exectly",
	"tricatrippian",
	"recens",
	"whiratroce",
	"troped",
	"balmous",
	"droppewry",
	"toilizing",
	"crocias",
	"eathrass",
	"cheakden",
	"speablett",
	"skirinies",
	"prefing",
	"bonufacision",
}
    66  
// Fixture data populated by init from testdata/h.txt.
//
// wordCount maps each key found in the file to its associated value; both are
// stored as whitespace-trimmed strings. minWord and maxWord are the smallest
// and largest keys of wordCount under Go's string comparison.
var (
	wordCount = map[string]string{}
	minWord   = ""
	maxWord   = ""
)
    72  
    73  func init() {
    74  	f, err := os.Open(filepath.FromSlash("testdata/h.txt"))
    75  	if err != nil {
    76  		panic(err)
    77  	}
    78  	defer f.Close()
    79  	r := bufio.NewReader(f)
    80  
    81  	for first := true; ; {
    82  		s, err := r.ReadBytes('\n')
    83  		if err == io.EOF {
    84  			break
    85  		}
    86  		if err != nil {
    87  			panic(err)
    88  		}
    89  		k := strings.TrimSpace(string(s[8:]))
    90  		v := strings.TrimSpace(string(s[:8]))
    91  		wordCount[k] = v
    92  
    93  		if first {
    94  			first = false
    95  			minWord = k
    96  			maxWord = k
    97  			continue
    98  		}
    99  		if minWord > k {
   100  			minWord = k
   101  		}
   102  		if maxWord < k {
   103  			maxWord = k
   104  		}
   105  	}
   106  
   107  	if len(wordCount) != 1710 {
   108  		panic(fmt.Sprintf("h.txt entry count: got %d, want %d", len(wordCount), 1710))
   109  	}
   110  
   111  	for _, s := range nonsenseWords {
   112  		if _, ok := wordCount[s]; ok {
   113  			panic(fmt.Sprintf("nonsense word %q was in h.txt", s))
   114  		}
   115  	}
   116  }
   117  
// check opens f as an sstable via NewReader and verifies its contents against
// the wordCount fixture. comparer is passed through as the reader's Comparer;
// when fp is non-nil it is registered in the reader's Filters map under
// fp.Name().
//
// The checks, in order:
//   - every wordCount entry is found via get, SeekGE, and SeekLT followed by a
//     step forward, with the expected value;
//   - no entry of nonsenseWords is found via get or SeekGE;
//   - forward and backward iteration from a handful of fixed start keys yields
//     the expected key counts;
//   - iteration under ten randomly chosen lower/upper bounds yields the
//     expected key counts.
//
// It returns the first mismatch or iterator/reader error encountered, or
// otherwise the result of closing the reader. On an early error return the
// reader (and possibly the current iterator) is left unclosed.
func check(f vfs.File, comparer *Comparer, fp FilterPolicy) error {
	opts := ReaderOptions{
		Comparer: comparer,
	}
	if fp != nil {
		opts.Filters = map[string]FilterPolicy{
			fp.Name(): fp,
		}
	}

	r, err := NewReader(f, opts)
	if err != nil {
		return err
	}

	// Check that each key/value pair in wordCount is also in the table.
	words := make([]string, 0, len(wordCount))
	for k, v := range wordCount {
		words = append(words, k)
		// Check using Get. The len/cap comparison additionally requires the
		// returned slice to have no spare capacity.
		// NOTE(review): presumably this guards against an append by the caller
		// overwriting adjacent data — confirm.
		if v1, err := r.get([]byte(k)); string(v1) != string(v) || err != nil {
			return errors.Errorf("Get %q: got (%q, %v), want (%q, %v)", k, v1, err, v, error(nil))
		} else if len(v1) != cap(v1) {
			return errors.Errorf("Get %q: len(v1)=%d, cap(v1)=%d", k, len(v1), cap(v1))
		}

		// Check using SeekGE.
		iter, err := r.NewIter(nil /* lower */, nil /* upper */)
		if err != nil {
			return err
		}
		i := newIterAdapter(iter)
		if !i.SeekGE([]byte(k), base.SeekGEFlagsNone) || string(i.Key().UserKey) != k {
			return errors.Errorf("Find %q: key was not in the table", k)
		}
		if k1 := i.Key().UserKey; len(k1) != cap(k1) {
			return errors.Errorf("Find %q: len(k1)=%d, cap(k1)=%d", k, len(k1), cap(k1))
		}
		if string(i.Value()) != v {
			return errors.Errorf("Find %q: got value %q, want %q", k, i.Value(), v)
		}
		if v1 := i.Value(); len(v1) != cap(v1) {
			return errors.Errorf("Find %q: len(v1)=%d, cap(v1)=%d", k, len(v1), cap(v1))
		}

		// Check using SeekLT. If nothing sorts before k, the iterator is moved
		// to the first entry; otherwise it is stepped forward once from k's
		// predecessor. Either way it is then expected to be positioned at k.
		if !i.SeekLT([]byte(k), base.SeekLTFlagsNone) {
			i.First()
		} else {
			i.Next()
		}
		if string(i.Key().UserKey) != k {
			return errors.Errorf("Find %q: key was not in the table", k)
		}
		if k1 := i.Key().UserKey; len(k1) != cap(k1) {
			return errors.Errorf("Find %q: len(k1)=%d, cap(k1)=%d", k, len(k1), cap(k1))
		}
		if string(i.Value()) != v {
			return errors.Errorf("Find %q: got value %q, want %q", k, i.Value(), v)
		}
		if v1 := i.Value(); len(v1) != cap(v1) {
			return errors.Errorf("Find %q: len(v1)=%d, cap(v1)=%d", k, len(v1), cap(v1))
		}

		if err := i.Close(); err != nil {
			return err
		}
	}

	// Check that nonsense words are not in the table.
	for _, s := range nonsenseWords {
		// Check using Get.
		if _, err := r.get([]byte(s)); err != base.ErrNotFound {
			return errors.Errorf("Get %q: got %v, want ErrNotFound", s, err)
		}

		// Check using Find. SeekGE may land on a later key; only an exact
		// match counts as a failure.
		iter, err := r.NewIter(nil /* lower */, nil /* upper */)
		if err != nil {
			return err
		}
		i := newIterAdapter(iter)
		if i.SeekGE([]byte(s), base.SeekGEFlagsNone) && s == string(i.Key().UserKey) {
			return errors.Errorf("Find %q: unexpectedly found key in the table", s)
		}
		if err := i.Close(); err != nil {
			return err
		}
	}

	// Check that the number of keys >= a given start key matches the expected number.
	var countTests = []struct {
		count int
		start string
	}{
		// cat h.txt | cut -c 9- | wc -l gives 1710.
		{1710, ""},
		// cat h.txt | cut -c 9- | grep -v "^[a-b]" | wc -l gives 1522.
		{1522, "c"},
		// cat h.txt | cut -c 9- | grep -v "^[a-j]" | wc -l gives 940.
		{940, "k"},
		// cat h.txt | cut -c 9- | grep -v "^[a-x]" | wc -l gives 12.
		{12, "y"},
		// cat h.txt | cut -c 9- | grep -v "^[a-z]" | wc -l gives 0.
		{0, "~"},
	}
	for _, ct := range countTests {
		iter, err := r.NewIter(nil /* lower */, nil /* upper */)
		if err != nil {
			return err
		}
		// Count forwards from the start key to the end of the table.
		n, i := 0, newIterAdapter(iter)
		for valid := i.SeekGE([]byte(ct.start), base.SeekGEFlagsNone); valid; valid = i.Next() {
			n++
		}
		if n != ct.count {
			return errors.Errorf("count %q: got %d, want %d", ct.start, n, ct.count)
		}
		// Count backwards from the last key until a key below the start key is
		// reached; the total must match the forward count.
		n = 0
		for valid := i.Last(); valid; valid = i.Prev() {
			if bytes.Compare(i.Key().UserKey, []byte(ct.start)) < 0 {
				break
			}
			n++
		}
		if n != ct.count {
			return errors.Errorf("count %q: got %d, want %d", ct.start, n, ct.count)
		}
		if err := i.Close(); err != nil {
			return err
		}
	}

	// Check lower/upper bounds behavior. Randomly choose a lower and upper bound
	// and then guarantee that iteration finds the expected number of entries.
	// The generator is seeded from the current time, so the chosen bounds
	// differ from run to run.
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	sort.Strings(words)
	for i := 0; i < 10; i++ {
		// -1 and len(words) stand for "no lower bound" and "no upper bound";
		// each bound is left unset with probability 1/5.
		lowerIdx := -1
		upperIdx := len(words)
		if rng.Intn(5) != 0 {
			lowerIdx = rng.Intn(len(words))
		}
		if rng.Intn(5) != 0 {
			upperIdx = rng.Intn(len(words))
		}
		if lowerIdx > upperIdx {
			lowerIdx, upperIdx = upperIdx, lowerIdx
		}

		var lower, upper []byte
		if lowerIdx >= 0 {
			lower = []byte(words[lowerIdx])
		} else {
			// Normalize so that the arithmetic below can treat "no lower
			// bound" as starting at index 0.
			lowerIdx = 0
		}
		if upperIdx < len(words) {
			upper = []byte(words[upperIdx])
		}

		iter, err := r.NewIter(lower, upper)
		if err != nil {
			return err
		}
		i := newIterAdapter(iter)

		// The expected counts below treat the range as words[lowerIdx:upperIdx],
		// i.e. the upper bound is exclusive.
		if lower == nil {
			n := 0
			for valid := i.First(); valid; valid = i.Next() {
				n++
			}
			if expected := upperIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if upper == nil {
			n := 0
			for valid := i.Last(); valid; valid = i.Prev() {
				n++
			}
			if expected := len(words) - lowerIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if lower != nil {
			n := 0
			for valid := i.SeekGE(lower, base.SeekGEFlagsNone); valid; valid = i.Next() {
				n++
			}
			if expected := upperIdx - lowerIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if upper != nil {
			n := 0
			for valid := i.SeekLT(upper, base.SeekLTFlagsNone); valid; valid = i.Prev() {
				n++
			}
			if expected := upperIdx - lowerIdx; expected != n {
				return errors.Errorf("expected %d, but found %d", expected, n)
			}
		}

		if err := i.Close(); err != nil {
			return err
		}
	}

	return r.Close()
}
   331  
// State shared by calls to build.
//
// memFileSystem is the in-memory file system on which build creates and
// reopens its tables. tmpFileCount is the numeric suffix build uses for the
// next file name ("/tmp<N>"); build increments it after each successful
// create so that successive tables get distinct names.
var (
	memFileSystem = vfs.NewMem()
	tmpFileCount  int
)
   336  
   337  func build(
   338  	compression Compression,
   339  	fp FilterPolicy,
   340  	ftype FilterType,
   341  	comparer *Comparer,
   342  	propCollector func() TablePropertyCollector,
   343  	blockSize int,
   344  	indexBlockSize int,
   345  ) (vfs.File, error) {
   346  	// Create a sorted list of wordCount's keys.
   347  	keys := make([]string, len(wordCount))
   348  	i := 0
   349  	for k := range wordCount {
   350  		keys[i] = k
   351  		i++
   352  	}
   353  	sort.Strings(keys)
   354  
   355  	// Write the key/value pairs to a new table, in increasing key order.
   356  	filename := fmt.Sprintf("/tmp%d", tmpFileCount)
   357  	f0, err := memFileSystem.Create(filename)
   358  	if err != nil {
   359  		return nil, err
   360  	}
   361  	tmpFileCount++
   362  
   363  	writerOpts := WriterOptions{
   364  		BlockSize:      blockSize,
   365  		Comparer:       comparer,
   366  		Compression:    compression,
   367  		FilterPolicy:   fp,
   368  		FilterType:     ftype,
   369  		IndexBlockSize: indexBlockSize,
   370  		MergerName:     "nullptr",
   371  	}
   372  	if propCollector != nil {
   373  		writerOpts.TablePropertyCollectors = append(writerOpts.TablePropertyCollectors, propCollector)
   374  	}
   375  
   376  	w := NewWriter(f0, writerOpts)
   377  	// Use rangeDelV1Format for testing byte equality with RocksDB.
   378  	w.rangeDelV1Format = true
   379  	var rangeDelLength int
   380  	var rangeDelCounter int
   381  	var rangeDelStart InternalKey
   382  	for i, k := range keys {
   383  		v := wordCount[k]
   384  		ikey := base.MakeInternalKey([]byte(k), 0, InternalKeyKindSet)
   385  		if err := w.Add(ikey, []byte(v)); err != nil {
   386  			return nil, err
   387  		}
   388  		// This mirrors the logic in `make-table.cc`. It adds range deletions of
   389  		// increasing length for every 100 keys added.
   390  		if i%100 == 0 {
   391  			rangeDelStart = ikey.Clone()
   392  			rangeDelCounter = 0
   393  			rangeDelLength++
   394  		}
   395  		rangeDelCounter++
   396  
   397  		if rangeDelCounter == rangeDelLength {
   398  			if err := w.DeleteRange(rangeDelStart.UserKey, ikey.UserKey); err != nil {
   399  				return nil, err
   400  			}
   401  		}
   402  	}
   403  	if err := w.Close(); err != nil {
   404  		return nil, err
   405  	}
   406  
   407  	// Re-open that filename for reading.
   408  	f1, err := memFileSystem.Open(filename)
   409  	if err != nil {
   410  		return nil, err
   411  	}
   412  	return f1, nil
   413  }
   414  
   415  func testReader(t *testing.T, filename string, comparer *Comparer, fp FilterPolicy) {
   416  	// Check that we can read a pre-made table.
   417  	f, err := os.Open(filepath.FromSlash("testdata/" + filename))
   418  	if err != nil {
   419  		t.Error(err)
   420  		return
   421  	}
   422  	err = check(f, comparer, fp)
   423  	if err != nil {
   424  		t.Error(err)
   425  		return
   426  	}
   427  }
   428  
   429  func TestReaderLevelDB(t *testing.T)            { testReader(t, "h.ldb", nil, nil) }
   430  func TestReaderDefaultCompression(t *testing.T) { testReader(t, "h.sst", nil, nil) }
   431  func TestReaderNoCompression(t *testing.T)      { testReader(t, "h.no-compression.sst", nil, nil) }
   432  func TestReaderBlockBloomIgnored(t *testing.T) {
   433  	testReader(t, "h.block-bloom.no-compression.sst", nil, nil)
   434  }
   435  func TestReaderTableBloomIgnored(t *testing.T) {
   436  	testReader(t, "h.table-bloom.no-compression.sst", nil, nil)
   437  }
   438  
   439  func TestReaderBloomUsed(t *testing.T) {
   440  	// wantActualNegatives is the minimum number of nonsense words (i.e. false
   441  	// positives or true negatives) to run through our filter. Some nonsense
   442  	// words might be rejected even before the filtering step, if they are out
   443  	// of the [minWord, maxWord] range of keys in the table.
   444  	wantActualNegatives := 0
   445  	for _, s := range nonsenseWords {
   446  		if minWord < s && s < maxWord {
   447  			wantActualNegatives++
   448  		}
   449  	}
   450  
   451  	files := []struct {
   452  		path     string
   453  		comparer *Comparer
   454  	}{
   455  		{"h.table-bloom.no-compression.sst", nil},
   456  		{"h.table-bloom.no-compression.prefix_extractor.no_whole_key_filter.sst", fixtureComparer},
   457  	}
   458  	for _, tc := range files {
   459  		t.Run(tc.path, func(t *testing.T) {
   460  			for _, degenerate := range []bool{false, true} {
   461  				t.Run(fmt.Sprintf("degenerate=%t", degenerate), func(t *testing.T) {
   462  					c := &countingFilterPolicy{
   463  						FilterPolicy: bloom.FilterPolicy(10),
   464  						degenerate:   degenerate,
   465  					}
   466  					testReader(t, tc.path, tc.comparer, c)
   467  
   468  					if c.truePositives != len(wordCount) {
   469  						t.Errorf("degenerate=%t: true positives: got %d, want %d", degenerate, c.truePositives, len(wordCount))
   470  					}
   471  					if c.falseNegatives != 0 {
   472  						t.Errorf("degenerate=%t: false negatives: got %d, want %d", degenerate, c.falseNegatives, 0)
   473  					}
   474  
   475  					if got := c.falsePositives + c.trueNegatives; got < wantActualNegatives {
   476  						t.Errorf("degenerate=%t: actual negatives (false positives + true negatives): "+
   477  							"got %d (%d + %d), want >= %d",
   478  							degenerate, got, c.falsePositives, c.trueNegatives, wantActualNegatives)
   479  					}
   480  
   481  					if !degenerate {
   482  						// The true negative count should be much greater than the false
   483  						// positive count.
   484  						if c.trueNegatives < 10*c.falsePositives {
   485  							t.Errorf("degenerate=%t: true negative to false positive ratio (%d:%d) is too small",
   486  								degenerate, c.trueNegatives, c.falsePositives)
   487  						}
   488  					}
   489  				})
   490  			}
   491  		})
   492  	}
   493  }
   494  
   495  func TestBloomFilterFalsePositiveRate(t *testing.T) {
   496  	f, err := os.Open(filepath.FromSlash("testdata/h.table-bloom.no-compression.sst"))
   497  	require.NoError(t, err)
   498  
   499  	c := &countingFilterPolicy{
   500  		FilterPolicy: bloom.FilterPolicy(1),
   501  	}
   502  	r, err := NewReader(f, ReaderOptions{
   503  		Filters: map[string]FilterPolicy{
   504  			c.Name(): c,
   505  		},
   506  	})
   507  	require.NoError(t, err)
   508  
   509  	const n = 10000
   510  	// key is a buffer that will be re-used for n Get calls, each with a
   511  	// different key. The "m" in the 2-byte prefix means that the key falls in
   512  	// the [minWord, maxWord] range and so will not be rejected prior to
   513  	// applying the Bloom filter. The "!" in the 2-byte prefix means that the
   514  	// key is not actually in the table. The filter will only see actual
   515  	// negatives: false positives or true negatives.
   516  	key := []byte("m!....")
   517  	for i := 0; i < n; i++ {
   518  		binary.LittleEndian.PutUint32(key[2:6], uint32(i))
   519  		r.get(key)
   520  	}
   521  
   522  	if c.truePositives != 0 {
   523  		t.Errorf("true positives: got %d, want 0", c.truePositives)
   524  	}
   525  	if c.falseNegatives != 0 {
   526  		t.Errorf("false negatives: got %d, want 0", c.falseNegatives)
   527  	}
   528  	if got := c.falsePositives + c.trueNegatives; got != n {
   529  		t.Errorf("actual negatives (false positives + true negatives): got %d (%d + %d), want %d",
   530  			got, c.falsePositives, c.trueNegatives, n)
   531  	}
   532  
   533  	// According the the comments in the C++ LevelDB code, the false positive
   534  	// rate should be approximately 1% for for bloom.FilterPolicy(10). The 10
   535  	// was the parameter used to write the .sst file. When reading the file,
   536  	// the 1 in the bloom.FilterPolicy(1) above doesn't matter, only the
   537  	// bloom.FilterPolicy matters.
   538  	if got := float64(100*c.falsePositives) / n; got < 0.2 || 5 < got {
   539  		t.Errorf("false positive rate: got %v%%, want approximately 1%%", got)
   540  	}
   541  
   542  	require.NoError(t, r.Close())
   543  }
   544  
// countingFilterPolicy wraps a FilterPolicy and tallies the outcome of every
// MayContain call, classifying each answer by whether the queried key is
// actually a key of wordCount.
type countingFilterPolicy struct {
	FilterPolicy
	// degenerate, when true, makes MayContain answer true for every key
	// without consulting the embedded FilterPolicy.
	degenerate bool

	// Outcome counters, incremented by MayContain. "Positive" means MayContain
	// returned true; "true"/"false" says whether that answer agreed with
	// wordCount membership.
	truePositives  int
	falsePositives int
	falseNegatives int
	trueNegatives  int
}
   554  
   555  func (c *countingFilterPolicy) MayContain(ftype FilterType, filter, key []byte) bool {
   556  	got := true
   557  	if c.degenerate {
   558  		// When degenerate is true, we override the embedded FilterPolicy's
   559  		// MayContain method to always return true. Doing so is a valid, if
   560  		// inefficient, implementation of the FilterPolicy interface.
   561  	} else {
   562  		got = c.FilterPolicy.MayContain(ftype, filter, key)
   563  	}
   564  	_, want := wordCount[string(key)]
   565  
   566  	switch {
   567  	case got && want:
   568  		c.truePositives++
   569  	case got && !want:
   570  		c.falsePositives++
   571  	case !got && want:
   572  		c.falseNegatives++
   573  	case !got && !want:
   574  		c.trueNegatives++
   575  	}
   576  	return got
   577  }
   578  
   579  func TestWriterRoundTrip(t *testing.T) {
   580  	blockSizes := []int{100, 1000, 2048, 4096, math.MaxInt32}
   581  	for _, blockSize := range blockSizes {
   582  		for _, indexBlockSize := range blockSizes {
   583  			for name, fp := range map[string]FilterPolicy{
   584  				"none":       nil,
   585  				"bloom10bit": bloom.FilterPolicy(10),
   586  			} {
   587  				t.Run(fmt.Sprintf("bloom=%s", name), func(t *testing.T) {
   588  					f, err := build(DefaultCompression, fp, TableFilter,
   589  						nil, nil, blockSize, indexBlockSize)
   590  					require.NoError(t, err)
   591  
   592  					// Check that we can read a freshly made table.
   593  					require.NoError(t, check(f, nil, nil))
   594  				})
   595  			}
   596  		}
   597  	}
   598  }
   599  
   600  func TestFinalBlockIsWritten(t *testing.T) {
   601  	keys := []string{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J"}
   602  	valueLengths := []int{0, 1, 22, 28, 33, 40, 50, 61, 87, 100, 143, 200}
   603  	xxx := bytes.Repeat([]byte("x"), valueLengths[len(valueLengths)-1])
   604  	for _, blockSize := range []int{5, 10, 25, 50, 100} {
   605  		for _, indexBlockSize := range []int{5, 10, 25, 50, 100, math.MaxInt32} {
   606  			for nk := 0; nk <= len(keys); nk++ {
   607  			loop:
   608  				for _, vLen := range valueLengths {
   609  					got, memFS := 0, vfs.NewMem()
   610  
   611  					wf, err := memFS.Create("foo")
   612  					if err != nil {
   613  						t.Errorf("nk=%d, vLen=%d: memFS create: %v", nk, vLen, err)
   614  						continue
   615  					}
   616  					w := NewWriter(wf, WriterOptions{
   617  						BlockSize:      blockSize,
   618  						IndexBlockSize: indexBlockSize,
   619  					})
   620  					for _, k := range keys[:nk] {
   621  						if err := w.Add(InternalKey{UserKey: []byte(k)}, xxx[:vLen]); err != nil {
   622  							t.Errorf("nk=%d, vLen=%d: set: %v", nk, vLen, err)
   623  							continue loop
   624  						}
   625  					}
   626  					if err := w.Close(); err != nil {
   627  						t.Errorf("nk=%d, vLen=%d: writer close: %v", nk, vLen, err)
   628  						continue
   629  					}
   630  
   631  					rf, err := memFS.Open("foo")
   632  					if err != nil {
   633  						t.Errorf("nk=%d, vLen=%d: memFS open: %v", nk, vLen, err)
   634  						continue
   635  					}
   636  					r, err := NewReader(rf, ReaderOptions{})
   637  					if err != nil {
   638  						t.Errorf("nk=%d, vLen=%d: reader open: %v", nk, vLen, err)
   639  					}
   640  					iter, err := r.NewIter(nil /* lower */, nil /* upper */)
   641  					require.NoError(t, err)
   642  					i := newIterAdapter(iter)
   643  					for valid := i.First(); valid; valid = i.Next() {
   644  						got++
   645  					}
   646  					if err := i.Close(); err != nil {
   647  						t.Errorf("nk=%d, vLen=%d: Iterator close: %v", nk, vLen, err)
   648  						continue
   649  					}
   650  					if err := r.Close(); err != nil {
   651  						t.Errorf("nk=%d, vLen=%d: reader close: %v", nk, vLen, err)
   652  						continue
   653  					}
   654  
   655  					if got != nk {
   656  						t.Errorf("nk=%2d, vLen=%3d: got %2d keys, want %2d", nk, vLen, got, nk)
   657  						continue
   658  					}
   659  				}
   660  			}
   661  		}
   662  	}
   663  }
   664  
   665  func TestReaderGlobalSeqNum(t *testing.T) {
   666  	f, err := os.Open(filepath.FromSlash("testdata/h.sst"))
   667  	require.NoError(t, err)
   668  
   669  	r, err := NewReader(f, ReaderOptions{})
   670  	require.NoError(t, err)
   671  
   672  	const globalSeqNum = 42
   673  	r.Properties.GlobalSeqNum = globalSeqNum
   674  
   675  	iter, err := r.NewIter(nil /* lower */, nil /* upper */)
   676  	require.NoError(t, err)
   677  	i := newIterAdapter(iter)
   678  	for valid := i.First(); valid; valid = i.Next() {
   679  		if globalSeqNum != i.Key().SeqNum() {
   680  			t.Fatalf("expected %d, but found %d", globalSeqNum, i.Key().SeqNum())
   681  		}
   682  	}
   683  	require.NoError(t, i.Close())
   684  	require.NoError(t, r.Close())
   685  }
   686  
   687  func TestMetaIndexEntriesSorted(t *testing.T) {
   688  	f, err := build(DefaultCompression, nil, /* filter policy */
   689  		TableFilter, nil, nil, 4096, 4096)
   690  	require.NoError(t, err)
   691  
   692  	r, err := NewReader(f, ReaderOptions{})
   693  	require.NoError(t, err)
   694  
   695  	b, _, err := r.readBlock(r.metaIndexBH, nil /* transform */, nil /* attrs */)
   696  	require.NoError(t, err)
   697  	defer b.Release()
   698  
   699  	i, err := newRawBlockIter(bytes.Compare, b.Get())
   700  	require.NoError(t, err)
   701  
   702  	var keys []string
   703  	for valid := i.First(); valid; valid = i.Next() {
   704  		keys = append(keys, string(i.Key().UserKey))
   705  	}
   706  	if !sort.StringsAreSorted(keys) {
   707  		t.Fatalf("metaindex block out of order: %v", keys)
   708  	}
   709  
   710  	require.NoError(t, i.Close())
   711  	require.NoError(t, r.Close())
   712  }
   713  
   714  func TestFooterRoundTrip(t *testing.T) {
   715  	buf := make([]byte, 100+maxFooterLen)
   716  	for format := TableFormatLevelDB; format < TableFormatMax; format++ {
   717  		t.Run(fmt.Sprintf("format=%s", format), func(t *testing.T) {
   718  			checksums := []ChecksumType{ChecksumTypeCRC32c}
   719  			if format != TableFormatLevelDB {
   720  				checksums = []ChecksumType{ChecksumTypeCRC32c, ChecksumTypeXXHash64}
   721  			}
   722  			for _, checksum := range checksums {
   723  				t.Run(fmt.Sprintf("checksum=%d", checksum), func(t *testing.T) {
   724  					footer := footer{
   725  						format:      format,
   726  						checksum:    checksum,
   727  						metaindexBH: BlockHandle{Offset: 1, Length: 2},
   728  						indexBH:     BlockHandle{Offset: 3, Length: 4},
   729  					}
   730  					for _, offset := range []int64{0, 1, 100} {
   731  						t.Run(fmt.Sprintf("offset=%d", offset), func(t *testing.T) {
   732  							mem := vfs.NewMem()
   733  							f, err := mem.Create("test")
   734  							require.NoError(t, err)
   735  
   736  							_, err = f.Write(buf[:offset])
   737  							require.NoError(t, err)
   738  
   739  							encoded := footer.encode(buf[100:])
   740  							_, err = f.Write(encoded)
   741  							require.NoError(t, err)
   742  							require.NoError(t, f.Close())
   743  
   744  							footer.footerBH.Offset = uint64(offset)
   745  							footer.footerBH.Length = uint64(len(encoded))
   746  
   747  							f, err = mem.Open("test")
   748  							require.NoError(t, err)
   749  
   750  							result, err := readFooter(f)
   751  							require.NoError(t, err)
   752  							require.NoError(t, f.Close())
   753  
   754  							if diff := pretty.Diff(footer, result); diff != nil {
   755  								t.Fatalf("expected %+v, but found %+v\n%s",
   756  									footer, result, strings.Join(diff, "\n"))
   757  							}
   758  						})
   759  					}
   760  				})
   761  			}
   762  		})
   763  	}
   764  }
   765  
   766  func TestReadFooter(t *testing.T) {
   767  	encode := func(format TableFormat, checksum ChecksumType) string {
   768  		f := footer{
   769  			format:   format,
   770  			checksum: checksum,
   771  		}
   772  		return string(f.encode(make([]byte, maxFooterLen)))
   773  	}
   774  
   775  	testCases := []struct {
   776  		encoded  string
   777  		expected string
   778  	}{
   779  		{strings.Repeat("a", minFooterLen-1), "file size is too small"},
   780  		{strings.Repeat("a", levelDBFooterLen), "bad magic number"},
   781  		{strings.Repeat("a", rocksDBFooterLen), "bad magic number"},
   782  		{encode(TableFormatLevelDB, 0)[1:], "file size is too small"},
   783  		{encode(TableFormatRocksDBv2, 0)[1:], "footer too short"},
   784  		{encode(TableFormatRocksDBv2, ChecksumTypeNone), "unsupported checksum type"},
   785  		{encode(TableFormatRocksDBv2, ChecksumTypeXXHash), "unsupported checksum type"},
   786  	}
   787  	for _, c := range testCases {
   788  		t.Run("", func(t *testing.T) {
   789  			mem := vfs.NewMem()
   790  			f, err := mem.Create("test")
   791  			require.NoError(t, err)
   792  
   793  			_, err = f.Write([]byte(c.encoded))
   794  			require.NoError(t, err)
   795  			require.NoError(t, f.Close())
   796  
   797  			f, err = mem.Open("test")
   798  			require.NoError(t, err)
   799  
   800  			if _, err := readFooter(f); err == nil {
   801  				t.Fatalf("expected %q, but found success", c.expected)
   802  			} else if !strings.Contains(err.Error(), c.expected) {
   803  				t.Fatalf("expected %q, but found %v", c.expected, err)
   804  			}
   805  		})
   806  	}
   807  }
   808  
   809  type errorPropCollector struct{}
   810  
   811  func (errorPropCollector) Add(key InternalKey, _ []byte) error {
   812  	return errors.Errorf("add %s failed", key)
   813  }
   814  
   815  func (errorPropCollector) Finish(_ map[string]string) error {
   816  	return errors.Errorf("finish failed")
   817  }
   818  
   819  func (errorPropCollector) Name() string {
   820  	return "errorPropCollector"
   821  }
   822  
   823  func TestTablePropertyCollectorErrors(t *testing.T) {
   824  
   825  	var testcases map[string]func(w *Writer) error = map[string]func(w *Writer) error{
   826  		"add a#0,1 failed": func(w *Writer) error {
   827  			return w.Set([]byte("a"), []byte("b"))
   828  		},
   829  		"add c#0,0 failed": func(w *Writer) error {
   830  			return w.Delete([]byte("c"))
   831  		},
   832  		"add d#0,15 failed": func(w *Writer) error {
   833  			return w.DeleteRange([]byte("d"), []byte("e"))
   834  		},
   835  		"add f#0,2 failed": func(w *Writer) error {
   836  			return w.Merge([]byte("f"), []byte("g"))
   837  		},
   838  		"finish failed": func(w *Writer) error {
   839  			return w.Close()
   840  		},
   841  	}
   842  
   843  	for e, fun := range testcases {
   844  		mem := vfs.NewMem()
   845  		f, err := mem.Create("foo")
   846  		require.NoError(t, err)
   847  
   848  		var opts WriterOptions
   849  		opts.TablePropertyCollectors = append(opts.TablePropertyCollectors,
   850  			func() TablePropertyCollector {
   851  				return errorPropCollector{}
   852  			})
   853  
   854  		w := NewWriter(f, opts)
   855  
   856  		require.Regexp(t, e, fun(w))
   857  	}
   858  }