github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/writer_test.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"math/rand"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"testing"
    16  	"unsafe"
    17  
    18  	"github.com/cockroachdb/datadriven"
    19  	"github.com/cockroachdb/errors"
    20  	"github.com/cockroachdb/pebble/bloom"
    21  	"github.com/cockroachdb/pebble/internal/base"
    22  	"github.com/cockroachdb/pebble/internal/cache"
    23  	"github.com/cockroachdb/pebble/internal/humanize"
    24  	"github.com/cockroachdb/pebble/internal/testkeys"
    25  	"github.com/cockroachdb/pebble/objstorage"
    26  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    27  	"github.com/cockroachdb/pebble/vfs"
    28  	"github.com/stretchr/testify/require"
    29  )
    30  
    31  func testWriterParallelism(t *testing.T, parallelism bool) {
    32  	for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} {
    33  		tdFile := "testdata/writer"
    34  		if format == TableFormatPebblev3 {
    35  			tdFile = "testdata/writer_v3"
    36  		}
    37  		t.Run(format.String(), func(t *testing.T) { runDataDriven(t, tdFile, format, parallelism) })
    38  	}
    39  }
    40  func TestWriter(t *testing.T) {
    41  	testWriterParallelism(t, false)
    42  }
    43  
    44  func testRewriterParallelism(t *testing.T, parallelism bool) {
    45  	for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} {
    46  		tdFile := "testdata/rewriter"
    47  		if format == TableFormatPebblev3 {
    48  			tdFile = "testdata/rewriter_v3"
    49  		}
    50  		t.Run(format.String(), func(t *testing.T) { runDataDriven(t, tdFile, format, parallelism) })
    51  	}
    52  }
    53  
    54  func TestRewriter(t *testing.T) {
    55  	testRewriterParallelism(t, false)
    56  }
    57  
    58  func TestWriterParallel(t *testing.T) {
    59  	testWriterParallelism(t, true)
    60  }
    61  
    62  func TestRewriterParallel(t *testing.T) {
    63  	testRewriterParallelism(t, true)
    64  }
    65  
    66  func runDataDriven(t *testing.T, file string, tableFormat TableFormat, parallelism bool) {
    67  	var r *Reader
    68  	defer func() {
    69  		if r != nil {
    70  			require.NoError(t, r.Close())
    71  		}
    72  	}()
    73  
    74  	format := func(td *datadriven.TestData, m *WriterMetadata) string {
    75  		var requestedProps []string
    76  		for _, cmdArg := range td.CmdArgs {
    77  			switch cmdArg.Key {
    78  			case "props":
    79  				requestedProps = cmdArg.Vals
    80  			}
    81  		}
    82  
    83  		var b bytes.Buffer
    84  		if m.HasPointKeys {
    85  			fmt.Fprintf(&b, "point:    [%s-%s]\n", m.SmallestPoint, m.LargestPoint)
    86  		}
    87  		if m.HasRangeDelKeys {
    88  			fmt.Fprintf(&b, "rangedel: [%s-%s]\n", m.SmallestRangeDel, m.LargestRangeDel)
    89  		}
    90  		if m.HasRangeKeys {
    91  			fmt.Fprintf(&b, "rangekey: [%s-%s]\n", m.SmallestRangeKey, m.LargestRangeKey)
    92  		}
    93  		fmt.Fprintf(&b, "seqnums:  [%d-%d]\n", m.SmallestSeqNum, m.LargestSeqNum)
    94  
    95  		if len(requestedProps) > 0 {
    96  			props := strings.Split(r.Properties.String(), "\n")
    97  			for _, requestedProp := range requestedProps {
    98  				fmt.Fprintf(&b, "props %q:\n", requestedProp)
    99  				for _, prop := range props {
   100  					if strings.Contains(prop, requestedProp) {
   101  						fmt.Fprintf(&b, "  %s\n", prop)
   102  					}
   103  				}
   104  			}
   105  		}
   106  
   107  		return b.String()
   108  	}
   109  
   110  	datadriven.RunTest(t, file, func(t *testing.T, td *datadriven.TestData) string {
   111  		switch td.Cmd {
   112  		case "build":
   113  			if r != nil {
   114  				_ = r.Close()
   115  				r = nil
   116  			}
   117  			var meta *WriterMetadata
   118  			var err error
   119  			meta, r, err = runBuildCmd(td, &WriterOptions{
   120  				TableFormat: tableFormat,
   121  				Parallelism: parallelism,
   122  			}, 0)
   123  			if err != nil {
   124  				return err.Error()
   125  			}
   126  			return format(td, meta)
   127  
   128  		case "build-raw":
   129  			if r != nil {
   130  				_ = r.Close()
   131  				r = nil
   132  			}
   133  			var meta *WriterMetadata
   134  			var err error
   135  			meta, r, err = runBuildRawCmd(td, &WriterOptions{
   136  				TableFormat: tableFormat,
   137  			})
   138  			if err != nil {
   139  				return err.Error()
   140  			}
   141  			return format(td, meta)
   142  
   143  		case "scan":
   144  			origIter, err := r.NewIter(nil /* lower */, nil /* upper */)
   145  			if err != nil {
   146  				return err.Error()
   147  			}
   148  			iter := newIterAdapter(origIter)
   149  			defer iter.Close()
   150  
   151  			var buf bytes.Buffer
   152  			for valid := iter.First(); valid; valid = iter.Next() {
   153  				fmt.Fprintf(&buf, "%s:%s\n", iter.Key(), iter.Value())
   154  			}
   155  			return buf.String()
   156  
   157  		case "get":
   158  			var buf bytes.Buffer
   159  			for _, k := range strings.Split(td.Input, "\n") {
   160  				value, err := r.get([]byte(k))
   161  				if err != nil {
   162  					fmt.Fprintf(&buf, "get %s: %s\n", k, err.Error())
   163  				} else {
   164  					fmt.Fprintf(&buf, "%s\n", value)
   165  				}
   166  			}
   167  			return buf.String()
   168  
   169  		case "scan-range-del":
   170  			iter, err := r.NewRawRangeDelIter()
   171  			if err != nil {
   172  				return err.Error()
   173  			}
   174  			if iter == nil {
   175  				return ""
   176  			}
   177  			defer iter.Close()
   178  
   179  			var buf bytes.Buffer
   180  			for s := iter.First(); s != nil; s = iter.Next() {
   181  				fmt.Fprintf(&buf, "%s\n", s)
   182  			}
   183  			return buf.String()
   184  
   185  		case "scan-range-key":
   186  			iter, err := r.NewRawRangeKeyIter()
   187  			if err != nil {
   188  				return err.Error()
   189  			}
   190  			if iter == nil {
   191  				return ""
   192  			}
   193  			defer iter.Close()
   194  
   195  			var buf bytes.Buffer
   196  			for s := iter.First(); s != nil; s = iter.Next() {
   197  				fmt.Fprintf(&buf, "%s\n", s)
   198  			}
   199  			return buf.String()
   200  
   201  		case "layout":
   202  			l, err := r.Layout()
   203  			if err != nil {
   204  				return err.Error()
   205  			}
   206  			verbose := false
   207  			if len(td.CmdArgs) > 0 {
   208  				if td.CmdArgs[0].Key == "verbose" {
   209  					verbose = true
   210  				} else {
   211  					return "unknown arg"
   212  				}
   213  			}
   214  			var buf bytes.Buffer
   215  			l.Describe(&buf, verbose, r, nil)
   216  			return buf.String()
   217  
   218  		case "rewrite":
   219  			var meta *WriterMetadata
   220  			var err error
   221  			meta, r, err = runRewriteCmd(td, r, WriterOptions{
   222  				TableFormat: tableFormat,
   223  			})
   224  			if err != nil {
   225  				return err.Error()
   226  			}
   227  			if err != nil {
   228  				return err.Error()
   229  			}
   230  			return format(td, meta)
   231  
   232  		default:
   233  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   234  		}
   235  	})
   236  }
   237  
   238  func TestWriterWithValueBlocks(t *testing.T) {
   239  	var r *Reader
   240  	defer func() {
   241  		if r != nil {
   242  			require.NoError(t, r.Close())
   243  		}
   244  	}()
   245  	formatVersion := TableFormatMax
   246  	formatMeta := func(m *WriterMetadata) string {
   247  		return fmt.Sprintf("value-blocks: num-values %d, num-blocks: %d, size: %d",
   248  			m.Properties.NumValuesInValueBlocks, m.Properties.NumValueBlocks,
   249  			m.Properties.ValueBlocksSize)
   250  	}
   251  
   252  	parallelism := false
   253  	if rand.Intn(2) == 0 {
   254  		parallelism = true
   255  	}
   256  	t.Logf("writer parallelism %t", parallelism)
   257  	attributeExtractor := func(
   258  		key []byte, keyPrefixLen int, value []byte) (base.ShortAttribute, error) {
   259  		require.NotNil(t, key)
   260  		require.Less(t, 0, keyPrefixLen)
   261  		attribute := base.ShortAttribute(len(value) & '\x07')
   262  		return attribute, nil
   263  	}
   264  
   265  	datadriven.RunTest(t, "testdata/writer_value_blocks", func(t *testing.T, td *datadriven.TestData) string {
   266  		switch td.Cmd {
   267  		case "build":
   268  			if r != nil {
   269  				_ = r.Close()
   270  				r = nil
   271  			}
   272  			var meta *WriterMetadata
   273  			var err error
   274  			var blockSize int
   275  			if td.HasArg("block-size") {
   276  				td.ScanArgs(t, "block-size", &blockSize)
   277  			}
   278  			var inPlaceValueBound UserKeyPrefixBound
   279  			if td.HasArg("in-place-bound") {
   280  				var l, u string
   281  				td.ScanArgs(t, "in-place-bound", &l, &u)
   282  				inPlaceValueBound.Lower = []byte(l)
   283  				inPlaceValueBound.Upper = []byte(u)
   284  			}
   285  			meta, r, err = runBuildCmd(td, &WriterOptions{
   286  				BlockSize:                 blockSize,
   287  				Comparer:                  testkeys.Comparer,
   288  				TableFormat:               formatVersion,
   289  				Parallelism:               parallelism,
   290  				RequiredInPlaceValueBound: inPlaceValueBound,
   291  				ShortAttributeExtractor:   attributeExtractor,
   292  			}, 0)
   293  			if err != nil {
   294  				return err.Error()
   295  			}
   296  			return formatMeta(meta)
   297  
   298  		case "layout":
   299  			l, err := r.Layout()
   300  			if err != nil {
   301  				return err.Error()
   302  			}
   303  			var buf bytes.Buffer
   304  			l.Describe(&buf, true, r, func(key *base.InternalKey, value []byte) {
   305  				fmt.Fprintf(&buf, "  %s:%s\n", key.String(), string(value))
   306  			})
   307  			return buf.String()
   308  
   309  		case "scan-raw":
   310  			// Raw scan does not fetch from value blocks.
   311  			origIter, err := r.NewIter(nil /* lower */, nil /* upper */)
   312  			if err != nil {
   313  				return err.Error()
   314  			}
   315  			forceIgnoreValueBlocks := func(i *singleLevelIterator) {
   316  				i.vbReader = nil
   317  				i.data.lazyValueHandling.vbr = nil
   318  				i.data.lazyValueHandling.hasValuePrefix = false
   319  			}
   320  			switch i := origIter.(type) {
   321  			case *twoLevelIterator:
   322  				forceIgnoreValueBlocks(&i.singleLevelIterator)
   323  			case *singleLevelIterator:
   324  				forceIgnoreValueBlocks(i)
   325  			}
   326  			iter := newIterAdapter(origIter)
   327  			defer iter.Close()
   328  
   329  			var buf bytes.Buffer
   330  			for valid := iter.First(); valid; valid = iter.Next() {
   331  				v := iter.Value()
   332  				if iter.Key().Kind() == InternalKeyKindSet {
   333  					prefix := valuePrefix(v[0])
   334  					setWithSamePrefix := setHasSamePrefix(prefix)
   335  					if isValueHandle(prefix) {
   336  						attribute := getShortAttribute(prefix)
   337  						vh := decodeValueHandle(v[1:])
   338  						fmt.Fprintf(&buf, "%s:value-handle len %d block %d offset %d, att %d, same-pre %t\n",
   339  							iter.Key(), vh.valueLen, vh.blockNum, vh.offsetInBlock, attribute, setWithSamePrefix)
   340  					} else {
   341  						fmt.Fprintf(&buf, "%s:in-place %s, same-pre %t\n", iter.Key(), v[1:], setWithSamePrefix)
   342  					}
   343  				} else {
   344  					fmt.Fprintf(&buf, "%s:%s\n", iter.Key(), v)
   345  				}
   346  			}
   347  			return buf.String()
   348  
   349  		case "scan":
   350  			origIter, err := r.NewIter(nil /* lower */, nil /* upper */)
   351  			if err != nil {
   352  				return err.Error()
   353  			}
   354  			iter := newIterAdapter(origIter)
   355  			defer iter.Close()
   356  			var buf bytes.Buffer
   357  			for valid := iter.First(); valid; valid = iter.Next() {
   358  				fmt.Fprintf(&buf, "%s:%s\n", iter.Key(), iter.Value())
   359  			}
   360  			return buf.String()
   361  
   362  		case "scan-cloned-lazy-values":
   363  			iter, err := r.NewIter(nil /* lower */, nil /* upper */)
   364  			if err != nil {
   365  				return err.Error()
   366  			}
   367  			var fetchers [100]base.LazyFetcher
   368  			var values []base.LazyValue
   369  			n := 0
   370  			var b []byte
   371  			for k, lv := iter.First(); k != nil; k, lv = iter.Next() {
   372  				var lvClone base.LazyValue
   373  				lvClone, b = lv.Clone(b, &fetchers[n])
   374  				if lv.Fetcher != nil {
   375  					_, callerOwned, err := lv.Value(nil)
   376  					require.False(t, callerOwned)
   377  					require.NoError(t, err)
   378  				}
   379  				n++
   380  				values = append(values, lvClone)
   381  			}
   382  			require.NoError(t, iter.Error())
   383  			iter.Close()
   384  			var buf bytes.Buffer
   385  			for i := range values {
   386  				fmt.Fprintf(&buf, "%d", i)
   387  				v, callerOwned, err := values[i].Value(nil)
   388  				require.NoError(t, err)
   389  				if values[i].Fetcher != nil {
   390  					require.True(t, callerOwned)
   391  					fmt.Fprintf(&buf, "(lazy: len %d, attr: %d): %s\n",
   392  						values[i].Len(), values[i].Fetcher.Attribute.ShortAttribute, string(v))
   393  					v2, callerOwned, err := values[i].Value(nil)
   394  					require.NoError(t, err)
   395  					require.True(t, callerOwned)
   396  					require.Equal(t, &v[0], &v2[0])
   397  
   398  				} else {
   399  					require.False(t, callerOwned)
   400  					fmt.Fprintf(&buf, "(in-place: len %d): %s\n", values[i].Len(), string(v))
   401  				}
   402  			}
   403  			return buf.String()
   404  
   405  		default:
   406  			return fmt.Sprintf("unknown command: %s", td.Cmd)
   407  		}
   408  	})
   409  }
   410  
   411  func testBlockBufClear(t *testing.T, b1, b2 *blockBuf) {
   412  	require.Equal(t, b1.tmp, b2.tmp)
   413  }
   414  
   415  func TestBlockBufClear(t *testing.T) {
   416  	b1 := &blockBuf{}
   417  	b1.tmp[0] = 1
   418  	b1.compressedBuf = make([]byte, 1)
   419  	b1.clear()
   420  	testBlockBufClear(t, b1, &blockBuf{})
   421  }
   422  
   423  func TestClearDataBlockBuf(t *testing.T) {
   424  	d := newDataBlockBuf(1, ChecksumTypeCRC32c)
   425  	d.blockBuf.compressedBuf = make([]byte, 1)
   426  	d.dataBlock.add(ikey("apple"), nil)
   427  	d.dataBlock.add(ikey("banana"), nil)
   428  
   429  	d.clear()
   430  	testBlockCleared(t, &d.dataBlock, &blockWriter{})
   431  	testBlockBufClear(t, &d.blockBuf, &blockBuf{})
   432  
   433  	dataBlockBufPool.Put(d)
   434  }
   435  
   436  func TestClearIndexBlockBuf(t *testing.T) {
   437  	i := newIndexBlockBuf(false)
   438  	i.block.add(ikey("apple"), nil)
   439  	i.block.add(ikey("banana"), nil)
   440  	i.clear()
   441  
   442  	testBlockCleared(t, &i.block, &blockWriter{})
   443  	require.Equal(
   444  		t, i.size.estimate, sizeEstimate{emptySize: emptyBlockSize},
   445  	)
   446  	indexBlockBufPool.Put(i)
   447  }
   448  
   449  func TestClearWriteTask(t *testing.T) {
   450  	w := writeTaskPool.Get().(*writeTask)
   451  	ch := make(chan bool, 1)
   452  	w.compressionDone = ch
   453  	w.buf = &dataBlockBuf{}
   454  	w.flushableIndexBlock = &indexBlockBuf{}
   455  	w.currIndexBlock = &indexBlockBuf{}
   456  	w.indexEntrySep = ikey("apple")
   457  	w.indexInflightSize = 1
   458  	w.finishedIndexProps = []byte{'a', 'v'}
   459  
   460  	w.clear()
   461  
   462  	var nilDataBlockBuf *dataBlockBuf
   463  	var nilIndexBlockBuf *indexBlockBuf
   464  	// Channels should be the same(no new channel should be allocated)
   465  	require.Equal(t, w.compressionDone, ch)
   466  	require.Equal(t, w.buf, nilDataBlockBuf)
   467  	require.Equal(t, w.flushableIndexBlock, nilIndexBlockBuf)
   468  	require.Equal(t, w.currIndexBlock, nilIndexBlockBuf)
   469  	require.Equal(t, w.indexEntrySep, base.InvalidInternalKey)
   470  	require.Equal(t, w.indexInflightSize, 0)
   471  	require.Equal(t, w.finishedIndexProps, []byte(nil))
   472  
   473  	writeTaskPool.Put(w)
   474  }
   475  
   476  func TestDoubleClose(t *testing.T) {
   477  	// There is code in Cockroach land which relies on Writer.Close being
   478  	// idempotent. We should test this in Pebble, so that we don't cause
   479  	// Cockroach test failures.
   480  	f := &discardFile{}
   481  	w := NewWriter(f, WriterOptions{
   482  		BlockSize:   1,
   483  		TableFormat: TableFormatPebblev1,
   484  	})
   485  	w.Set(ikey("a").UserKey, nil)
   486  	w.Set(ikey("b").UserKey, nil)
   487  	err := w.Close()
   488  	require.NoError(t, err)
   489  	err = w.Close()
   490  	require.Equal(t, err, errWriterClosed)
   491  }
   492  
   493  func TestParallelWriterErrorProp(t *testing.T) {
   494  	fs := vfs.NewMem()
   495  	f, err := fs.Create("test")
   496  	require.NoError(t, err)
   497  	opts := WriterOptions{
   498  		TableFormat: TableFormatPebblev1, BlockSize: 1, Parallelism: true,
   499  	}
   500  
   501  	w := NewWriter(objstorageprovider.NewFileWritable(f), opts)
   502  	// Directly testing this, because it's difficult to get the Writer to
   503  	// encounter an error, precisely when the writeQueue is doing block writes.
   504  	w.coordination.writeQueue.err = errors.New("write queue write error")
   505  	w.Set(ikey("a").UserKey, nil)
   506  	w.Set(ikey("b").UserKey, nil)
   507  	err = w.Close()
   508  	require.Equal(t, err.Error(), "write queue write error")
   509  }
   510  
   511  func TestSizeEstimate(t *testing.T) {
   512  	var sizeEstimate sizeEstimate
   513  	datadriven.RunTest(t, "testdata/size_estimate",
   514  		func(t *testing.T, td *datadriven.TestData) string {
   515  			switch td.Cmd {
   516  			case "init":
   517  				if len(td.CmdArgs) != 1 {
   518  					return "init <empty size>"
   519  				}
   520  				emptySize, err := strconv.Atoi(td.CmdArgs[0].String())
   521  				if err != nil {
   522  					return "invalid empty size"
   523  				}
   524  				sizeEstimate.init(uint64(emptySize))
   525  				return "success"
   526  			case "clear":
   527  				sizeEstimate.clear()
   528  				return fmt.Sprintf("%d", sizeEstimate.size())
   529  			case "size":
   530  				return fmt.Sprintf("%d", sizeEstimate.size())
   531  			case "add_inflight":
   532  				if len(td.CmdArgs) != 1 {
   533  					return "add_inflight <inflight size estimate>"
   534  				}
   535  				inflightSize, err := strconv.Atoi(td.CmdArgs[0].String())
   536  				if err != nil {
   537  					return "invalid inflight size"
   538  				}
   539  				sizeEstimate.addInflight(inflightSize)
   540  				return fmt.Sprintf("%d", sizeEstimate.size())
   541  			case "entry_written":
   542  				if len(td.CmdArgs) != 2 {
   543  					return "entry_written <new_total_size> <prev_inflight_size>"
   544  				}
   545  				newTotalSize, err := strconv.Atoi(td.CmdArgs[0].String())
   546  				if err != nil {
   547  					return "invalid inflight size"
   548  				}
   549  				inflightSize, err := strconv.Atoi(td.CmdArgs[1].String())
   550  				if err != nil {
   551  					return "invalid inflight size"
   552  				}
   553  				sizeEstimate.writtenWithTotal(uint64(newTotalSize), inflightSize)
   554  				return fmt.Sprintf("%d", sizeEstimate.size())
   555  			case "num_written_entries":
   556  				return fmt.Sprintf("%d", sizeEstimate.numWrittenEntries)
   557  			case "num_inflight_entries":
   558  				return fmt.Sprintf("%d", sizeEstimate.numInflightEntries)
   559  			case "num_entries":
   560  				return fmt.Sprintf("%d", sizeEstimate.numWrittenEntries+sizeEstimate.numInflightEntries)
   561  			default:
   562  				return fmt.Sprintf("unknown command: %s", td.Cmd)
   563  			}
   564  		})
   565  }
   566  
   567  func TestWriterClearCache(t *testing.T) {
   568  	// Verify that Writer clears the cache of blocks that it writes.
   569  	mem := vfs.NewMem()
   570  	opts := ReaderOptions{
   571  		Cache:    cache.New(64 << 20),
   572  		Comparer: testkeys.Comparer,
   573  	}
   574  	defer opts.Cache.Unref()
   575  
   576  	writerOpts := WriterOptions{
   577  		Cache:       opts.Cache,
   578  		Comparer:    testkeys.Comparer,
   579  		TableFormat: TableFormatPebblev3,
   580  	}
   581  	cacheOpts := &cacheOpts{cacheID: 1, fileNum: base.FileNum(1).DiskFileNum()}
   582  	invalidData := func() *cache.Value {
   583  		invalid := []byte("invalid data")
   584  		v := cache.Alloc(len(invalid))
   585  		copy(v.Buf(), invalid)
   586  		return v
   587  	}
   588  
   589  	build := func(name string) {
   590  		f, err := mem.Create(name)
   591  		require.NoError(t, err)
   592  
   593  		w := NewWriter(objstorageprovider.NewFileWritable(f), writerOpts, cacheOpts)
   594  		require.NoError(t, w.Set([]byte("hello"), []byte("world")))
   595  		require.NoError(t, w.Set([]byte("hello@42"), []byte("world@42")))
   596  		require.NoError(t, w.Set([]byte("hello@5"), []byte("world@5")))
   597  		require.NoError(t, w.Close())
   598  	}
   599  
   600  	// Build the sstable a first time so that we can determine the locations of
   601  	// all of the blocks.
   602  	build("test")
   603  
   604  	f, err := mem.Open("test")
   605  	require.NoError(t, err)
   606  
   607  	r, err := newReader(f, opts)
   608  	require.NoError(t, err)
   609  
   610  	layout, err := r.Layout()
   611  	require.NoError(t, err)
   612  
   613  	foreachBH := func(layout *Layout, f func(bh BlockHandle)) {
   614  		for _, bh := range layout.Data {
   615  			f(bh.BlockHandle)
   616  		}
   617  		for _, bh := range layout.Index {
   618  			f(bh)
   619  		}
   620  		f(layout.TopIndex)
   621  		f(layout.Filter)
   622  		f(layout.RangeDel)
   623  		for _, bh := range layout.ValueBlock {
   624  			f(bh)
   625  		}
   626  		if layout.ValueIndex.Length != 0 {
   627  			f(layout.ValueIndex)
   628  		}
   629  		f(layout.Properties)
   630  		f(layout.MetaIndex)
   631  	}
   632  
   633  	// Poison the cache for each of the blocks.
   634  	poison := func(bh BlockHandle) {
   635  		opts.Cache.Set(cacheOpts.cacheID, cacheOpts.fileNum, bh.Offset, invalidData()).Release()
   636  	}
   637  	foreachBH(layout, poison)
   638  
   639  	// Build the table a second time. This should clear the cache for the blocks
   640  	// that are written.
   641  	build("test")
   642  
   643  	// Verify that the written blocks have been cleared from the cache.
   644  	check := func(bh BlockHandle) {
   645  		h := opts.Cache.Get(cacheOpts.cacheID, cacheOpts.fileNum, bh.Offset)
   646  		if h.Get() != nil {
   647  			t.Fatalf("%d: expected cache to be cleared, but found %q", bh.Offset, h.Get())
   648  		}
   649  	}
   650  	foreachBH(layout, check)
   651  
   652  	require.NoError(t, r.Close())
   653  }
   654  
   655  type discardFile struct {
   656  	wrote int64
   657  }
   658  
   659  var _ objstorage.Writable = (*discardFile)(nil)
   660  
   661  func (f *discardFile) Finish() error {
   662  	return nil
   663  }
   664  
   665  func (f *discardFile) Abort() {}
   666  
   667  func (f *discardFile) Write(p []byte) error {
   668  	f.wrote += int64(len(p))
   669  	return nil
   670  }
   671  
   672  type blockPropErrSite uint
   673  
   674  const (
   675  	errSiteAdd blockPropErrSite = iota
   676  	errSiteFinishBlock
   677  	errSiteFinishIndex
   678  	errSiteFinishTable
   679  	errSiteNone
   680  )
   681  
   682  type testBlockPropCollector struct {
   683  	errSite blockPropErrSite
   684  	err     error
   685  }
   686  
   687  func (c *testBlockPropCollector) Name() string { return "testBlockPropCollector" }
   688  
   689  func (c *testBlockPropCollector) Add(_ InternalKey, _ []byte) error {
   690  	if c.errSite == errSiteAdd {
   691  		return c.err
   692  	}
   693  	return nil
   694  }
   695  
   696  func (c *testBlockPropCollector) FinishDataBlock(_ []byte) ([]byte, error) {
   697  	if c.errSite == errSiteFinishBlock {
   698  		return nil, c.err
   699  	}
   700  	return nil, nil
   701  }
   702  
   703  func (c *testBlockPropCollector) AddPrevDataBlockToIndexBlock() {}
   704  
   705  func (c *testBlockPropCollector) FinishIndexBlock(_ []byte) ([]byte, error) {
   706  	if c.errSite == errSiteFinishIndex {
   707  		return nil, c.err
   708  	}
   709  	return nil, nil
   710  }
   711  
   712  func (c *testBlockPropCollector) FinishTable(_ []byte) ([]byte, error) {
   713  	if c.errSite == errSiteFinishTable {
   714  		return nil, c.err
   715  	}
   716  	return nil, nil
   717  }
   718  
   719  func TestWriterBlockPropertiesErrors(t *testing.T) {
   720  	blockPropErr := errors.Newf("block property collector failed")
   721  	testCases := []blockPropErrSite{
   722  		errSiteAdd,
   723  		errSiteFinishBlock,
   724  		errSiteFinishIndex,
   725  		errSiteFinishTable,
   726  		errSiteNone,
   727  	}
   728  
   729  	var (
   730  		k1 = base.MakeInternalKey([]byte("a"), 0, base.InternalKeyKindSet)
   731  		v1 = []byte("apples")
   732  		k2 = base.MakeInternalKey([]byte("b"), 0, base.InternalKeyKindSet)
   733  		v2 = []byte("bananas")
   734  		k3 = base.MakeInternalKey([]byte("c"), 0, base.InternalKeyKindSet)
   735  		v3 = []byte("carrots")
   736  	)
   737  
   738  	for _, tc := range testCases {
   739  		t.Run("", func(t *testing.T) {
   740  			fs := vfs.NewMem()
   741  			f, err := fs.Create("test")
   742  			require.NoError(t, err)
   743  
   744  			w := NewWriter(objstorageprovider.NewFileWritable(f), WriterOptions{
   745  				BlockSize: 1,
   746  				BlockPropertyCollectors: []func() BlockPropertyCollector{
   747  					func() BlockPropertyCollector {
   748  						return &testBlockPropCollector{
   749  							errSite: tc,
   750  							err:     blockPropErr,
   751  						}
   752  					},
   753  				},
   754  				TableFormat: TableFormatPebblev1,
   755  			})
   756  
   757  			err = w.Add(k1, v1)
   758  			switch tc {
   759  			case errSiteAdd:
   760  				require.Error(t, err)
   761  				require.Equal(t, blockPropErr, err)
   762  				return
   763  			case errSiteFinishBlock:
   764  				require.NoError(t, err)
   765  				// Addition of a second key completes the first block.
   766  				err = w.Add(k2, v2)
   767  				require.Error(t, err)
   768  				require.Equal(t, blockPropErr, err)
   769  				return
   770  			case errSiteFinishIndex:
   771  				require.NoError(t, err)
   772  				// Addition of a second key completes the first block.
   773  				err = w.Add(k2, v2)
   774  				require.NoError(t, err)
   775  				// The index entry for the first block is added after the completion of
   776  				// the second block, which is triggered by adding a third key.
   777  				err = w.Add(k3, v3)
   778  				require.Error(t, err)
   779  				require.Equal(t, blockPropErr, err)
   780  				return
   781  			}
   782  
   783  			err = w.Close()
   784  			if tc == errSiteFinishTable {
   785  				require.Error(t, err)
   786  				require.Equal(t, blockPropErr, err)
   787  			} else {
   788  				require.NoError(t, err)
   789  			}
   790  		})
   791  	}
   792  }
   793  
   794  func TestWriter_TableFormatCompatibility(t *testing.T) {
   795  	testCases := []struct {
   796  		name        string
   797  		minFormat   TableFormat
   798  		configureFn func(opts *WriterOptions)
   799  		writeFn     func(w *Writer) error
   800  	}{
   801  		{
   802  			name:      "block properties",
   803  			minFormat: TableFormatPebblev1,
   804  			configureFn: func(opts *WriterOptions) {
   805  				opts.BlockPropertyCollectors = []func() BlockPropertyCollector{
   806  					func() BlockPropertyCollector {
   807  						return NewBlockIntervalCollector(
   808  							"collector", &valueCharBlockIntervalCollector{charIdx: 0}, nil,
   809  						)
   810  					},
   811  				}
   812  			},
   813  		},
   814  		{
   815  			name:      "range keys",
   816  			minFormat: TableFormatPebblev2,
   817  			writeFn: func(w *Writer) error {
   818  				return w.RangeKeyDelete([]byte("a"), []byte("b"))
   819  			},
   820  		},
   821  	}
   822  
   823  	for _, tc := range testCases {
   824  		t.Run(tc.name, func(t *testing.T) {
   825  			for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ {
   826  				t.Run(tf.String(), func(t *testing.T) {
   827  					fs := vfs.NewMem()
   828  					f, err := fs.Create("sst")
   829  					require.NoError(t, err)
   830  
   831  					opts := WriterOptions{TableFormat: tf}
   832  					if tc.configureFn != nil {
   833  						tc.configureFn(&opts)
   834  					}
   835  
   836  					w := NewWriter(objstorageprovider.NewFileWritable(f), opts)
   837  					if tc.writeFn != nil {
   838  						err = tc.writeFn(w)
   839  						require.NoError(t, err)
   840  					}
   841  
   842  					err = w.Close()
   843  					if tf < tc.minFormat {
   844  						require.Error(t, err)
   845  					} else {
   846  						require.NoError(t, err)
   847  					}
   848  				})
   849  			}
   850  		})
   851  	}
   852  }
   853  
   854  // Tests for races, such as https://github.com/cockroachdb/cockroach/issues/77194,
   855  // in the Writer.
   856  func TestWriterRace(t *testing.T) {
   857  	ks := testkeys.Alpha(5)
   858  	ks = ks.EveryN(ks.Count() / 1_000)
   859  	keys := make([][]byte, ks.Count())
   860  	for ki := 0; ki < len(keys); ki++ {
   861  		keys[ki] = testkeys.Key(ks, int64(ki))
   862  	}
   863  	readerOpts := ReaderOptions{
   864  		Comparer: testkeys.Comparer,
   865  		Filters:  map[string]base.FilterPolicy{},
   866  	}
   867  
   868  	var wg sync.WaitGroup
   869  	for i := 0; i < 16; i++ {
   870  		wg.Add(1)
   871  		go func() {
   872  			val := make([]byte, rand.Intn(1000))
   873  			opts := WriterOptions{
   874  				Comparer:    testkeys.Comparer,
   875  				BlockSize:   rand.Intn(1 << 10),
   876  				Compression: NoCompression,
   877  			}
   878  			defer wg.Done()
   879  			f := &memFile{}
   880  			w := NewWriter(f, opts)
   881  			for ki := 0; ki < len(keys); ki++ {
   882  				require.NoError(
   883  					t,
   884  					w.Add(base.MakeInternalKey(keys[ki], uint64(ki), InternalKeyKindSet), val),
   885  				)
   886  				require.Equal(
   887  					t, w.dataBlockBuf.dataBlock.getCurKey().UserKey, keys[ki],
   888  				)
   889  			}
   890  			require.NoError(t, w.Close())
   891  			require.Equal(t, w.meta.LargestPoint.UserKey, keys[len(keys)-1])
   892  			r, err := NewMemReader(f.Data(), readerOpts)
   893  			require.NoError(t, err)
   894  			defer r.Close()
   895  			it, err := r.NewIter(nil, nil)
   896  			require.NoError(t, err)
   897  			defer it.Close()
   898  			ki := 0
   899  			for k, v := it.First(); k != nil; k, v = it.Next() {
   900  				require.Equal(t, k.UserKey, keys[ki])
   901  				vBytes, _, err := v.Value(nil)
   902  				require.NoError(t, err)
   903  				require.Equal(t, vBytes, val)
   904  				ki++
   905  			}
   906  		}()
   907  	}
   908  	wg.Wait()
   909  }
   910  
   911  func TestObsoleteBlockPropertyCollectorFilter(t *testing.T) {
   912  	var c obsoleteKeyBlockPropertyCollector
   913  	var f obsoleteKeyBlockPropertyFilter
   914  	require.Equal(t, c.Name(), f.Name())
   915  	// Data block with 1 obsolete and 1 non-obsolete point.
   916  	c.AddPoint(false)
   917  	c.AddPoint(true)
   918  	finishAndCheck := func(finishFunc func([]byte) ([]byte, error), expectedIntersects bool) {
   919  		var buf [1]byte
   920  		prop, err := finishFunc(buf[:0:1])
   921  		require.NoError(t, err)
   922  		expectedLength := 1
   923  		if expectedIntersects {
   924  			// The common case is encoded in 0 bytes
   925  			expectedLength = 0
   926  		}
   927  		require.Equal(t, expectedLength, len(prop))
   928  		// Confirm that the collector used the slice.
   929  		require.Equal(t, unsafe.Pointer(&buf[0]), unsafe.Pointer(&prop[:1][0]))
   930  		intersects, err := f.Intersects(prop)
   931  		require.NoError(t, err)
   932  		require.Equal(t, expectedIntersects, intersects)
   933  	}
   934  	finishAndCheck(c.FinishDataBlock, true)
   935  	c.AddPrevDataBlockToIndexBlock()
   936  	// Data block with only obsolete points.
   937  	c.AddPoint(true)
   938  	c.AddPoint(true)
   939  	finishAndCheck(c.FinishDataBlock, false)
   940  	c.AddPrevDataBlockToIndexBlock()
   941  	// Index block has one obsolete block and one non-obsolete block.
   942  	finishAndCheck(c.FinishIndexBlock, true)
   943  
   944  	// Data block with obsolete point.
   945  	c.AddPoint(true)
   946  	finishAndCheck(c.FinishDataBlock, false)
   947  	c.AddPrevDataBlockToIndexBlock()
   948  	// Data block with obsolete point.
   949  	c.AddPoint(true)
   950  	finishAndCheck(c.FinishDataBlock, false)
   951  	c.AddPrevDataBlockToIndexBlock()
   952  	// Index block has only obsolete blocks.
   953  	finishAndCheck(c.FinishIndexBlock, false)
   954  	// Table is not obsolete.
   955  	finishAndCheck(c.FinishTable, true)
   956  
   957  	// Reset the collector state.
   958  	c = obsoleteKeyBlockPropertyCollector{}
   959  	// Table with only obsolete blocks.
   960  
   961  	// Data block with obsolete point.
   962  	c.AddPoint(true)
   963  	finishAndCheck(c.FinishDataBlock, false)
   964  	c.AddPrevDataBlockToIndexBlock()
   965  	// Data block with obsolete point.
   966  	c.AddPoint(true)
   967  	finishAndCheck(c.FinishDataBlock, false)
   968  	c.AddPrevDataBlockToIndexBlock()
   969  	// Index block has only obsolete blocks.
   970  	finishAndCheck(c.FinishIndexBlock, false)
   971  	// Table is obsolete.
   972  	finishAndCheck(c.FinishTable, false)
   973  }
   974  
   975  func BenchmarkWriter(b *testing.B) {
   976  	keys := make([][]byte, 1e6)
   977  	const keyLen = 24
   978  	keySlab := make([]byte, keyLen*len(keys))
   979  	for i := range keys {
   980  		key := keySlab[i*keyLen : i*keyLen+keyLen]
   981  		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
   982  		binary.BigEndian.PutUint64(key[8:16], 456)
   983  		binary.BigEndian.PutUint64(key[16:], uint64(i))
   984  		keys[i] = key
   985  	}
   986  	for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} {
   987  		b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) {
   988  			runWriterBench(b, keys, nil, format)
   989  		})
   990  	}
   991  }
   992  
   993  func BenchmarkWriterWithVersions(b *testing.B) {
   994  	keys := make([][]byte, 1e6)
   995  	const keyLen = 26
   996  	keySlab := make([]byte, keyLen*len(keys))
   997  	for i := range keys {
   998  		key := keySlab[i*keyLen : i*keyLen+keyLen]
   999  		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
  1000  		binary.BigEndian.PutUint64(key[8:16], 456)
  1001  		// @ is ascii value 64. Placing any byte with value 64 in these 8 bytes
  1002  		// will confuse testkeys.Comparer, when we pass it a key after splitting
  1003  		// of the suffix, since Comparer thinks this prefix is also a key with a
  1004  		// suffix. Hence, we print as a base 10 string.
  1005  		require.Equal(b, 8, copy(key[16:], fmt.Sprintf("%8d", i/2)))
  1006  		key[24] = '@'
  1007  		// Ascii representation of single digit integer 2-(i%2).
  1008  		key[25] = byte(48 + 2 - (i % 2))
  1009  		keys[i] = key
  1010  	}
  1011  	// TableFormatPebblev3 can sometimes be ~50% slower than
  1012  	// TableFormatPebblev2, since testkeys.Compare is expensive (mainly due to
  1013  	// split) and with v3 we have to call it twice for 50% of the Set calls,
  1014  	// since they have the same prefix as the preceding key.
  1015  	for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} {
  1016  		b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) {
  1017  			runWriterBench(b, keys, testkeys.Comparer, format)
  1018  		})
  1019  	}
  1020  }
  1021  
  1022  func runWriterBench(b *testing.B, keys [][]byte, comparer *base.Comparer, format TableFormat) {
  1023  	for _, bs := range []int{base.DefaultBlockSize, 32 << 10} {
  1024  		b.Run(fmt.Sprintf("block=%s", humanize.Bytes.Int64(int64(bs))), func(b *testing.B) {
  1025  			for _, filter := range []bool{true, false} {
  1026  				b.Run(fmt.Sprintf("filter=%t", filter), func(b *testing.B) {
  1027  					for _, comp := range []Compression{NoCompression, SnappyCompression, ZstdCompression} {
  1028  						b.Run(fmt.Sprintf("compression=%s", comp), func(b *testing.B) {
  1029  							opts := WriterOptions{
  1030  								BlockRestartInterval: 16,
  1031  								BlockSize:            bs,
  1032  								Comparer:             comparer,
  1033  								Compression:          comp,
  1034  								TableFormat:          format,
  1035  							}
  1036  							if filter {
  1037  								opts.FilterPolicy = bloom.FilterPolicy(10)
  1038  							}
  1039  							f := &discardFile{}
  1040  							b.ResetTimer()
  1041  							for i := 0; i < b.N; i++ {
  1042  								f.wrote = 0
  1043  								w := NewWriter(f, opts)
  1044  
  1045  								for j := range keys {
  1046  									if err := w.Set(keys[j], keys[j]); err != nil {
  1047  										b.Fatal(err)
  1048  									}
  1049  								}
  1050  								if err := w.Close(); err != nil {
  1051  									b.Fatal(err)
  1052  								}
  1053  								b.SetBytes(int64(f.wrote))
  1054  							}
  1055  						})
  1056  					}
  1057  				})
  1058  			}
  1059  		})
  1060  	}
  1061  }
  1062  
  1063  var test4bSuffixComparer = &base.Comparer{
  1064  	Compare:   base.DefaultComparer.Compare,
  1065  	Equal:     base.DefaultComparer.Equal,
  1066  	Separator: base.DefaultComparer.Separator,
  1067  	Successor: base.DefaultComparer.Successor,
  1068  	Split: func(key []byte) int {
  1069  		if len(key) > 4 {
  1070  			return len(key) - 4
  1071  		}
  1072  		return len(key)
  1073  	},
  1074  	Name: "comparer-split-4b-suffix",
  1075  }