github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/sstable/block_test.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  	"testing"
    13  	"time"
    14  	"unsafe"
    15  
    16  	"github.com/cockroachdb/datadriven"
    17  	"github.com/cockroachdb/pebble/internal/base"
    18  	"github.com/cockroachdb/pebble/internal/itertest"
    19  	"github.com/stretchr/testify/require"
    20  	"golang.org/x/exp/rand"
    21  )
    22  
    23  func ikey(s string) InternalKey {
    24  	return InternalKey{UserKey: []byte(s)}
    25  }
    26  
    27  func TestBlockWriter(t *testing.T) {
    28  	w := &rawBlockWriter{
    29  		blockWriter: blockWriter{restartInterval: 16},
    30  	}
    31  	w.add(ikey("apple"), nil)
    32  	w.add(ikey("apricot"), nil)
    33  	w.add(ikey("banana"), nil)
    34  	block := w.finish()
    35  
    36  	expected := []byte(
    37  		"\x00\x05\x00apple" +
    38  			"\x02\x05\x00ricot" +
    39  			"\x00\x06\x00banana" +
    40  			"\x00\x00\x00\x00\x01\x00\x00\x00")
    41  	if !bytes.Equal(expected, block) {
    42  		t.Fatalf("expected\n%q\nfound\n%q", expected, block)
    43  	}
    44  }
    45  
    46  func TestBlockWriterWithPrefix(t *testing.T) {
    47  	w := &rawBlockWriter{
    48  		blockWriter: blockWriter{restartInterval: 2},
    49  	}
    50  	curKey := func() string {
    51  		return string(base.DecodeInternalKey(w.curKey).UserKey)
    52  	}
    53  	addAdapter := func(
    54  		key InternalKey,
    55  		value []byte,
    56  		addValuePrefix bool,
    57  		valuePrefix valuePrefix,
    58  		setHasSameKeyPrefix bool) {
    59  		w.addWithOptionalValuePrefix(
    60  			key, false, value, len(key.UserKey), addValuePrefix, valuePrefix, setHasSameKeyPrefix)
    61  	}
    62  	addAdapter(
    63  		ikey("apple"), []byte("red"), false, 0, true)
    64  	require.Equal(t, "apple", curKey())
    65  	require.Equal(t, "red", string(w.curValue))
    66  	addAdapter(
    67  		ikey("apricot"), []byte("orange"), true, '\xff', false)
    68  	require.Equal(t, "apricot", curKey())
    69  	require.Equal(t, "orange", string(w.curValue))
    70  	// Even though this call has setHasSameKeyPrefix=true, the previous call,
    71  	// which was after the last restart set it to false. So the restart encoded
    72  	// with banana has this cumulative bit set to false.
    73  	addAdapter(
    74  		ikey("banana"), []byte("yellow"), true, '\x00', true)
    75  	require.Equal(t, "banana", curKey())
    76  	require.Equal(t, "yellow", string(w.curValue))
    77  	addAdapter(
    78  		ikey("cherry"), []byte("red"), false, 0, true)
    79  	require.Equal(t, "cherry", curKey())
    80  	require.Equal(t, "red", string(w.curValue))
    81  	// All intervening calls has setHasSameKeyPrefix=true, so the cumulative bit
    82  	// will be set to true in this restart.
    83  	addAdapter(
    84  		ikey("mango"), []byte("juicy"), false, 0, true)
    85  	require.Equal(t, "mango", curKey())
    86  	require.Equal(t, "juicy", string(w.curValue))
    87  
    88  	block := w.finish()
    89  
    90  	expected := []byte(
    91  		"\x00\x0d\x03apple\x00\x00\x00\x00\x00\x00\x00\x00red" +
    92  			"\x02\x0d\x07ricot\x00\x00\x00\x00\x00\x00\x00\x00\xfforange" +
    93  			"\x00\x0e\x07banana\x00\x00\x00\x00\x00\x00\x00\x00\x00yellow" +
    94  			"\x00\x0e\x03cherry\x00\x00\x00\x00\x00\x00\x00\x00red" +
    95  			"\x00\x0d\x05mango\x00\x00\x00\x00\x00\x00\x00\x00juicy" +
    96  			// Restarts are:
    97  			// 00000000 (restart at apple), 2a000000 (restart at banana), 56000080 (restart at mango)
    98  			// 03000000 (number of restart, i.e., 3). The restart at mango has 1 in the most significant
    99  			// bit of the uint32, so the last byte in the little endian encoding is \x80.
   100  			"\x00\x00\x00\x00\x2a\x00\x00\x00\x56\x00\x00\x80\x03\x00\x00\x00")
   101  	if !bytes.Equal(expected, block) {
   102  		t.Fatalf("expected\n%x\nfound\n%x", expected, block)
   103  	}
   104  }
   105  
   106  func testBlockCleared(t *testing.T, w, b *blockWriter) {
   107  	require.Equal(t, w.restartInterval, b.restartInterval)
   108  	require.Equal(t, w.nEntries, b.nEntries)
   109  	require.Equal(t, w.nextRestart, b.nextRestart)
   110  	require.Equal(t, len(w.buf), len(b.buf))
   111  	require.Equal(t, len(w.restarts), len(b.restarts))
   112  	require.Equal(t, len(w.curKey), len(b.curKey))
   113  	require.Equal(t, len(w.prevKey), len(b.prevKey))
   114  	require.Equal(t, len(w.curValue), len(b.curValue))
   115  	require.Equal(t, w.tmp, b.tmp)
   116  
   117  	// Make sure that we didn't lose the allocated byte slices.
   118  	require.True(t, cap(w.buf) > 0 && cap(b.buf) == 0)
   119  	require.True(t, cap(w.restarts) > 0 && cap(b.restarts) == 0)
   120  	require.True(t, cap(w.curKey) > 0 && cap(b.curKey) == 0)
   121  	require.True(t, cap(w.prevKey) > 0 && cap(b.prevKey) == 0)
   122  	require.True(t, cap(w.curValue) > 0 && cap(b.curValue) == 0)
   123  }
   124  
   125  func TestBlockClear(t *testing.T) {
   126  	w := blockWriter{restartInterval: 16}
   127  	w.add(ikey("apple"), nil)
   128  	w.add(ikey("apricot"), nil)
   129  	w.add(ikey("banana"), nil)
   130  
   131  	w.clear()
   132  
   133  	// Once a block is cleared, we expect its fields to be cleared, but we expect
   134  	// it to keep its allocated byte slices.
   135  	b := blockWriter{}
   136  	testBlockCleared(t, &w, &b)
   137  }
   138  
   139  func TestInvalidInternalKeyDecoding(t *testing.T) {
   140  	// Invalid keys since they don't have an 8 byte trailer.
   141  	testCases := []string{
   142  		"",
   143  		"\x01\x02\x03\x04\x05\x06\x07",
   144  		"foo",
   145  	}
   146  	for _, tc := range testCases {
   147  		i := blockIter{}
   148  		i.decodeInternalKey([]byte(tc))
   149  		require.Nil(t, i.ikey.UserKey)
   150  		require.Equal(t, uint64(InternalKeyKindInvalid), i.ikey.Trailer)
   151  	}
   152  }
   153  
   154  func TestBlockIter(t *testing.T) {
   155  	// k is a block that maps three keys "apple", "apricot", "banana" to empty strings.
   156  	k := block([]byte(
   157  		"\x00\x05\x00apple" +
   158  			"\x02\x05\x00ricot" +
   159  			"\x00\x06\x00banana" +
   160  			"\x00\x00\x00\x00\x01\x00\x00\x00"))
   161  	var testcases = []struct {
   162  		index int
   163  		key   string
   164  	}{
   165  		{0, ""},
   166  		{0, "a"},
   167  		{0, "aaaaaaaaaaaaaaa"},
   168  		{0, "app"},
   169  		{0, "apple"},
   170  		{1, "appliance"},
   171  		{1, "apricos"},
   172  		{1, "apricot"},
   173  		{2, "azzzzzzzzzzzzzz"},
   174  		{2, "b"},
   175  		{2, "banan"},
   176  		{2, "banana"},
   177  		{3, "banana\x00"},
   178  		{3, "c"},
   179  	}
   180  	for _, tc := range testcases {
   181  		i, err := newRawBlockIter(bytes.Compare, k)
   182  		require.NoError(t, err)
   183  		i.SeekGE([]byte(tc.key))
   184  		for j, keyWant := range []string{"apple", "apricot", "banana"}[tc.index:] {
   185  			if !i.Valid() {
   186  				t.Fatalf("key=%q, index=%d, j=%d: Valid got false, keyWant true", tc.key, tc.index, j)
   187  			}
   188  			if keyGot := string(i.Key().UserKey); keyGot != keyWant {
   189  				t.Fatalf("key=%q, index=%d, j=%d: got %q, keyWant %q", tc.key, tc.index, j, keyGot, keyWant)
   190  			}
   191  			i.Next()
   192  		}
   193  		if i.Valid() {
   194  			t.Fatalf("key=%q, index=%d: Valid got true, keyWant false", tc.key, tc.index)
   195  		}
   196  		if err := i.Close(); err != nil {
   197  			t.Fatalf("key=%q, index=%d: got err=%v", tc.key, tc.index, err)
   198  		}
   199  	}
   200  
   201  	{
   202  		i, err := newRawBlockIter(bytes.Compare, k)
   203  		require.NoError(t, err)
   204  		i.Last()
   205  		for j, keyWant := range []string{"banana", "apricot", "apple"} {
   206  			if !i.Valid() {
   207  				t.Fatalf("j=%d: Valid got false, want true", j)
   208  			}
   209  			if keyGot := string(i.Key().UserKey); keyGot != keyWant {
   210  				t.Fatalf("j=%d: got %q, want %q", j, keyGot, keyWant)
   211  			}
   212  			i.Prev()
   213  		}
   214  		if i.Valid() {
   215  			t.Fatalf("Valid got true, want false")
   216  		}
   217  		if err := i.Close(); err != nil {
   218  			t.Fatalf("got err=%v", err)
   219  		}
   220  	}
   221  }
   222  
   223  func TestBlockIter2(t *testing.T) {
   224  	makeIkey := func(s string) InternalKey {
   225  		j := strings.Index(s, ":")
   226  		seqNum, err := strconv.Atoi(s[j+1:])
   227  		if err != nil {
   228  			panic(err)
   229  		}
   230  		return base.MakeInternalKey([]byte(s[:j]), uint64(seqNum), InternalKeyKindSet)
   231  	}
   232  
   233  	var block []byte
   234  
   235  	for _, r := range []int{1, 2, 3, 4} {
   236  		t.Run(fmt.Sprintf("restart=%d", r), func(t *testing.T) {
   237  			datadriven.RunTest(t, "testdata/block", func(t *testing.T, d *datadriven.TestData) string {
   238  				switch d.Cmd {
   239  				case "build":
   240  					w := &blockWriter{restartInterval: r}
   241  					for _, e := range strings.Split(strings.TrimSpace(d.Input), ",") {
   242  						w.add(makeIkey(e), nil)
   243  					}
   244  					block = w.finish()
   245  					return ""
   246  
   247  				case "iter":
   248  					iter, err := newBlockIter(bytes.Compare, block)
   249  					if err != nil {
   250  						return err.Error()
   251  					}
   252  
   253  					iter.globalSeqNum, err = scanGlobalSeqNum(d)
   254  					if err != nil {
   255  						return err.Error()
   256  					}
   257  					return itertest.RunInternalIterCmd(t, d, iter, itertest.Condensed)
   258  
   259  				default:
   260  					return fmt.Sprintf("unknown command: %s", d.Cmd)
   261  				}
   262  			})
   263  		})
   264  	}
   265  }
   266  
   267  func TestBlockIterKeyStability(t *testing.T) {
   268  	w := &blockWriter{restartInterval: 1}
   269  	expected := [][]byte{
   270  		[]byte("apple"),
   271  		[]byte("apricot"),
   272  		[]byte("banana"),
   273  	}
   274  	for i := range expected {
   275  		w.add(InternalKey{UserKey: expected[i]}, nil)
   276  	}
   277  	block := w.finish()
   278  
   279  	i, err := newBlockIter(bytes.Compare, block)
   280  	require.NoError(t, err)
   281  
   282  	// Check that the supplied slice resides within the bounds of the block.
   283  	check := func(v []byte) {
   284  		t.Helper()
   285  		begin := unsafe.Pointer(&v[0])
   286  		end := unsafe.Pointer(uintptr(begin) + uintptr(len(v)))
   287  		blockBegin := unsafe.Pointer(&block[0])
   288  		blockEnd := unsafe.Pointer(uintptr(blockBegin) + uintptr(len(block)))
   289  		if uintptr(begin) < uintptr(blockBegin) || uintptr(end) > uintptr(blockEnd) {
   290  			t.Fatalf("key %p-%p resides outside of block %p-%p", begin, end, blockBegin, blockEnd)
   291  		}
   292  	}
   293  
   294  	// Check that various means of iterating over the data match our expected
   295  	// values. Note that this is only guaranteed because of the usage of a
   296  	// restart-interval of 1 so that prefix compression was not performed.
   297  	for j := range expected {
   298  		keys := [][]byte{}
   299  		for key, _ := i.SeekGE(expected[j], base.SeekGEFlagsNone); key != nil; key, _ = i.Next() {
   300  			check(key.UserKey)
   301  			keys = append(keys, key.UserKey)
   302  		}
   303  		require.EqualValues(t, expected[j:], keys)
   304  	}
   305  
   306  	for j := range expected {
   307  		keys := [][]byte{}
   308  		for key, _ := i.SeekLT(expected[j], base.SeekLTFlagsNone); key != nil; key, _ = i.Prev() {
   309  			check(key.UserKey)
   310  			keys = append(keys, key.UserKey)
   311  		}
   312  		for i, j := 0, len(keys)-1; i < j; i, j = i+1, j-1 {
   313  			keys[i], keys[j] = keys[j], keys[i]
   314  		}
   315  		require.EqualValues(t, expected[:j], keys)
   316  	}
   317  }
   318  
   319  // Regression test for a bug in blockIter.Next where it was failing to handle
   320  // the case where it is switching from reverse to forward iteration. When that
   321  // switch occurs we need to populate blockIter.fullKey so that prefix
   322  // decompression works properly.
   323  func TestBlockIterReverseDirections(t *testing.T) {
   324  	w := &blockWriter{restartInterval: 4}
   325  	keys := [][]byte{
   326  		[]byte("apple0"),
   327  		[]byte("apple1"),
   328  		[]byte("apple2"),
   329  		[]byte("banana"),
   330  		[]byte("carrot"),
   331  	}
   332  	for i := range keys {
   333  		w.add(InternalKey{UserKey: keys[i]}, nil)
   334  	}
   335  	block := w.finish()
   336  
   337  	for targetPos := 0; targetPos < w.restartInterval; targetPos++ {
   338  		t.Run("", func(t *testing.T) {
   339  			i, err := newBlockIter(bytes.Compare, block)
   340  			require.NoError(t, err)
   341  
   342  			pos := 3
   343  			if key, _ := i.SeekLT([]byte("carrot"), base.SeekLTFlagsNone); !bytes.Equal(keys[pos], key.UserKey) {
   344  				t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey)
   345  			}
   346  			for pos > targetPos {
   347  				pos--
   348  				if key, _ := i.Prev(); !bytes.Equal(keys[pos], key.UserKey) {
   349  					t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey)
   350  				}
   351  			}
   352  			pos++
   353  			if key, _ := i.Next(); !bytes.Equal(keys[pos], key.UserKey) {
   354  				t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey)
   355  			}
   356  		})
   357  	}
   358  }
   359  
   360  func BenchmarkBlockIterSeekGE(b *testing.B) {
   361  	const blockSize = 32 << 10
   362  
   363  	for _, restartInterval := range []int{16} {
   364  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   365  			func(b *testing.B) {
   366  				w := &blockWriter{
   367  					restartInterval: restartInterval,
   368  				}
   369  
   370  				var ikey InternalKey
   371  				var keys [][]byte
   372  				for i := 0; w.estimatedSize() < blockSize; i++ {
   373  					key := []byte(fmt.Sprintf("%05d", i))
   374  					keys = append(keys, key)
   375  					ikey.UserKey = key
   376  					w.add(ikey, nil)
   377  				}
   378  
   379  				it, err := newBlockIter(bytes.Compare, w.finish())
   380  				if err != nil {
   381  					b.Fatal(err)
   382  				}
   383  				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   384  
   385  				b.ResetTimer()
   386  				for i := 0; i < b.N; i++ {
   387  					k := keys[rng.Intn(len(keys))]
   388  					it.SeekGE(k, base.SeekGEFlagsNone)
   389  					if testing.Verbose() {
   390  						if !it.valid() {
   391  							b.Fatal("expected to find key")
   392  						}
   393  						if !bytes.Equal(k, it.Key().UserKey) {
   394  							b.Fatalf("expected %s, but found %s", k, it.Key().UserKey)
   395  						}
   396  					}
   397  				}
   398  			})
   399  	}
   400  }
   401  
   402  func BenchmarkBlockIterSeekLT(b *testing.B) {
   403  	const blockSize = 32 << 10
   404  
   405  	for _, restartInterval := range []int{16} {
   406  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   407  			func(b *testing.B) {
   408  				w := &blockWriter{
   409  					restartInterval: restartInterval,
   410  				}
   411  
   412  				var ikey InternalKey
   413  				var keys [][]byte
   414  				for i := 0; w.estimatedSize() < blockSize; i++ {
   415  					key := []byte(fmt.Sprintf("%05d", i))
   416  					keys = append(keys, key)
   417  					ikey.UserKey = key
   418  					w.add(ikey, nil)
   419  				}
   420  
   421  				it, err := newBlockIter(bytes.Compare, w.finish())
   422  				if err != nil {
   423  					b.Fatal(err)
   424  				}
   425  				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   426  
   427  				b.ResetTimer()
   428  				for i := 0; i < b.N; i++ {
   429  					j := rng.Intn(len(keys))
   430  					it.SeekLT(keys[j], base.SeekLTFlagsNone)
   431  					if testing.Verbose() {
   432  						if j == 0 {
   433  							if it.valid() {
   434  								b.Fatal("unexpected key")
   435  							}
   436  						} else {
   437  							if !it.valid() {
   438  								b.Fatal("expected to find key")
   439  							}
   440  							k := keys[j-1]
   441  							if !bytes.Equal(k, it.Key().UserKey) {
   442  								b.Fatalf("expected %s, but found %s", k, it.Key().UserKey)
   443  							}
   444  						}
   445  					}
   446  				}
   447  			})
   448  	}
   449  }
   450  
   451  func BenchmarkBlockIterNext(b *testing.B) {
   452  	const blockSize = 32 << 10
   453  
   454  	for _, restartInterval := range []int{16} {
   455  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   456  			func(b *testing.B) {
   457  				w := &blockWriter{
   458  					restartInterval: restartInterval,
   459  				}
   460  
   461  				var ikey InternalKey
   462  				for i := 0; w.estimatedSize() < blockSize; i++ {
   463  					ikey.UserKey = []byte(fmt.Sprintf("%05d", i))
   464  					w.add(ikey, nil)
   465  				}
   466  
   467  				it, err := newBlockIter(bytes.Compare, w.finish())
   468  				if err != nil {
   469  					b.Fatal(err)
   470  				}
   471  
   472  				b.ResetTimer()
   473  				for i := 0; i < b.N; i++ {
   474  					if !it.valid() {
   475  						it.First()
   476  					}
   477  					it.Next()
   478  				}
   479  			})
   480  	}
   481  }
   482  
   483  func BenchmarkBlockIterPrev(b *testing.B) {
   484  	const blockSize = 32 << 10
   485  
   486  	for _, restartInterval := range []int{16} {
   487  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   488  			func(b *testing.B) {
   489  				w := &blockWriter{
   490  					restartInterval: restartInterval,
   491  				}
   492  
   493  				var ikey InternalKey
   494  				for i := 0; w.estimatedSize() < blockSize; i++ {
   495  					ikey.UserKey = []byte(fmt.Sprintf("%05d", i))
   496  					w.add(ikey, nil)
   497  				}
   498  
   499  				it, err := newBlockIter(bytes.Compare, w.finish())
   500  				if err != nil {
   501  					b.Fatal(err)
   502  				}
   503  
   504  				b.ResetTimer()
   505  				for i := 0; i < b.N; i++ {
   506  					if !it.valid() {
   507  						it.Last()
   508  					}
   509  					it.Prev()
   510  				}
   511  			})
   512  	}
   513  }