github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/block_test.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package sstable
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strconv"
    11  	"strings"
    12  	"testing"
    13  	"time"
    14  	"unsafe"
    15  
    16  	"github.com/cockroachdb/datadriven"
    17  	"github.com/cockroachdb/pebble/internal/base"
    18  	"github.com/stretchr/testify/require"
    19  	"golang.org/x/exp/rand"
    20  )
    21  
    22  func ikey(s string) InternalKey {
    23  	return InternalKey{UserKey: []byte(s)}
    24  }
    25  
    26  func TestBlockWriter(t *testing.T) {
    27  	w := &rawBlockWriter{
    28  		blockWriter: blockWriter{restartInterval: 16},
    29  	}
    30  	w.add(ikey("apple"), nil)
    31  	w.add(ikey("apricot"), nil)
    32  	w.add(ikey("banana"), nil)
    33  	block := w.finish()
    34  
    35  	expected := []byte(
    36  		"\x00\x05\x00apple" +
    37  			"\x02\x05\x00ricot" +
    38  			"\x00\x06\x00banana" +
    39  			"\x00\x00\x00\x00\x01\x00\x00\x00")
    40  	if !bytes.Equal(expected, block) {
    41  		t.Fatalf("expected\n%q\nfound\n%q", expected, block)
    42  	}
    43  }
    44  
    45  func TestBlockWriterWithPrefix(t *testing.T) {
    46  	w := &rawBlockWriter{
    47  		blockWriter: blockWriter{restartInterval: 2},
    48  	}
    49  	curKey := func() string {
    50  		return string(base.DecodeInternalKey(w.curKey).UserKey)
    51  	}
    52  	addAdapter := func(
    53  		key InternalKey,
    54  		value []byte,
    55  		addValuePrefix bool,
    56  		valuePrefix valuePrefix,
    57  		setHasSameKeyPrefix bool) {
    58  		w.addWithOptionalValuePrefix(
    59  			key, false, value, len(key.UserKey), addValuePrefix, valuePrefix, setHasSameKeyPrefix)
    60  	}
    61  	addAdapter(
    62  		ikey("apple"), []byte("red"), false, 0, true)
    63  	require.Equal(t, "apple", curKey())
    64  	require.Equal(t, "red", string(w.curValue))
    65  	addAdapter(
    66  		ikey("apricot"), []byte("orange"), true, '\xff', false)
    67  	require.Equal(t, "apricot", curKey())
    68  	require.Equal(t, "orange", string(w.curValue))
    69  	// Even though this call has setHasSameKeyPrefix=true, the previous call,
    70  	// which was after the last restart set it to false. So the restart encoded
    71  	// with banana has this cumulative bit set to false.
    72  	addAdapter(
    73  		ikey("banana"), []byte("yellow"), true, '\x00', true)
    74  	require.Equal(t, "banana", curKey())
    75  	require.Equal(t, "yellow", string(w.curValue))
    76  	addAdapter(
    77  		ikey("cherry"), []byte("red"), false, 0, true)
    78  	require.Equal(t, "cherry", curKey())
    79  	require.Equal(t, "red", string(w.curValue))
    80  	// All intervening calls has setHasSameKeyPrefix=true, so the cumulative bit
    81  	// will be set to true in this restart.
    82  	addAdapter(
    83  		ikey("mango"), []byte("juicy"), false, 0, true)
    84  	require.Equal(t, "mango", curKey())
    85  	require.Equal(t, "juicy", string(w.curValue))
    86  
    87  	block := w.finish()
    88  
    89  	expected := []byte(
    90  		"\x00\x0d\x03apple\x00\x00\x00\x00\x00\x00\x00\x00red" +
    91  			"\x02\x0d\x07ricot\x00\x00\x00\x00\x00\x00\x00\x00\xfforange" +
    92  			"\x00\x0e\x07banana\x00\x00\x00\x00\x00\x00\x00\x00\x00yellow" +
    93  			"\x00\x0e\x03cherry\x00\x00\x00\x00\x00\x00\x00\x00red" +
    94  			"\x00\x0d\x05mango\x00\x00\x00\x00\x00\x00\x00\x00juicy" +
    95  			// Restarts are:
    96  			// 00000000 (restart at apple), 2a000000 (restart at banana), 56000080 (restart at mango)
    97  			// 03000000 (number of restart, i.e., 3). The restart at mango has 1 in the most significant
    98  			// bit of the uint32, so the last byte in the little endian encoding is \x80.
    99  			"\x00\x00\x00\x00\x2a\x00\x00\x00\x56\x00\x00\x80\x03\x00\x00\x00")
   100  	if !bytes.Equal(expected, block) {
   101  		t.Fatalf("expected\n%x\nfound\n%x", expected, block)
   102  	}
   103  }
   104  
   105  func testBlockCleared(t *testing.T, w, b *blockWriter) {
   106  	require.Equal(t, w.restartInterval, b.restartInterval)
   107  	require.Equal(t, w.nEntries, b.nEntries)
   108  	require.Equal(t, w.nextRestart, b.nextRestart)
   109  	require.Equal(t, len(w.buf), len(b.buf))
   110  	require.Equal(t, len(w.restarts), len(b.restarts))
   111  	require.Equal(t, len(w.curKey), len(b.curKey))
   112  	require.Equal(t, len(w.prevKey), len(b.prevKey))
   113  	require.Equal(t, len(w.curValue), len(b.curValue))
   114  	require.Equal(t, w.tmp, b.tmp)
   115  
   116  	// Make sure that we didn't lose the allocated byte slices.
   117  	require.True(t, cap(w.buf) > 0 && cap(b.buf) == 0)
   118  	require.True(t, cap(w.restarts) > 0 && cap(b.restarts) == 0)
   119  	require.True(t, cap(w.curKey) > 0 && cap(b.curKey) == 0)
   120  	require.True(t, cap(w.prevKey) > 0 && cap(b.prevKey) == 0)
   121  	require.True(t, cap(w.curValue) > 0 && cap(b.curValue) == 0)
   122  }
   123  
   124  func TestBlockClear(t *testing.T) {
   125  	w := blockWriter{restartInterval: 16}
   126  	w.add(ikey("apple"), nil)
   127  	w.add(ikey("apricot"), nil)
   128  	w.add(ikey("banana"), nil)
   129  
   130  	w.clear()
   131  
   132  	// Once a block is cleared, we expect its fields to be cleared, but we expect
   133  	// it to keep its allocated byte slices.
   134  	b := blockWriter{}
   135  	testBlockCleared(t, &w, &b)
   136  }
   137  
   138  func TestInvalidInternalKeyDecoding(t *testing.T) {
   139  	// Invalid keys since they don't have an 8 byte trailer.
   140  	testCases := []string{
   141  		"",
   142  		"\x01\x02\x03\x04\x05\x06\x07",
   143  		"foo",
   144  	}
   145  	for _, tc := range testCases {
   146  		i := blockIter{}
   147  		i.decodeInternalKey([]byte(tc))
   148  		require.Nil(t, i.ikey.UserKey)
   149  		require.Equal(t, uint64(InternalKeyKindInvalid), i.ikey.Trailer)
   150  	}
   151  }
   152  
   153  func TestBlockIter(t *testing.T) {
   154  	// k is a block that maps three keys "apple", "apricot", "banana" to empty strings.
   155  	k := block([]byte(
   156  		"\x00\x05\x00apple" +
   157  			"\x02\x05\x00ricot" +
   158  			"\x00\x06\x00banana" +
   159  			"\x00\x00\x00\x00\x01\x00\x00\x00"))
   160  	var testcases = []struct {
   161  		index int
   162  		key   string
   163  	}{
   164  		{0, ""},
   165  		{0, "a"},
   166  		{0, "aaaaaaaaaaaaaaa"},
   167  		{0, "app"},
   168  		{0, "apple"},
   169  		{1, "appliance"},
   170  		{1, "apricos"},
   171  		{1, "apricot"},
   172  		{2, "azzzzzzzzzzzzzz"},
   173  		{2, "b"},
   174  		{2, "banan"},
   175  		{2, "banana"},
   176  		{3, "banana\x00"},
   177  		{3, "c"},
   178  	}
   179  	for _, tc := range testcases {
   180  		i, err := newRawBlockIter(bytes.Compare, k)
   181  		require.NoError(t, err)
   182  		i.SeekGE([]byte(tc.key))
   183  		for j, keyWant := range []string{"apple", "apricot", "banana"}[tc.index:] {
   184  			if !i.Valid() {
   185  				t.Fatalf("key=%q, index=%d, j=%d: Valid got false, keyWant true", tc.key, tc.index, j)
   186  			}
   187  			if keyGot := string(i.Key().UserKey); keyGot != keyWant {
   188  				t.Fatalf("key=%q, index=%d, j=%d: got %q, keyWant %q", tc.key, tc.index, j, keyGot, keyWant)
   189  			}
   190  			i.Next()
   191  		}
   192  		if i.Valid() {
   193  			t.Fatalf("key=%q, index=%d: Valid got true, keyWant false", tc.key, tc.index)
   194  		}
   195  		if err := i.Close(); err != nil {
   196  			t.Fatalf("key=%q, index=%d: got err=%v", tc.key, tc.index, err)
   197  		}
   198  	}
   199  
   200  	{
   201  		i, err := newRawBlockIter(bytes.Compare, k)
   202  		require.NoError(t, err)
   203  		i.Last()
   204  		for j, keyWant := range []string{"banana", "apricot", "apple"} {
   205  			if !i.Valid() {
   206  				t.Fatalf("j=%d: Valid got false, want true", j)
   207  			}
   208  			if keyGot := string(i.Key().UserKey); keyGot != keyWant {
   209  				t.Fatalf("j=%d: got %q, want %q", j, keyGot, keyWant)
   210  			}
   211  			i.Prev()
   212  		}
   213  		if i.Valid() {
   214  			t.Fatalf("Valid got true, want false")
   215  		}
   216  		if err := i.Close(); err != nil {
   217  			t.Fatalf("got err=%v", err)
   218  		}
   219  	}
   220  }
   221  
   222  func TestBlockIter2(t *testing.T) {
   223  	makeIkey := func(s string) InternalKey {
   224  		j := strings.Index(s, ":")
   225  		seqNum, err := strconv.Atoi(s[j+1:])
   226  		if err != nil {
   227  			panic(err)
   228  		}
   229  		return base.MakeInternalKey([]byte(s[:j]), uint64(seqNum), InternalKeyKindSet)
   230  	}
   231  
   232  	var block []byte
   233  
   234  	for _, r := range []int{1, 2, 3, 4} {
   235  		t.Run(fmt.Sprintf("restart=%d", r), func(t *testing.T) {
   236  			datadriven.RunTest(t, "testdata/block", func(t *testing.T, d *datadriven.TestData) string {
   237  				switch d.Cmd {
   238  				case "build":
   239  					w := &blockWriter{restartInterval: r}
   240  					for _, e := range strings.Split(strings.TrimSpace(d.Input), ",") {
   241  						w.add(makeIkey(e), nil)
   242  					}
   243  					block = w.finish()
   244  					return ""
   245  
   246  				case "iter":
   247  					iter, err := newBlockIter(bytes.Compare, block)
   248  					if err != nil {
   249  						return err.Error()
   250  					}
   251  
   252  					iter.globalSeqNum, err = scanGlobalSeqNum(d)
   253  					if err != nil {
   254  						return err.Error()
   255  					}
   256  
   257  					var b bytes.Buffer
   258  					for _, line := range strings.Split(d.Input, "\n") {
   259  						parts := strings.Fields(line)
   260  						if len(parts) == 0 {
   261  							continue
   262  						}
   263  						switch parts[0] {
   264  						case "seek-ge":
   265  							if len(parts) != 2 {
   266  								return "seek-ge <key>\n"
   267  							}
   268  							iter.SeekGE([]byte(strings.TrimSpace(parts[1])), base.SeekGEFlagsNone)
   269  						case "seek-lt":
   270  							if len(parts) != 2 {
   271  								return "seek-lt <key>\n"
   272  							}
   273  							iter.SeekLT([]byte(strings.TrimSpace(parts[1])), base.SeekLTFlagsNone)
   274  						case "first":
   275  							iter.First()
   276  						case "last":
   277  							iter.Last()
   278  						case "next":
   279  							iter.Next()
   280  						case "prev":
   281  							iter.Prev()
   282  						}
   283  						if iter.valid() {
   284  							fmt.Fprintf(&b, "<%s:%d>", iter.Key().UserKey, iter.Key().SeqNum())
   285  						} else if err := iter.Error(); err != nil {
   286  							fmt.Fprintf(&b, "<err=%v>", err)
   287  						} else {
   288  							fmt.Fprintf(&b, ".")
   289  						}
   290  					}
   291  					b.WriteString("\n")
   292  					return b.String()
   293  
   294  				default:
   295  					return fmt.Sprintf("unknown command: %s", d.Cmd)
   296  				}
   297  			})
   298  		})
   299  	}
   300  }
   301  
   302  func TestBlockIterKeyStability(t *testing.T) {
   303  	w := &blockWriter{restartInterval: 1}
   304  	expected := [][]byte{
   305  		[]byte("apple"),
   306  		[]byte("apricot"),
   307  		[]byte("banana"),
   308  	}
   309  	for i := range expected {
   310  		w.add(InternalKey{UserKey: expected[i]}, nil)
   311  	}
   312  	block := w.finish()
   313  
   314  	i, err := newBlockIter(bytes.Compare, block)
   315  	require.NoError(t, err)
   316  
   317  	// Check that the supplied slice resides within the bounds of the block.
   318  	check := func(v []byte) {
   319  		t.Helper()
   320  		begin := unsafe.Pointer(&v[0])
   321  		end := unsafe.Pointer(uintptr(begin) + uintptr(len(v)))
   322  		blockBegin := unsafe.Pointer(&block[0])
   323  		blockEnd := unsafe.Pointer(uintptr(blockBegin) + uintptr(len(block)))
   324  		if uintptr(begin) < uintptr(blockBegin) || uintptr(end) > uintptr(blockEnd) {
   325  			t.Fatalf("key %p-%p resides outside of block %p-%p", begin, end, blockBegin, blockEnd)
   326  		}
   327  	}
   328  
   329  	// Check that various means of iterating over the data match our expected
   330  	// values. Note that this is only guaranteed because of the usage of a
   331  	// restart-interval of 1 so that prefix compression was not performed.
   332  	for j := range expected {
   333  		keys := [][]byte{}
   334  		for key, _ := i.SeekGE(expected[j], base.SeekGEFlagsNone); key != nil; key, _ = i.Next() {
   335  			check(key.UserKey)
   336  			keys = append(keys, key.UserKey)
   337  		}
   338  		require.EqualValues(t, expected[j:], keys)
   339  	}
   340  
   341  	for j := range expected {
   342  		keys := [][]byte{}
   343  		for key, _ := i.SeekLT(expected[j], base.SeekLTFlagsNone); key != nil; key, _ = i.Prev() {
   344  			check(key.UserKey)
   345  			keys = append(keys, key.UserKey)
   346  		}
   347  		for i, j := 0, len(keys)-1; i < j; i, j = i+1, j-1 {
   348  			keys[i], keys[j] = keys[j], keys[i]
   349  		}
   350  		require.EqualValues(t, expected[:j], keys)
   351  	}
   352  }
   353  
   354  // Regression test for a bug in blockIter.Next where it was failing to handle
   355  // the case where it is switching from reverse to forward iteration. When that
   356  // switch occurs we need to populate blockIter.fullKey so that prefix
   357  // decompression works properly.
   358  func TestBlockIterReverseDirections(t *testing.T) {
   359  	w := &blockWriter{restartInterval: 4}
   360  	keys := [][]byte{
   361  		[]byte("apple0"),
   362  		[]byte("apple1"),
   363  		[]byte("apple2"),
   364  		[]byte("banana"),
   365  		[]byte("carrot"),
   366  	}
   367  	for i := range keys {
   368  		w.add(InternalKey{UserKey: keys[i]}, nil)
   369  	}
   370  	block := w.finish()
   371  
   372  	for targetPos := 0; targetPos < w.restartInterval; targetPos++ {
   373  		t.Run("", func(t *testing.T) {
   374  			i, err := newBlockIter(bytes.Compare, block)
   375  			require.NoError(t, err)
   376  
   377  			pos := 3
   378  			if key, _ := i.SeekLT([]byte("carrot"), base.SeekLTFlagsNone); !bytes.Equal(keys[pos], key.UserKey) {
   379  				t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey)
   380  			}
   381  			for pos > targetPos {
   382  				pos--
   383  				if key, _ := i.Prev(); !bytes.Equal(keys[pos], key.UserKey) {
   384  					t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey)
   385  				}
   386  			}
   387  			pos++
   388  			if key, _ := i.Next(); !bytes.Equal(keys[pos], key.UserKey) {
   389  				t.Fatalf("expected %s, but found %s", keys[pos], key.UserKey)
   390  			}
   391  		})
   392  	}
   393  }
   394  
   395  func BenchmarkBlockIterSeekGE(b *testing.B) {
   396  	const blockSize = 32 << 10
   397  
   398  	for _, restartInterval := range []int{16} {
   399  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   400  			func(b *testing.B) {
   401  				w := &blockWriter{
   402  					restartInterval: restartInterval,
   403  				}
   404  
   405  				var ikey InternalKey
   406  				var keys [][]byte
   407  				for i := 0; w.estimatedSize() < blockSize; i++ {
   408  					key := []byte(fmt.Sprintf("%05d", i))
   409  					keys = append(keys, key)
   410  					ikey.UserKey = key
   411  					w.add(ikey, nil)
   412  				}
   413  
   414  				it, err := newBlockIter(bytes.Compare, w.finish())
   415  				if err != nil {
   416  					b.Fatal(err)
   417  				}
   418  				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   419  
   420  				b.ResetTimer()
   421  				for i := 0; i < b.N; i++ {
   422  					k := keys[rng.Intn(len(keys))]
   423  					it.SeekGE(k, base.SeekGEFlagsNone)
   424  					if testing.Verbose() {
   425  						if !it.valid() {
   426  							b.Fatal("expected to find key")
   427  						}
   428  						if !bytes.Equal(k, it.Key().UserKey) {
   429  							b.Fatalf("expected %s, but found %s", k, it.Key().UserKey)
   430  						}
   431  					}
   432  				}
   433  			})
   434  	}
   435  }
   436  
   437  func BenchmarkBlockIterSeekLT(b *testing.B) {
   438  	const blockSize = 32 << 10
   439  
   440  	for _, restartInterval := range []int{16} {
   441  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   442  			func(b *testing.B) {
   443  				w := &blockWriter{
   444  					restartInterval: restartInterval,
   445  				}
   446  
   447  				var ikey InternalKey
   448  				var keys [][]byte
   449  				for i := 0; w.estimatedSize() < blockSize; i++ {
   450  					key := []byte(fmt.Sprintf("%05d", i))
   451  					keys = append(keys, key)
   452  					ikey.UserKey = key
   453  					w.add(ikey, nil)
   454  				}
   455  
   456  				it, err := newBlockIter(bytes.Compare, w.finish())
   457  				if err != nil {
   458  					b.Fatal(err)
   459  				}
   460  				rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
   461  
   462  				b.ResetTimer()
   463  				for i := 0; i < b.N; i++ {
   464  					j := rng.Intn(len(keys))
   465  					it.SeekLT(keys[j], base.SeekLTFlagsNone)
   466  					if testing.Verbose() {
   467  						if j == 0 {
   468  							if it.valid() {
   469  								b.Fatal("unexpected key")
   470  							}
   471  						} else {
   472  							if !it.valid() {
   473  								b.Fatal("expected to find key")
   474  							}
   475  							k := keys[j-1]
   476  							if !bytes.Equal(k, it.Key().UserKey) {
   477  								b.Fatalf("expected %s, but found %s", k, it.Key().UserKey)
   478  							}
   479  						}
   480  					}
   481  				}
   482  			})
   483  	}
   484  }
   485  
   486  func BenchmarkBlockIterNext(b *testing.B) {
   487  	const blockSize = 32 << 10
   488  
   489  	for _, restartInterval := range []int{16} {
   490  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   491  			func(b *testing.B) {
   492  				w := &blockWriter{
   493  					restartInterval: restartInterval,
   494  				}
   495  
   496  				var ikey InternalKey
   497  				for i := 0; w.estimatedSize() < blockSize; i++ {
   498  					ikey.UserKey = []byte(fmt.Sprintf("%05d", i))
   499  					w.add(ikey, nil)
   500  				}
   501  
   502  				it, err := newBlockIter(bytes.Compare, w.finish())
   503  				if err != nil {
   504  					b.Fatal(err)
   505  				}
   506  
   507  				b.ResetTimer()
   508  				for i := 0; i < b.N; i++ {
   509  					if !it.valid() {
   510  						it.First()
   511  					}
   512  					it.Next()
   513  				}
   514  			})
   515  	}
   516  }
   517  
   518  func BenchmarkBlockIterPrev(b *testing.B) {
   519  	const blockSize = 32 << 10
   520  
   521  	for _, restartInterval := range []int{16} {
   522  		b.Run(fmt.Sprintf("restart=%d", restartInterval),
   523  			func(b *testing.B) {
   524  				w := &blockWriter{
   525  					restartInterval: restartInterval,
   526  				}
   527  
   528  				var ikey InternalKey
   529  				for i := 0; w.estimatedSize() < blockSize; i++ {
   530  					ikey.UserKey = []byte(fmt.Sprintf("%05d", i))
   531  					w.add(ikey, nil)
   532  				}
   533  
   534  				it, err := newBlockIter(bytes.Compare, w.finish())
   535  				if err != nil {
   536  					b.Fatal(err)
   537  				}
   538  
   539  				b.ResetTimer()
   540  				for i := 0; i < b.N; i++ {
   541  					if !it.valid() {
   542  						it.Last()
   543  					}
   544  					it.Prev()
   545  				}
   546  			})
   547  	}
   548  }