github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_sideload_test.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  package kvserver
    11  
    12  import (
    13  	"bytes"
    14  	"context"
    15  	"fmt"
    16  	"io"
    17  	"math"
    18  	"math/rand"
    19  	"os"
    20  	"path/filepath"
    21  	"reflect"
    22  	"regexp"
    23  	"sort"
    24  	"strconv"
    25  	"strings"
    26  	"testing"
    27  
    28  	"github.com/cockroachdb/cockroach/pkg/base"
    29  	"github.com/cockroachdb/cockroach/pkg/kv"
    30  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    31  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    32  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry"
    33  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
    34  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    35  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    36  	"github.com/cockroachdb/cockroach/pkg/storage"
    37  	"github.com/cockroachdb/cockroach/pkg/testutils"
    38  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    39  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    40  	"github.com/cockroachdb/cockroach/pkg/util/log"
    41  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    42  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    43  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    44  	"github.com/cockroachdb/errors"
    45  	"github.com/kr/pretty"
    46  	"go.etcd.io/etcd/raft/raftpb"
    47  	"golang.org/x/time/rate"
    48  )
    49  
    50  func entryEq(l, r raftpb.Entry) error {
    51  	if reflect.DeepEqual(l, r) {
    52  		return nil
    53  	}
    54  	_, lData := DecodeRaftCommand(l.Data)
    55  	_, rData := DecodeRaftCommand(r.Data)
    56  	var lc, rc kvserverpb.RaftCommand
    57  	if err := protoutil.Unmarshal(lData, &lc); err != nil {
    58  		return errors.Wrap(err, "unmarshalling LHS")
    59  	}
    60  	if err := protoutil.Unmarshal(rData, &rc); err != nil {
    61  		return errors.Wrap(err, "unmarshalling RHS")
    62  	}
    63  	if !reflect.DeepEqual(lc, rc) {
    64  		return errors.Newf("unexpected:\n%s", strings.Join(pretty.Diff(lc, rc), "\n"))
    65  	}
    66  	return nil
    67  }
    68  
    69  func mkEnt(
    70  	v raftCommandEncodingVersion, index, term uint64, as *kvserverpb.ReplicatedEvalResult_AddSSTable,
    71  ) raftpb.Entry {
    72  	cmdIDKey := strings.Repeat("x", raftCommandIDLen)
    73  	var cmd kvserverpb.RaftCommand
    74  	cmd.ReplicatedEvalResult.AddSSTable = as
    75  	b, err := protoutil.Marshal(&cmd)
    76  	if err != nil {
    77  		panic(err)
    78  	}
    79  	var ent raftpb.Entry
    80  	ent.Index, ent.Term = index, term
    81  	ent.Data = encodeRaftCommand(v, kvserverbase.CmdIDKey(cmdIDKey), b)
    82  	return ent
    83  }
    84  
    85  func TestSideloadingSideloadedStorage(t *testing.T) {
    86  	defer leaktest.AfterTest(t)()
    87  	t.Run("Mem", func(t *testing.T) {
    88  		testSideloadingSideloadedStorage(t, newInMemSideloadStorage)
    89  	})
    90  	t.Run("Disk", func(t *testing.T) {
    91  		maker := func(
    92  			s *cluster.Settings, rangeID roachpb.RangeID, rep roachpb.ReplicaID, name string, eng storage.Engine,
    93  		) (SideloadStorage, error) {
    94  			return newDiskSideloadStorage(s, rangeID, rep, name, rate.NewLimiter(rate.Inf, math.MaxInt64), eng)
    95  		}
    96  		testSideloadingSideloadedStorage(t, maker)
    97  	})
    98  }
    99  
   100  func testSideloadingSideloadedStorage(
   101  	t *testing.T,
   102  	maker func(*cluster.Settings, roachpb.RangeID, roachpb.ReplicaID, string, storage.Engine) (SideloadStorage, error),
   103  ) {
   104  	dir, cleanup := testutils.TempDir(t)
   105  	defer cleanup()
   106  
   107  	ctx := context.Background()
   108  	st := cluster.MakeTestingClusterSettings()
   109  
   110  	cleanup, eng := newEngine(t)
   111  	defer cleanup()
   112  	defer eng.Close()
   113  
   114  	ss, err := maker(st, 1, 2, dir, eng)
   115  	if err != nil {
   116  		t.Fatal(err)
   117  	}
   118  	_, isInMem := ss.(*inMemSideloadStorage) // some things don't make sense for inMem
   119  
   120  	assertCreated := func(isCreated bool) {
   121  		if isInMem {
   122  			return
   123  		}
   124  		if is := ss.(*diskSideloadStorage).dirCreated; is != isCreated {
   125  			t.Fatalf("assertion failed: expected dirCreated=%t, got %t", isCreated, is)
   126  		}
   127  	}
   128  
   129  	assertCreated(false)
   130  
   131  	const (
   132  		lowTerm = 1
   133  		highTerm
   134  	)
   135  
   136  	file := func(i uint64) []byte { // take uint64 for convenience
   137  		return []byte("content-" + strconv.Itoa(int(i)))
   138  	}
   139  
   140  	if err := ss.Put(ctx, 1, highTerm, file(1)); err != nil {
   141  		t.Fatal(err)
   142  	}
   143  
   144  	assertCreated(true)
   145  
   146  	if c, err := ss.Get(ctx, 1, highTerm); err != nil {
   147  		t.Fatal(err)
   148  	} else if exp := file(1); !bytes.Equal(c, exp) {
   149  		t.Fatalf("got %q, wanted %q", c, exp)
   150  	}
   151  
   152  	// Overwrites the occupied slot.
   153  	if err := ss.Put(ctx, 1, highTerm, file(12345)); err != nil {
   154  		t.Fatal(err)
   155  	}
   156  
   157  	// ... consequently the old entry is gone.
   158  	if c, err := ss.Get(ctx, 1, highTerm); err != nil {
   159  		t.Fatal(err)
   160  	} else if exp := file(12345); !bytes.Equal(c, exp) {
   161  		t.Fatalf("got %q, wanted %q", c, exp)
   162  	}
   163  
   164  	if err := ss.Clear(ctx); err != nil {
   165  		t.Fatal(err)
   166  	}
   167  
   168  	assertCreated(false)
   169  
   170  	for n, test := range []struct {
   171  		fun func() error
   172  		err error
   173  	}{
   174  		{
   175  			err: errSideloadedFileNotFound,
   176  			fun: func() error {
   177  				_, err = ss.Get(ctx, 123, 456)
   178  				return err
   179  			},
   180  		},
   181  		{
   182  			err: errSideloadedFileNotFound,
   183  			fun: func() error {
   184  				_, err := ss.Purge(ctx, 123, 456)
   185  				return err
   186  			},
   187  		},
   188  		{
   189  			err: nil,
   190  			fun: func() error {
   191  				_, _, err := ss.TruncateTo(ctx, 123)
   192  				return err
   193  			},
   194  		},
   195  		{
   196  			err: nil,
   197  			fun: func() error {
   198  				_, err = ss.Filename(ctx, 123, 456)
   199  				return err
   200  			},
   201  		},
   202  	} {
   203  		if err := test.fun(); !errors.Is(err, test.err) {
   204  			t.Fatalf("%d: expected %v, got %v", n, test.err, err)
   205  		}
   206  		if err := ss.Clear(ctx); err != nil {
   207  			t.Fatalf("%d: %+v", n, err)
   208  		}
   209  		assertCreated(false)
   210  	}
   211  
   212  	// Write some payloads at various indexes. Note that this tests Put
   213  	// on a recently Clear()ed storage. Randomize order for fun.
   214  	payloads := []uint64{3, 5, 7, 9, 10}
   215  	for n := range rand.Perm(len(payloads)) {
   216  		i := payloads[n]
   217  		if err := ss.Put(ctx, i, highTerm, file(i*highTerm)); err != nil {
   218  			t.Fatalf("%d: %+v", i, err)
   219  		}
   220  	}
   221  
   222  	assertCreated(true)
   223  
   224  	// Write some more payloads, overlapping, at the past term.
   225  	pastPayloads := append([]uint64{81}, payloads...)
   226  	for _, i := range pastPayloads {
   227  		if err := ss.Put(ctx, i, lowTerm, file(i*lowTerm)); err != nil {
   228  			t.Fatal(err)
   229  		}
   230  	}
   231  
   232  	// Just for fun, recreate the original storage (unless it's the in-memory
   233  	// one), which shouldn't change anything about its state.
   234  	if !isInMem {
   235  		var err error
   236  		ss, err = maker(st, 1, 2, dir, eng)
   237  		if err != nil {
   238  			t.Fatal(err)
   239  		}
   240  		assertCreated(false)
   241  	}
   242  
   243  	// Just a sanity check that for the overlapping terms, we see both entries.
   244  	for _, term := range []uint64{lowTerm, highTerm} {
   245  		index := payloads[0] // exists at both lowTerm and highTerm
   246  		if c, err := ss.Get(ctx, index, term); err != nil {
   247  			t.Fatal(err)
   248  		} else if exp := file(term * index); !bytes.Equal(c, exp) {
   249  			t.Fatalf("got %q, wanted %q", c, exp)
   250  		}
   251  	}
   252  	assertCreated(false) // Get() doesn't recreated nor check
   253  
   254  	for n := range payloads {
   255  		// Truncate indexes <= payloads[n] (payloads is sorted in increasing order).
   256  		if _, _, err := ss.TruncateTo(ctx, payloads[n]); err != nil {
   257  			t.Fatalf("%d: %+v", n, err)
   258  		}
   259  		// Index payloads[n] and above are still there (truncation is exclusive)
   260  		// at both terms.
   261  		for _, term := range []uint64{lowTerm, highTerm} {
   262  			for _, i := range payloads[n:] {
   263  				if _, err := ss.Get(ctx, i, term); err != nil {
   264  					t.Fatalf("%d.%d: %+v", n, i, err)
   265  				}
   266  			}
   267  			// Indexes below are gone.
   268  			for _, i := range payloads[:n] {
   269  				if _, err := ss.Get(ctx, i, term); !errors.Is(err, errSideloadedFileNotFound) {
   270  					t.Fatalf("%d.%d: %+v", n, i, err)
   271  				}
   272  			}
   273  		}
   274  	}
   275  
   276  	func() {
   277  		if isInMem {
   278  			return
   279  		}
   280  		// First add a file that shouldn't be in the sideloaded storage to ensure
   281  		// sane behavior when directory can't be removed after full truncate.
   282  		nonRemovableFile := filepath.Join(ss.(*diskSideloadStorage).dir, "cantremove.xx")
   283  		f, err := os.Create(nonRemovableFile)
   284  		if err != nil {
   285  			t.Fatalf("could not create non i*.t* file in sideloaded storage: %+v", err)
   286  		}
   287  		defer f.Close()
   288  
   289  		_, _, err = ss.TruncateTo(ctx, math.MaxUint64)
   290  		if err == nil {
   291  			t.Fatalf("sideloaded directory should not have been removable due to extra file %s", nonRemovableFile)
   292  		}
   293  		expectedTruncateError := "while purging %q: remove %s: directory not empty"
   294  		if err.Error() != fmt.Sprintf(expectedTruncateError, ss.(*diskSideloadStorage).dir, ss.(*diskSideloadStorage).dir) {
   295  			t.Fatalf("error truncating sideloaded storage: %+v", err)
   296  		}
   297  		// Now remove extra file and let truncation proceed to remove directory.
   298  		err = os.Remove(nonRemovableFile)
   299  		if err != nil {
   300  			t.Fatalf("could not remove %s: %+v", nonRemovableFile, err)
   301  		}
   302  
   303  		// Test that directory is removed when filepath.Glob returns 0 matches.
   304  		if _, _, err := ss.TruncateTo(ctx, math.MaxUint64); err != nil {
   305  			t.Fatal(err)
   306  		}
   307  		// Ensure directory is removed, now that all files should be gone.
   308  		_, err = os.Stat(ss.(*diskSideloadStorage).dir)
   309  		if err == nil {
   310  			t.Fatalf("expected %q to be removed after truncating full range", ss.(*diskSideloadStorage).dir)
   311  		}
   312  		if err != nil {
   313  			if !os.IsNotExist(err) {
   314  				t.Fatalf("expected %q to be removed: %+v", ss.(*diskSideloadStorage).dir, err)
   315  			}
   316  		}
   317  
   318  		// Repopulate with some random indexes to test deletion when there are a
   319  		// non-zero number of filepath.Glob matches.
   320  		payloads := []uint64{3, 5, 7, 9, 10}
   321  		for n := range rand.Perm(len(payloads)) {
   322  			i := payloads[n]
   323  			if err := ss.Put(ctx, i, highTerm, file(i*highTerm)); err != nil {
   324  				t.Fatalf("%d: %+v", i, err)
   325  			}
   326  		}
   327  		assertCreated(true)
   328  		if _, _, err := ss.TruncateTo(ctx, math.MaxUint64); err != nil {
   329  			t.Fatal(err)
   330  		}
   331  		// Ensure directory is removed when all records are removed.
   332  		_, err = os.Stat(ss.(*diskSideloadStorage).dir)
   333  		if err == nil {
   334  			t.Fatalf("expected %q to be removed after truncating full range", ss.(*diskSideloadStorage).dir)
   335  		}
   336  		if err != nil {
   337  			if !os.IsNotExist(err) {
   338  				t.Fatalf("expected %q to be removed: %+v", ss.(*diskSideloadStorage).dir, err)
   339  			}
   340  		}
   341  	}()
   342  
   343  	if err := ss.Clear(ctx); err != nil {
   344  		t.Fatal(err)
   345  	}
   346  
   347  	assertCreated(false)
   348  
   349  	// Sanity check that we can call TruncateTo without the directory existing.
   350  	if _, _, err := ss.TruncateTo(ctx, 1); err != nil {
   351  		t.Fatal(err)
   352  	}
   353  
   354  	assertCreated(false)
   355  
   356  	// Repopulate with a few entries at indexes=1,2,4 and term 10 to test `maybePurgeSideloaded`
   357  	// with.
   358  	for index := uint64(1); index < 5; index++ {
   359  		if index == 3 {
   360  			continue
   361  		}
   362  		payload := []byte(strings.Repeat("x", 1+int(index)))
   363  		if err := ss.Put(ctx, index, 10, payload); err != nil {
   364  			t.Fatalf("%d: %+v", index, err)
   365  		}
   366  	}
   367  
   368  	// Term too high and too low, respectively. Shouldn't delete anything.
   369  	for _, term := range []uint64{9, 11} {
   370  		if size, err := maybePurgeSideloaded(ctx, ss, 1, 10, term); err != nil || size != 0 {
   371  			t.Fatalf("expected noop for term %d, got (%d, %v)", term, size, err)
   372  		}
   373  	}
   374  	// This should delete 2 and 4. Index == size+1, so expect 6.
   375  	if size, err := maybePurgeSideloaded(ctx, ss, 2, 4, 10); err != nil || size != 8 {
   376  		t.Fatalf("unexpectedly got (%d, %v)", size, err)
   377  	}
   378  	// This should delete 1 (the lone survivor).
   379  	if size, err := maybePurgeSideloaded(ctx, ss, 0, 100, 10); err != nil || size != 2 {
   380  		t.Fatalf("unexpectedly got (%d, %v)", size, err)
   381  	}
   382  	// Nothing left.
   383  	if size, err := maybePurgeSideloaded(ctx, ss, 0, 100, 10); err != nil || size != 0 {
   384  		t.Fatalf("expected noop, got (%d, %v)", size, err)
   385  	}
   386  }
   387  
   388  func TestRaftSSTableSideloadingInline(t *testing.T) {
   389  	defer leaktest.AfterTest(t)()
   390  
   391  	v1, v2 := raftVersionStandard, raftVersionSideloaded
   392  	rangeID := roachpb.RangeID(1)
   393  
   394  	type testCase struct {
   395  		// Entry passed into maybeInlineSideloadedRaftCommand and the entry
   396  		// after having (perhaps) been modified.
   397  		thin, fat raftpb.Entry
   398  		// Populate the raft entry cache and sideload storage before running the test.
   399  		setup func(*raftentry.Cache, SideloadStorage)
   400  		// If nonempty, the error expected from maybeInlineSideloadedRaftCommand.
   401  		expErr string
   402  		// If nonempty, a regex that the recorded trace span must match.
   403  		expTrace string
   404  	}
   405  
   406  	sstFat := kvserverpb.ReplicatedEvalResult_AddSSTable{
   407  		Data:  []byte("foo"),
   408  		CRC32: 0, // not checked
   409  	}
   410  	sstThin := kvserverpb.ReplicatedEvalResult_AddSSTable{
   411  		CRC32: 0, // not checked
   412  	}
   413  
   414  	putOnDisk := func(ec *raftentry.Cache, ss SideloadStorage) {
   415  		if err := ss.Put(context.Background(), 5, 6, sstFat.Data); err != nil {
   416  			t.Fatal(err)
   417  		}
   418  	}
   419  
   420  	testCases := map[string]testCase{
   421  		// Plain old v1 Raft command without payload. Don't touch.
   422  		"v1-no-payload": {thin: mkEnt(v1, 5, 6, &sstThin), fat: mkEnt(v1, 5, 6, &sstThin)},
   423  		// With payload, but command is v1. Don't touch. Note that the
   424  		// first of the two shouldn't happen in practice or we have a
   425  		// huge problem once we try to apply this entry.
   426  		"v1-slim-with-payload": {thin: mkEnt(v1, 5, 6, &sstThin), fat: mkEnt(v1, 5, 6, &sstThin)},
   427  		"v1-with-payload":      {thin: mkEnt(v1, 5, 6, &sstFat), fat: mkEnt(v1, 5, 6, &sstFat)},
   428  		// v2 with payload, but payload is AWOL. This would be fatal in practice.
   429  		"v2-with-payload-missing-file": {
   430  			thin: mkEnt(v2, 5, 6, &sstThin), fat: mkEnt(v2, 5, 6, &sstThin),
   431  			expErr: "not found",
   432  		},
   433  		// v2 with payload that's actually there. The request we'll see in
   434  		// practice.
   435  		"v2-with-payload-with-file-no-cache": {
   436  			thin: mkEnt(v2, 5, 6, &sstThin), fat: mkEnt(v2, 5, 6, &sstFat),
   437  			setup: putOnDisk, expTrace: "inlined entry not cached",
   438  		},
   439  		"v2-with-payload-with-file-with-cache": {
   440  			thin: mkEnt(v2, 5, 6, &sstThin), fat: mkEnt(v2, 5, 6, &sstFat),
   441  			setup: func(ec *raftentry.Cache, ss SideloadStorage) {
   442  				putOnDisk(ec, ss)
   443  				ec.Add(rangeID, []raftpb.Entry{mkEnt(v2, 5, 6, &sstFat)}, true)
   444  			}, expTrace: "using cache hit",
   445  		},
   446  		"v2-fat-without-file": {
   447  			thin: mkEnt(v2, 5, 6, &sstFat), fat: mkEnt(v2, 5, 6, &sstFat),
   448  			setup:    func(ec *raftentry.Cache, ss SideloadStorage) {},
   449  			expTrace: "already inlined",
   450  		},
   451  	}
   452  
   453  	runOne := func(k string, test testCase) {
   454  		ctx, collect, cancel := tracing.ContextWithRecordingSpan(context.Background(), "test-recording")
   455  		defer cancel()
   456  
   457  		ec := raftentry.NewCache(1024) // large enough
   458  		ss := mustNewInMemSideloadStorage(rangeID, roachpb.ReplicaID(1), ".")
   459  		if test.setup != nil {
   460  			test.setup(ec, ss)
   461  		}
   462  
   463  		thinCopy := *(protoutil.Clone(&test.thin).(*raftpb.Entry))
   464  		newEnt, err := maybeInlineSideloadedRaftCommand(ctx, rangeID, thinCopy, ss, ec)
   465  		if err != nil {
   466  			if test.expErr == "" || !testutils.IsError(err, test.expErr) {
   467  				t.Fatalf("%s: %+v", k, err)
   468  			}
   469  		} else if test.expErr != "" {
   470  			t.Fatalf("%s: success, but expected error: %s", k, test.expErr)
   471  		} else if err := entryEq(thinCopy, test.thin); err != nil {
   472  			t.Fatalf("%s: mutated the original entry: %s", k, pretty.Diff(thinCopy, test.thin))
   473  		}
   474  
   475  		if newEnt == nil {
   476  			newEnt = &thinCopy
   477  		}
   478  		if err := entryEq(*newEnt, test.fat); err != nil {
   479  			t.Fatalf("%s: %+v", k, err)
   480  		}
   481  
   482  		if dump := collect().String(); test.expTrace != "" {
   483  			if ok, err := regexp.MatchString(test.expTrace, dump); err != nil {
   484  				t.Fatalf("%s: %+v", k, err)
   485  			} else if !ok {
   486  				t.Fatalf("%s: expected trace matching:\n%s\n\nbut got\n%s", k, test.expTrace, dump)
   487  			}
   488  		}
   489  	}
   490  
   491  	keys := make([]string, 0, len(testCases))
   492  	for k := range testCases {
   493  		keys = append(keys, k)
   494  	}
   495  	sort.Strings(keys)
   496  	for _, k := range keys {
   497  		runOne(k, testCases[k])
   498  	}
   499  }
   500  
   501  func TestRaftSSTableSideloadingSideload(t *testing.T) {
   502  	defer leaktest.AfterTest(t)()
   503  
   504  	addSST := kvserverpb.ReplicatedEvalResult_AddSSTable{
   505  		Data: []byte("foo"), CRC32: 0, // not checked
   506  	}
   507  
   508  	addSSTStripped := addSST
   509  	addSSTStripped.Data = nil
   510  
   511  	entV1Reg := mkEnt(raftVersionStandard, 10, 99, nil)
   512  	entV1SST := mkEnt(raftVersionStandard, 11, 99, &addSST)
   513  	entV2Reg := mkEnt(raftVersionSideloaded, 12, 99, nil)
   514  	entV2SST := mkEnt(raftVersionSideloaded, 13, 99, &addSST)
   515  	entV2SSTStripped := mkEnt(raftVersionSideloaded, 13, 99, &addSSTStripped)
   516  
   517  	type tc struct {
   518  		name              string
   519  		preEnts, postEnts []raftpb.Entry
   520  		ss                []string
   521  		size              int64
   522  	}
   523  
   524  	// Intentionally ignore the fact that real calls would always have an
   525  	// unbroken run of `entry.Index`.
   526  	testCases := []tc{
   527  		{
   528  			name:     "empty",
   529  			preEnts:  nil,
   530  			postEnts: nil,
   531  			ss:       nil,
   532  			size:     0,
   533  		},
   534  		{
   535  			name:     "v1",
   536  			preEnts:  []raftpb.Entry{entV1Reg, entV1SST},
   537  			postEnts: []raftpb.Entry{entV1Reg, entV1SST},
   538  			size:     0,
   539  		},
   540  		{
   541  			name:     "v2",
   542  			preEnts:  []raftpb.Entry{entV2SST, entV2Reg},
   543  			postEnts: []raftpb.Entry{entV2SSTStripped, entV2Reg},
   544  			ss:       []string{"i13t99"},
   545  			size:     int64(len(addSST.Data)),
   546  		},
   547  		{
   548  			name:     "mixed",
   549  			preEnts:  []raftpb.Entry{entV1Reg, entV1SST, entV2Reg, entV2SST},
   550  			postEnts: []raftpb.Entry{entV1Reg, entV1SST, entV2Reg, entV2SSTStripped},
   551  			ss:       []string{"i13t99"},
   552  			size:     int64(len(addSST.Data)),
   553  		},
   554  	}
   555  
   556  	for _, test := range testCases {
   557  		t.Run(test.name, func(t *testing.T) {
   558  			ctx := context.Background()
   559  			sideloaded := mustNewInMemSideloadStorage(roachpb.RangeID(3), roachpb.ReplicaID(17), ".")
   560  			postEnts, size, err := maybeSideloadEntriesImpl(ctx, test.preEnts, sideloaded)
   561  			if err != nil {
   562  				t.Fatal(err)
   563  			}
   564  			if len(addSST.Data) == 0 {
   565  				t.Fatal("invocation mutated original AddSSTable struct in memory")
   566  			}
   567  			if !reflect.DeepEqual(postEnts, test.postEnts) {
   568  				t.Fatalf("result differs from expected: %s", pretty.Diff(postEnts, test.postEnts))
   569  			}
   570  			if test.size != size {
   571  				t.Fatalf("expected %d sideloadedSize, but found %d", test.size, size)
   572  			}
   573  			var actKeys []string
   574  			for k := range sideloaded.(*inMemSideloadStorage).m {
   575  				actKeys = append(actKeys, fmt.Sprintf("i%dt%d", k.index, k.term))
   576  			}
   577  			sort.Strings(actKeys)
   578  			if !reflect.DeepEqual(actKeys, test.ss) {
   579  				t.Fatalf("expected %v, got %v", test.ss, actKeys)
   580  			}
   581  		})
   582  	}
   583  }
   584  
   585  func makeInMemSideloaded(repl *Replica) {
   586  	repl.raftMu.Lock()
   587  	repl.raftMu.sideloaded = mustNewInMemSideloadStorage(repl.RangeID, 0, repl.store.engine.GetAuxiliaryDir())
   588  	repl.raftMu.Unlock()
   589  }
   590  
   591  // TestRaftSSTableSideloadingProposal runs a straightforward application of an `AddSSTable` command.
   592  func TestRaftSSTableSideloadingProposal(t *testing.T) {
   593  	defer leaktest.AfterTest(t)()
   594  
   595  	testutils.RunTrueAndFalse(t, "engineInMem", func(t *testing.T, engineInMem bool) {
   596  		testutils.RunTrueAndFalse(t, "mockSideloaded", func(t *testing.T, mockSideloaded bool) {
   597  			if engineInMem && !mockSideloaded {
   598  				t.Skip("https://github.com/cockroachdb/cockroach/issues/31913")
   599  			}
   600  			testRaftSSTableSideloadingProposal(t, engineInMem, mockSideloaded)
   601  		})
   602  	})
   603  }
   604  
   605  // TestRaftSSTableSideloadingProposal runs a straightforward application of an `AddSSTable` command.
   606  func testRaftSSTableSideloadingProposal(t *testing.T, engineInMem, mockSideloaded bool) {
   607  	defer leaktest.AfterTest(t)()
   608  	defer SetMockAddSSTable()()
   609  
   610  	dir, cleanup := testutils.TempDir(t)
   611  	defer cleanup()
   612  	stopper := stop.NewStopper()
   613  	tc := testContext{}
   614  	if !engineInMem {
   615  		cfg := storage.RocksDBConfig{
   616  			StorageConfig: base.StorageConfig{
   617  				Dir:      dir,
   618  				Settings: cluster.MakeTestingClusterSettings(),
   619  			},
   620  		}
   621  		var err error
   622  		cache := storage.NewRocksDBCache(1 << 20)
   623  		defer cache.Release()
   624  		tc.engine, err = storage.NewRocksDB(cfg, cache)
   625  		if err != nil {
   626  			t.Fatal(err)
   627  		}
   628  		stopper.AddCloser(tc.engine)
   629  	}
   630  	defer stopper.Stop(context.Background())
   631  	tc.Start(t, stopper)
   632  
   633  	ctx, collect, cancel := tracing.ContextWithRecordingSpan(context.Background(), "test-recording")
   634  	defer cancel()
   635  
   636  	const (
   637  		key       = "foo"
   638  		entrySize = 128
   639  	)
   640  	val := strings.Repeat("x", entrySize)
   641  
   642  	if mockSideloaded {
   643  		makeInMemSideloaded(tc.repl)
   644  	}
   645  
   646  	ts := hlc.Timestamp{Logical: 1}
   647  
   648  	if err := ProposeAddSSTable(ctx, key, val, ts, tc.store); err != nil {
   649  		t.Fatal(err)
   650  	}
   651  
   652  	{
   653  		var ba roachpb.BatchRequest
   654  		get := getArgs(roachpb.Key(key))
   655  		ba.Add(&get)
   656  		ba.Header.RangeID = tc.repl.RangeID
   657  
   658  		br, pErr := tc.store.Send(ctx, ba)
   659  		if pErr != nil {
   660  			t.Fatal(pErr)
   661  		}
   662  		v := br.Responses[0].GetInner().(*roachpb.GetResponse).Value
   663  		if v == nil {
   664  			t.Fatal("expected to read a value")
   665  		}
   666  		if valBytes, err := v.GetBytes(); err != nil {
   667  			t.Fatal(err)
   668  		} else if !bytes.Equal(valBytes, []byte(val)) {
   669  			t.Fatalf("expected to read '%s', but found '%s'", val, valBytes)
   670  		}
   671  	}
   672  
   673  	func() {
   674  		tc.repl.raftMu.Lock()
   675  		defer tc.repl.raftMu.Unlock()
   676  		if ss, ok := tc.repl.raftMu.sideloaded.(*inMemSideloadStorage); ok && len(ss.m) < 1 {
   677  			t.Fatal("sideloaded storage is empty")
   678  		}
   679  
   680  		if err := testutils.MatchInOrder(
   681  			collect().String(), "sideloadable proposal detected", "ingested SSTable",
   682  		); err != nil {
   683  			t.Fatal(err)
   684  		}
   685  
   686  		if n := tc.store.metrics.AddSSTableProposals.Count(); n == 0 {
   687  			t.Fatalf("expected metric to show at least one AddSSTable proposal, but got %d", n)
   688  		}
   689  
   690  		if n := tc.store.metrics.AddSSTableApplications.Count(); n == 0 {
   691  			t.Fatalf("expected metric to show at least one AddSSTable application, but got %d", n)
   692  		}
   693  		// We usually don't see copies because we hardlink and ingest the original SST. However, this
   694  		// depends on luck and the file system, so don't try to assert it. We should, however, see
   695  		// no more than one.
   696  		expMaxCopies := int64(1)
   697  		if engineInMem {
   698  			// We don't count in-memory env SST writes as copies.
   699  			expMaxCopies = 0
   700  		}
   701  		if n := tc.store.metrics.AddSSTableApplicationCopies.Count(); n > expMaxCopies {
   702  			t.Fatalf("expected metric to show <= %d AddSSTable copies, but got %d", expMaxCopies, n)
   703  		}
   704  	}()
   705  
   706  	// Force a log truncation followed by verification of the tracked raft log size. This exercises a
   707  	// former bug in which the raft log size took the sideloaded payload into account when adding
   708  	// to the log, but not when truncating.
   709  
   710  	// Write enough keys to the range to make sure that a truncation will happen.
   711  	for i := 0; i < RaftLogQueueStaleThreshold+1; i++ {
   712  		key := roachpb.Key(fmt.Sprintf("key%02d", i))
   713  		args := putArgs(key, []byte(fmt.Sprintf("value%02d", i)))
   714  		if _, err := kv.SendWrapped(context.Background(), tc.store.TestSender(), &args); err != nil {
   715  			t.Fatal(err)
   716  		}
   717  	}
   718  
   719  	if _, err := tc.store.raftLogQueue.testingAdd(ctx, tc.repl, 99.99 /* priority */); err != nil {
   720  		t.Fatal(err)
   721  	}
   722  	tc.store.MustForceRaftLogScanAndProcess()
   723  	// SST is definitely truncated now, so recomputing the Raft log keys should match up with
   724  	// the tracked size.
   725  	verifyLogSizeInSync(t, tc.repl)
   726  }
   727  
   728  type mockSender struct {
   729  	logEntries [][]byte
   730  	done       bool
   731  }
   732  
   733  func (mr *mockSender) Send(req *SnapshotRequest) error {
   734  	if req.LogEntries != nil {
   735  		if mr.logEntries != nil {
   736  			return errors.New("already have log entries")
   737  		}
   738  		mr.logEntries = req.LogEntries
   739  	}
   740  	return nil
   741  }
   742  
   743  func (mr *mockSender) Recv() (*SnapshotResponse, error) {
   744  	if mr.done {
   745  		return nil, io.EOF
   746  	}
   747  	status := SnapshotResponse_ACCEPTED
   748  	if len(mr.logEntries) > 0 {
   749  		status = SnapshotResponse_APPLIED
   750  		mr.done = true
   751  	}
   752  	return &SnapshotResponse{Status: status}, nil
   753  }
   754  
   755  func newEngine(t *testing.T) (func(), storage.Engine) {
   756  	dir, cleanup := testutils.TempDir(t)
   757  	eng, err := storage.NewDefaultEngine(
   758  		1<<20,
   759  		base.StorageConfig{
   760  			Dir:       dir,
   761  			MustExist: false,
   762  		})
   763  	if err != nil {
   764  		t.Fatal(err)
   765  	}
   766  	return cleanup, eng
   767  }
   768  
   769  // This test verifies that when a snapshot is sent, sideloaded proposals are
   770  // inlined.
   771  func TestRaftSSTableSideloadingSnapshot(t *testing.T) {
   772  	defer leaktest.AfterTest(t)()
   773  	defer SetMockAddSSTable()()
   774  
   775  	ctx := context.Background()
   776  	tc := testContext{}
   777  
   778  	cleanup, eng := newEngine(t)
   779  	tc.engine = eng
   780  	defer cleanup()
   781  	defer eng.Close()
   782  
   783  	stopper := stop.NewStopper()
   784  	defer stopper.Stop(ctx)
   785  	tc.Start(t, stopper)
   786  
   787  	var ba roachpb.BatchRequest
   788  	ba.RangeID = tc.repl.RangeID
   789  
   790  	// Disable log truncation as we want to be sure that we get to create
   791  	// snapshots that have our sideloaded proposal in them.
   792  	tc.store.SetRaftLogQueueActive(false)
   793  
   794  	// Put a sideloaded proposal on the Range.
   795  	key, val := "don't", "care"
   796  	origSSTData, _ := MakeSSTable(key, val, hlc.Timestamp{}.Add(0, 1))
   797  	{
   798  
   799  		var addReq roachpb.AddSSTableRequest
   800  		addReq.Data = origSSTData
   801  		addReq.Key = roachpb.Key(key)
   802  		addReq.EndKey = addReq.Key.Next()
   803  		ba.Add(&addReq)
   804  
   805  		_, pErr := tc.store.Send(ctx, ba)
   806  		if pErr != nil {
   807  			t.Fatal(pErr)
   808  		}
   809  	}
   810  
   811  	// Run a happy case snapshot. Check that it properly inlines the payload in
   812  	// the contained log entries.
   813  	inlinedEntry := func() raftpb.Entry {
   814  		os, err := tc.repl.GetSnapshot(ctx, SnapshotRequest_RAFT, tc.store.StoreID())
   815  		if err != nil {
   816  			t.Fatal(err)
   817  		}
   818  		defer os.Close()
   819  
   820  		mockSender := &mockSender{}
   821  		if err := sendSnapshot(
   822  			ctx,
   823  			&tc.store.cfg.RaftConfig,
   824  			tc.store.cfg.Settings,
   825  			mockSender,
   826  			&fakeStorePool{},
   827  			SnapshotRequest_Header{State: os.State, Priority: SnapshotRequest_RECOVERY},
   828  			os,
   829  			tc.repl.store.Engine().NewBatch,
   830  			func() {},
   831  		); err != nil {
   832  			t.Fatal(err)
   833  		}
   834  
   835  		var ent raftpb.Entry
   836  		var cmd kvserverpb.RaftCommand
   837  		var finalEnt raftpb.Entry
   838  		for _, entryBytes := range mockSender.logEntries {
   839  			if err := protoutil.Unmarshal(entryBytes, &ent); err != nil {
   840  				t.Fatal(err)
   841  			}
   842  			if sniffSideloadedRaftCommand(ent.Data) {
   843  				_, cmdBytes := DecodeRaftCommand(ent.Data)
   844  				if err := protoutil.Unmarshal(cmdBytes, &cmd); err != nil {
   845  					t.Fatal(err)
   846  				}
   847  				if as := cmd.ReplicatedEvalResult.AddSSTable; as == nil {
   848  					t.Fatalf("no AddSSTable found in sideloaded command %+v", cmd)
   849  				} else if len(as.Data) == 0 {
   850  					t.Fatalf("empty payload in sideloaded command: %+v", cmd)
   851  				}
   852  				finalEnt = ent
   853  			}
   854  		}
   855  		if finalEnt.Index == 0 {
   856  			t.Fatal("no sideloaded command found")
   857  		}
   858  		return finalEnt
   859  	}()
   860  
   861  	sideloadedIndex := inlinedEntry.Index
   862  
   863  	// This happens to be a good point in time to check the `entries()` method
   864  	// which has special handling to accommodate `term()`: when an empty
   865  	// sideload storage is passed in, `entries()` should not inline, and in turn
   866  	// also not populate the entries cache (since its contents must always be
   867  	// fully inlined).
   868  	func() {
   869  		tc.repl.raftMu.Lock()
   870  		defer tc.repl.raftMu.Unlock()
   871  		tc.repl.mu.Lock()
   872  		defer tc.repl.mu.Unlock()
   873  		for _, withSS := range []bool{false, true} {
   874  			tc.store.raftEntryCache.Clear(tc.repl.RangeID, sideloadedIndex+1)
   875  
   876  			var ss SideloadStorage
   877  			if withSS {
   878  				ss = tc.repl.raftMu.sideloaded
   879  			}
   880  			rsl := stateloader.Make(tc.repl.RangeID)
   881  			entries, err := entries(
   882  				ctx, rsl, tc.store.Engine(), tc.repl.RangeID, tc.store.raftEntryCache,
   883  				ss, sideloadedIndex, sideloadedIndex+1, 1<<20,
   884  			)
   885  			if err != nil {
   886  				t.Fatal(err)
   887  			}
   888  			if len(entries) != 1 {
   889  				t.Fatalf("no or too many entries returned from cache: %+v", entries)
   890  			}
   891  			ents, _, _, _ := tc.store.raftEntryCache.Scan(nil, tc.repl.RangeID, sideloadedIndex, sideloadedIndex+1, 1<<20)
   892  			if withSS {
   893  				// We passed the sideload storage, so we expect to get our
   894  				// inlined index back from the cache.
   895  				if len(ents) != 1 {
   896  					t.Fatalf("no or too many entries returned from cache: %+v", ents)
   897  				}
   898  				if err := entryEq(inlinedEntry, ents[0]); err != nil {
   899  					t.Fatalf("withSS=%t: %+v", withSS, err)
   900  				}
   901  			} else {
   902  				// Without sideload storage, expect the cache to remain
   903  				// unpopulated and the entry returned from entries() to not have
   904  				// been inlined.
   905  				if len(ents) != 0 {
   906  					t.Fatalf("expected no cached entries, but got %+v", ents)
   907  				}
   908  				if expErr, err := `ReplicatedEvalResult.AddSSTable.Data: \[\]uint8\[\d+\] != \[\]uint8\[0\]`,
   909  					entryEq(inlinedEntry, entries[0]); !testutils.IsError(
   910  					err,
   911  					expErr,
   912  				) {
   913  					t.Fatalf("expected specific mismatch on `Data` field, but got %v\nwanted: %s", err, expErr)
   914  				}
   915  			}
   916  		}
   917  	}()
   918  
   919  	// Now run a snapshot that will fail since it doesn't find one of its on-disk
   920  	// payloads. This can happen if the Raft log queue runs between the time the
   921  	// (engine) snapshot is taken and the log entries are actually read from the
   922  	// (engine) snapshot. We didn't run this before because we wanted the file
   923  	// to stay in sideloaded storage for the previous test.
   924  	func() {
   925  		failingOS, err := tc.repl.GetSnapshot(ctx, SnapshotRequest_RAFT, tc.store.StoreID())
   926  		if err != nil {
   927  			t.Fatal(err)
   928  		}
   929  		defer failingOS.Close()
   930  
   931  		// Remove the actual file.
   932  		tc.repl.raftMu.Lock()
   933  		if err := tc.repl.raftMu.sideloaded.Clear(ctx); err != nil {
   934  			tc.repl.raftMu.Unlock()
   935  			t.Fatal(err)
   936  		}
   937  		tc.repl.raftMu.Unlock()
   938  		// Additionally we need to clear out the entry from the cache because
   939  		// that would still save the day.
   940  		tc.store.raftEntryCache.Clear(tc.repl.RangeID, sideloadedIndex+1)
   941  
   942  		mockSender := &mockSender{}
   943  		err = sendSnapshot(
   944  			ctx,
   945  			&tc.store.cfg.RaftConfig,
   946  			tc.store.cfg.Settings,
   947  			mockSender,
   948  			&fakeStorePool{},
   949  			SnapshotRequest_Header{State: failingOS.State, Priority: SnapshotRequest_RECOVERY},
   950  			failingOS,
   951  			tc.repl.store.Engine().NewBatch,
   952  			func() {},
   953  		)
   954  		if !errors.HasType(err, (*errMustRetrySnapshotDueToTruncation)(nil)) {
   955  			t.Fatal(err)
   956  		}
   957  	}()
   958  }
   959  
   960  func TestRaftSSTableSideloadingTruncation(t *testing.T) {
   961  	defer leaktest.AfterTest(t)()
   962  	defer SetMockAddSSTable()()
   963  
   964  	tc := testContext{}
   965  	stopper := stop.NewStopper()
   966  	defer stopper.Stop(context.Background())
   967  	tc.Start(t, stopper)
   968  	makeInMemSideloaded(tc.repl)
   969  	ctx := context.Background()
   970  
   971  	const count = 10
   972  
   973  	var indexes []uint64
   974  	addLastIndex := func() {
   975  		lastIndex, err := tc.repl.GetLastIndex()
   976  		if err != nil {
   977  			t.Fatal(err)
   978  		}
   979  		indexes = append(indexes, lastIndex)
   980  	}
   981  	for i := 0; i < count; i++ {
   982  		addLastIndex()
   983  		key := fmt.Sprintf("key-%d", i)
   984  		val := fmt.Sprintf("val-%d", i)
   985  		if err := ProposeAddSSTable(ctx, key, val, tc.Clock().Now(), tc.store); err != nil {
   986  			t.Fatalf("%d: %+v", i, err)
   987  		}
   988  	}
   989  	// Append an extra entry which, if we truncate it, should definitely also
   990  	// remove any leftover files (ok, unless the last one is reproposed but
   991  	// that's *very* unlikely to happen for the last one)
   992  	addLastIndex()
   993  
   994  	fmtSideloaded := func() []string {
   995  		var r []string
   996  		tc.repl.raftMu.Lock()
   997  		defer tc.repl.raftMu.Unlock()
   998  		for k := range tc.repl.raftMu.sideloaded.(*inMemSideloadStorage).m {
   999  			r = append(r, fmt.Sprintf("%v", k))
  1000  		}
  1001  		sort.Strings(r)
  1002  		return r
  1003  	}
  1004  
  1005  	// Check that when we truncate, the number of on-disk files changes in ways
  1006  	// we expect. Intentionally not too strict due to the possibility of
  1007  	// reproposals, etc; it could be made stricter, but this should give enough
  1008  	// confidence already that we're calling `PurgeTo` correctly, and for the
  1009  	// remainder unit testing on each impl's PurgeTo is more useful.
  1010  	for i := range indexes {
  1011  		const rangeID = 1
  1012  		newFirstIndex := indexes[i] + 1
  1013  		truncateArgs := truncateLogArgs(newFirstIndex, rangeID)
  1014  		log.Eventf(ctx, "truncating to index < %d", newFirstIndex)
  1015  		if _, pErr := kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{RangeID: rangeID}, &truncateArgs); pErr != nil {
  1016  			t.Fatal(pErr)
  1017  		}
  1018  		sideloadStrings := fmtSideloaded()
  1019  		if minFiles := count - i; len(sideloadStrings) < minFiles {
  1020  			t.Fatalf("after truncation at %d (i=%d), expected at least %d files left, but have:\n%v",
  1021  				indexes[i], i, minFiles, sideloadStrings)
  1022  		}
  1023  	}
  1024  
  1025  	if sideloadStrings := fmtSideloaded(); len(sideloadStrings) != 0 {
  1026  		t.Fatalf("expected all files to be cleaned up, but found %v", sideloadStrings)
  1027  	}
  1028  
  1029  }