github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/raft_log_queue_test.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/assert"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/tracker"
)

func TestShouldTruncate(t *testing.T) {
	defer leaktest.AfterTest(t)()

	testCases := []struct {
		truncatableIndexes uint64
		raftLogSize        int64
		expected           bool
	}{
		{RaftLogQueueStaleThreshold - 1, 0, false},
		{RaftLogQueueStaleThreshold, 0, true},
		{0, RaftLogQueueStaleSize, false},
		{1, RaftLogQueueStaleSize - 1, false},
		{1, RaftLogQueueStaleSize, true},
	}
	for _, c := range testCases {
		t.Run("", func(t *testing.T) {
			var d truncateDecision
			d.Input.LogSize = c.raftLogSize
			d.Input.FirstIndex = 123
			d.NewFirstIndex = d.Input.FirstIndex + c.truncatableIndexes
			v := d.ShouldTruncate()
			if c.expected != v {
				t.Fatalf("expected %v, but found %v", c.expected, v)
			}
		})
	}
}
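// Editor's sketch (not part of the original file; exampleShouldTruncateTriggers
// is a hypothetical helper): the two independent triggers that the table in
// TestShouldTruncate exercises, isolated for readability. A truncation is
// suggested either once RaftLogQueueStaleThreshold entries are truncatable, or
// once at least one entry is truncatable and the log has reached
// RaftLogQueueStaleSize bytes.
func exampleShouldTruncateTriggers() (byCount, bySize bool) {
	// Trigger 1: enough stale entries, regardless of log size.
	var d1 truncateDecision
	d1.Input.FirstIndex = 123
	d1.NewFirstIndex = d1.Input.FirstIndex + RaftLogQueueStaleThreshold
	byCount = d1.ShouldTruncate() // true

	// Trigger 2: only one truncatable entry, but the log is large.
	var d2 truncateDecision
	d2.Input.LogSize = RaftLogQueueStaleSize
	d2.Input.FirstIndex = 123
	d2.NewFirstIndex = d2.Input.FirstIndex + 1
	bySize = d2.ShouldTruncate() // true

	return byCount, bySize
}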
func TestComputeTruncateDecision(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	const targetSize = 1000

	// NB: all tests here have a truncateDecision which starts with "should
	// truncate: false", because these tests don't simulate enough data to be
	// over the truncation threshold.
	testCases := []struct {
		commit          uint64
		progress        []uint64
		raftLogSize     int64
		firstIndex      uint64
		lastIndex       uint64
		pendingSnapshot uint64
		exp             string
	}{
		{
			// Nothing to truncate.
			1, []uint64{1, 2}, 100, 1, 1, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: last index)]",
		},
		{
			// Nothing to truncate on this replica, though a quorum elsewhere has more progress.
			// NB: this couldn't happen if we're truly the Raft leader, unless we appended to our
			// own log asynchronously.
			1, []uint64{1, 5, 5}, 100, 1, 1, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: last index)]",
		},
		{
			// We're not truncating anything, but one follower is already cut off. There's no pending
			// snapshot so we shouldn't be causing any additional snapshots.
			2, []uint64{1, 5, 5}, 100, 2, 2, 0,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
		{
			// The happy case.
			5, []uint64{5, 5, 5}, 100, 2, 5, 0,
			"should truncate: false [truncate 3 entries to first index 5 (chosen via: last index)]",
		},
		{
			// No truncation, but the outstanding snapshot is made obsolete by the truncation. However
			// it was already obsolete before. (This example is also not one you could manufacture in
			// a real system).
			2, []uint64{5, 5, 5}, 100, 2, 2, 1,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
		{
			// Respecting the pending snapshot.
			5, []uint64{5, 5, 5}, 100, 2, 5, 3,
			"should truncate: false [truncate 1 entries to first index 3 (chosen via: pending snapshot)]",
		},
		{
			// Log is below target size, so respecting the slowest follower.
			3, []uint64{1, 2, 3, 4}, 100, 1, 5, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: followers)]",
		},
		{
			// Truncating since local log starts at 2. One follower is already cut off without a pending
			// snapshot.
			2, []uint64{1, 2, 3, 4}, 100, 2, 2, 0,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
		// Don't truncate off active followers, even if over targetSize.
		{
			3, []uint64{1, 3, 3, 4}, 2000, 1, 3, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: followers); log too large (2.0 KiB > 1000 B)]",
		},
		// Don't truncate away pending snapshot, even when log too large.
		{
			100, []uint64{100, 100}, 2000, 1, 100, 50,
			"should truncate: false [truncate 49 entries to first index 50 (chosen via: pending snapshot); log too large (2.0 KiB > 1000 B)]",
		},
		{
			3, []uint64{1, 3, 3, 4}, 2000, 2, 3, 0,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index); log too large (2.0 KiB > 1000 B)]",
		},
		{
			3, []uint64{1, 3, 3, 4}, 2000, 3, 3, 0,
			"should truncate: false [truncate 0 entries to first index 3 (chosen via: first index); log too large (2.0 KiB > 1000 B)]",
		},
		// Respecting the pending snapshot.
		{
			7, []uint64{4}, 2000, 1, 7, 1,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: pending snapshot); log too large (2.0 KiB > 1000 B)]",
		},
		// Never truncate past the commit index.
		{
			3, []uint64{3, 3, 6}, 100, 2, 7, 0,
			"should truncate: false [truncate 1 entries to first index 3 (chosen via: commit)]",
		},
		// Never truncate past the last index.
		{
			3, []uint64{5}, 100, 1, 3, 0,
			"should truncate: false [truncate 2 entries to first index 3 (chosen via: last index)]",
		},
		// Never truncate "before the first index".
		{
			3, []uint64{5}, 100, 2, 3, 1,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
	}
	for i, c := range testCases {
		t.Run("", func(t *testing.T) {
			status := raft.Status{
				Progress: make(map[uint64]tracker.Progress),
			}
			for j, v := range c.progress {
				status.Progress[uint64(j)] = tracker.Progress{
					RecentActive: true,
					State:        tracker.StateReplicate,
					Match:        v,
					Next:         v + 1,
				}
			}
			status.Commit = c.commit
			input := truncateDecisionInput{
				RaftStatus:           status,
				LogSize:              c.raftLogSize,
				MaxLogSize:           targetSize,
				LogSizeTrusted:       true,
				FirstIndex:           c.firstIndex,
				LastIndex:            c.lastIndex,
				PendingSnapshotIndex: c.pendingSnapshot,
			}
			decision := computeTruncateDecision(input)
			if act, exp := decision.String(), c.exp; act != exp {
				t.Errorf("%d:\ngot:\n%s\nwanted:\n%s", i, act, exp)
			}

			// Verify the triggers that queue a range for recomputation. In
			// essence, when the raft log size is not trusted we want to suggest
			// a truncation and also a recomputation. If the size *is* trusted,
			// we'll just see the decision play out as before.
			// The real tests for this are in TestRaftLogQueueShouldQueueRecompute,
			// but this is some nice extra coverage.
			should, recompute, prio := (*raftLogQueue)(nil).shouldQueueImpl(ctx, decision)
			assert.Equal(t, decision.ShouldTruncate(), should)
			assert.False(t, recompute)
			assert.Equal(t, decision.ShouldTruncate(), prio != 0)
			input.LogSizeTrusted = false
			input.RaftStatus.RaftState = raft.StateLeader
			if input.LastIndex <= input.FirstIndex {
				input.LastIndex = input.FirstIndex + 1
			}
			decision = computeTruncateDecision(input)
			should, recompute, prio = (*raftLogQueue)(nil).shouldQueueImpl(ctx, decision)
			assert.True(t, should)
			assert.True(t, prio > 0)
			assert.True(t, recompute)
		})
	}
}
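// Editor's note (summarizing the table above, not part of the original file):
// computeTruncateDecision takes the smallest index permitted by each active
// constraint -- the slowest replicating follower, any pending snapshot, the
// commit index, and the local last index -- and then clamps the result so it
// never falls below the replica's current first index. Active followers and
// pending snapshots are respected even when the log is over its target size;
// see TestComputeTruncateDecisionProgressStatusProbe below for how inactive
// followers are treated once the log grows too large.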
// TestComputeTruncateDecisionProgressStatusProbe verifies that when a follower
// is marked as active and is being probed for its log index, we don't truncate
// the log out from under it.
func TestComputeTruncateDecisionProgressStatusProbe(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// NB: most tests here have a truncateDecision which starts with "should
	// truncate: false", because these tests don't simulate enough data to be
	// over the truncation threshold.
	exp := map[bool]map[bool]string{ // (tooLarge, active)
		false: {
			true:  "should truncate: false [truncate 0 entries to first index 10 (chosen via: probing follower)]",
			false: "should truncate: false [truncate 0 entries to first index 10 (chosen via: first index)]",
		},
		true: {
			true:  "should truncate: false [truncate 0 entries to first index 10 (chosen via: probing follower); log too large (2.0 KiB > 1.0 KiB)]",
			false: "should truncate: true [truncate 190 entries to first index 200 (chosen via: followers); log too large (2.0 KiB > 1.0 KiB)]",
		},
	}

	testutils.RunTrueAndFalse(t, "tooLarge", func(t *testing.T, tooLarge bool) {
		testutils.RunTrueAndFalse(t, "active", func(t *testing.T, active bool) {
			status := raft.Status{
				Progress: make(map[uint64]tracker.Progress),
			}
			progress := []uint64{100, 200, 300, 400, 500}
			lastIndex := uint64(500)
			status.Commit = 300

			for i, v := range progress {
				var pr tracker.Progress
				if v == 100 {
					// A probing follower is probed with some index (Next) but
					// it has a zero Match (i.e. no idea how much of its log
					// agrees with ours).
					pr = tracker.Progress{
						RecentActive: active,
						State:        tracker.StateProbe,
						Match:        0,
						Next:         v,
					}
				} else { // everyone else
					pr = tracker.Progress{
						Match:        v,
						Next:         v + 1,
						RecentActive: true,
						State:        tracker.StateReplicate,
					}
				}
				status.Progress[uint64(i)] = pr
			}

			input := truncateDecisionInput{
				RaftStatus:     status,
				MaxLogSize:     1024,
				FirstIndex:     10,
				LastIndex:      lastIndex,
				LogSizeTrusted: true,
			}
			if tooLarge {
				input.LogSize += 2 * input.MaxLogSize
			}

			decision := computeTruncateDecision(input)
			if s, exp := decision.String(), exp[tooLarge][active]; s != exp {
				t.Errorf("expected %q, got %q", exp, s)
			}
		})
	})
}
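// Editor's sketch (hypothetical variable, not part of the original file): the
// Progress shape the test above builds for a probing follower. The leader
// probes optimistically at Next while Match stays at zero until the follower
// acknowledges a matching log prefix, which is why an active probe forces the
// truncation decision to stay at the current first index.
var exampleProbingFollower = tracker.Progress{
	State:        tracker.StateProbe,
	Match:        0,   // no confirmed shared prefix yet
	Next:         100, // index currently being probed
	RecentActive: true,
}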
func TestTruncateDecisionZeroValue(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var decision truncateDecision
	assert.False(t, decision.ShouldTruncate())
	assert.Zero(t, decision.NumNewRaftSnapshots())
	assert.Zero(t, decision.NumTruncatableIndexes())
	assert.Equal(t, "should truncate: false [truncate 0 entries to first index 0 (chosen via: ); log size untrusted]", decision.String())
}

func TestTruncateDecisionNumSnapshots(t *testing.T) {
	defer leaktest.AfterTest(t)()

	status := raft.Status{
		Progress: map[uint64]tracker.Progress{
			// Fully caught up.
			5: {State: tracker.StateReplicate, Match: 11, Next: 12},
			// Behind.
			6: {State: tracker.StateReplicate, Match: 10, Next: 11},
			// Last MsgApp in flight, so basically caught up.
			7: {State: tracker.StateReplicate, Match: 10, Next: 12},
			8: {State: tracker.StateProbe},    // irrelevant
			9: {State: tracker.StateSnapshot}, // irrelevant
		},
	}

	decision := truncateDecision{Input: truncateDecisionInput{RaftStatus: status}}
	assert.Equal(t, 0, decision.raftSnapshotsForIndex(10))
	assert.Equal(t, 1, decision.raftSnapshotsForIndex(11))
	assert.Equal(t, 3, decision.raftSnapshotsForIndex(12))
	assert.Equal(t, 3, decision.raftSnapshotsForIndex(13))
}
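// Editor's note (reading off the assertions above, not part of the original
// file): a replicating follower starts to need a snapshot once the proposed
// first index passes the next entry the leader would send it. Truncating to
// index 11 only strands the follower at Next: 11, while the follower whose
// last MsgApp is still in flight (Match: 10, Next: 12) is unaffected until
// index 12. Probing and snapshotting followers never add to the count here.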
func verifyLogSizeInSync(t *testing.T, r *Replica) {
	t.Helper()
	r.raftMu.Lock()
	defer r.raftMu.Unlock()
	r.mu.Lock()
	raftLogSize := r.mu.raftLogSize
	r.mu.Unlock()
	actualRaftLogSize, err := ComputeRaftLogSize(context.Background(), r.RangeID, r.Engine(), r.SideloadedRaftMuLocked())
	if err != nil {
		t.Fatal(err)
	}
	if actualRaftLogSize != raftLogSize {
		t.Fatalf("replica claims raft log size %d, but computed %d", raftLogSize, actualRaftLogSize)
	}
}

func TestUpdateRaftStatusActivity(t *testing.T) {
	defer leaktest.AfterTest(t)()

	type testCase struct {
		prs        []tracker.Progress
		replicas   []roachpb.ReplicaDescriptor
		lastUpdate lastUpdateTimesMap
		now        time.Time

		exp []tracker.Progress
	}

	now := timeutil.Now()

	inactivityThreshold := time.Second

	tcs := []testCase{
		// No data, no crash.
		{},
		// No knowledge = no update.
		{prs: []tracker.Progress{{RecentActive: true}}, exp: []tracker.Progress{{RecentActive: true}}},
		{prs: []tracker.Progress{{RecentActive: false}}, exp: []tracker.Progress{{RecentActive: false}}},
		// See replica in descriptor but then don't find it in the map. Assumes the follower is not
		// active.
		{
			replicas: []roachpb.ReplicaDescriptor{{ReplicaID: 1}},
			prs:      []tracker.Progress{{RecentActive: true}},
			exp:      []tracker.Progress{{RecentActive: false}},
		},
		// Three replicas in descriptor. The first one responded recently, the second didn't,
		// the third did but it doesn't have a Progress.
		{
			replicas: []roachpb.ReplicaDescriptor{{ReplicaID: 1}, {ReplicaID: 2}, {ReplicaID: 3}},
			prs:      []tracker.Progress{{RecentActive: false}, {RecentActive: true}},
			lastUpdate: map[roachpb.ReplicaID]time.Time{
				1: now.Add(-1 * inactivityThreshold / 2),
				2: now.Add(-1 - inactivityThreshold),
				3: now,
			},
			now: now,

			exp: []tracker.Progress{{RecentActive: true}, {RecentActive: false}},
		},
	}

	ctx := context.Background()

	for _, tc := range tcs {
		t.Run("", func(t *testing.T) {
			prs := make(map[uint64]tracker.Progress)
			for i, pr := range tc.prs {
				prs[uint64(i+1)] = pr
			}
			expPRs := make(map[uint64]tracker.Progress)
			for i, pr := range tc.exp {
				expPRs[uint64(i+1)] = pr
			}
			updateRaftProgressFromActivity(ctx, prs, tc.replicas,
				func(replicaID roachpb.ReplicaID) bool {
					return tc.lastUpdate.isFollowerActiveSince(ctx, replicaID, tc.now, inactivityThreshold)
				},
			)

			assert.Equal(t, expPRs, prs)
		})
	}
}
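// Editor's sketch (hypothetical helper, not part of the original file): the
// activity predicate the test above feeds to updateRaftProgressFromActivity.
// A follower counts as recently active only if the last-update map has an
// entry for it that is no older than the inactivity threshold; the exact
// boundary behavior is defined by isFollowerActiveSince.
func exampleIsFollowerActive(
	lastUpdate lastUpdateTimesMap, id roachpb.ReplicaID, now time.Time, threshold time.Duration,
) bool {
	last, ok := lastUpdate[id]
	// Unknown followers are assumed inactive, matching the test case in which
	// a replica appears in the descriptor but not in the map.
	return ok && !last.Before(now.Add(-threshold))
}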
func TestNewTruncateDecisionMaxSize(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	cfg := TestStoreConfig(hlc.NewClock(hlc.NewManualClock(123).UnixNano, time.Nanosecond))
	const exp = 1881
	cfg.RaftLogTruncationThreshold = exp
	store := createTestStoreWithConfig(
		t, stopper, testStoreOpts{createSystemRanges: true}, &cfg,
	)

	repl, err := store.GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	ctx := context.Background()
	td, err := newTruncateDecision(ctx, repl)
	if err != nil {
		t.Fatal(err)
	}

	if td.Input.MaxLogSize != exp {
		t.Fatalf("MaxLogSize %d is unexpected, wanted %d", td.Input.MaxLogSize, exp)
	}
}

// TestNewTruncateDecision verifies that old raft log entries are correctly
// removed.
func TestNewTruncateDecision(t *testing.T) {
	defer leaktest.AfterTest(t)()

	t.Skip("https://github.com/cockroachdb/cockroach/issues/38584")

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store, _ := createTestStore(t,
		testStoreOpts{
			// This test was written before test stores could start with more than one
			// range and was not adapted.
			createSystemRanges: false,
		},
		stopper)
	store.SetRaftLogQueueActive(false)

	r, err := store.GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	getIndexes := func() (uint64, int, uint64, error) {
		d, err := newTruncateDecision(context.Background(), r)
		if err != nil {
			return 0, 0, 0, err
		}
		return d.Input.FirstIndex, d.NumTruncatableIndexes(), d.NewFirstIndex, nil
	}

	aFirst, aTruncatable, aOldest, err := getIndexes()
	if err != nil {
		t.Fatal(err)
	}
	if aFirst == 0 {
		t.Errorf("expected first index to be greater than 0, got %d", aFirst)
	}

	// Write a few keys to the range.
	for i := 0; i < RaftLogQueueStaleThreshold+1; i++ {
		key := roachpb.Key(fmt.Sprintf("key%02d", i))
		args := putArgs(key, []byte(fmt.Sprintf("value%02d", i)))
		if _, err := kv.SendWrapped(context.Background(), store.TestSender(), &args); err != nil {
			t.Fatal(err)
		}
	}

	bFirst, bTruncatable, bOldest, err := getIndexes()
	if err != nil {
		t.Fatal(err)
	}
	if aFirst != bFirst {
		t.Fatalf("expected firstIndex to not change, instead it changed from %d -> %d", aFirst, bFirst)
	}
	if aTruncatable >= bTruncatable {
		t.Fatalf("expected truncatableIndexes to increase, instead it changed from %d -> %d", aTruncatable, bTruncatable)
	}
	if aOldest >= bOldest {
		t.Fatalf("expected oldestIndex to increase, instead it changed from %d -> %d", aOldest, bOldest)
	}

	// Enable the raft log scanner and force a truncation.
	store.SetRaftLogQueueActive(true)
	store.MustForceRaftLogScanAndProcess()
	store.SetRaftLogQueueActive(false)

	// There can be a delay from when the truncation command is issued and the
	// indexes updating.
	var cFirst, cOldest uint64
	var numTruncatable int
	testutils.SucceedsSoon(t, func() error {
		var err error
		cFirst, numTruncatable, cOldest, err = getIndexes()
		if err != nil {
			t.Fatal(err)
		}
		if bFirst == cFirst {
			return errors.Errorf("truncation did not occur, expected firstIndex to change, instead it remained at %d", cFirst)
		}
		return nil
	})
	if bTruncatable < numTruncatable {
		t.Errorf("expected numTruncatable to decrease, instead it changed from %d -> %d", bTruncatable, numTruncatable)
	}
	if bOldest >= cOldest {
		t.Errorf("expected oldestIndex to increase, instead it changed from %d -> %d", bOldest, cOldest)
	}

	verifyLogSizeInSync(t, r)

	// Again, enable the raft log scanner and force a truncation. This time
	// we expect no truncation to occur.
	store.SetRaftLogQueueActive(true)
	store.MustForceRaftLogScanAndProcess()
	store.SetRaftLogQueueActive(false)

	// Unlike the last iteration, where we expect a truncation and can wait on
	// it with SucceedsSoon, we can't do that here. This check is fragile in
	// that the truncation triggered here may lose the race against the call to
	// GetFirstIndex or newTruncateDecision, giving a false negative. Fixing
	// this requires additional instrumentation of the queues, which was deemed
	// to require too much work at the time of this writing.
	dFirst, dTruncatable, dOldest, err := getIndexes()
	if err != nil {
		t.Fatal(err)
	}
	if cFirst != dFirst {
		t.Errorf("truncation should not have occurred, but firstIndex changed from %d -> %d", cFirst, dFirst)
	}
	if numTruncatable != dTruncatable {
		t.Errorf("truncation should not have occurred, but truncatableIndexes changed from %d -> %d", numTruncatable, dTruncatable)
	}
	if cOldest != dOldest {
		t.Errorf("truncation should not have occurred, but oldestIndex changed from %d -> %d", cOldest, dOldest)
	}
}
// TestProactiveRaftLogTruncate verifies that we proactively truncate the raft
// log even when replica scanning is disabled.
func TestProactiveRaftLogTruncate(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()

	testCases := []struct {
		count     int
		valueSize int
	}{
		// Lots of small KVs.
		{RaftLogQueueStaleSize / 100, 5},
		// One big KV.
		{1, RaftLogQueueStaleSize},
	}
	for _, c := range testCases {
		t.Run("", func(t *testing.T) {
			stopper := stop.NewStopper()
			defer stopper.Stop(ctx)
			store, _ := createTestStore(t,
				testStoreOpts{
					// This test was written before test stores could start with more than one
					// range and was not adapted.
					createSystemRanges: false,
				},
				stopper)

			// Note that turning off the replica scanner does not prevent the queues
			// from processing entries (in this case specifically the raftLogQueue),
			// just that the scanner will not try to push all replicas onto the queues.
			store.SetReplicaScannerActive(false)

			r, err := store.GetReplica(1)
			if err != nil {
				t.Fatal(err)
			}

			oldFirstIndex, err := r.GetFirstIndex()
			if err != nil {
				t.Fatal(err)
			}

			for i := 0; i < c.count; i++ {
				key := roachpb.Key(fmt.Sprintf("key%02d", i))
				args := putArgs(key, []byte(fmt.Sprintf("%s%02d", strings.Repeat("v", c.valueSize), i)))
				if _, err := kv.SendWrapped(ctx, store.TestSender(), &args); err != nil {
					t.Fatal(err)
				}
			}

			// Log truncation is an asynchronous process and while it will usually occur
			// fairly quickly, there is a slight race between this check and the
			// truncation, especially when under stress.
			testutils.SucceedsSoon(t, func() error {
				newFirstIndex, err := r.GetFirstIndex()
				if err != nil {
					t.Fatal(err)
				}
				if newFirstIndex <= oldFirstIndex {
					return errors.Errorf("log was not correctly truncated, old first index:%d, current first index:%d",
						oldFirstIndex, newFirstIndex)
				}
				return nil
			})
		})
	}
}
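// Editor's note (not part of the original file): both cases above are sized to
// push the raft log past RaftLogQueueStaleSize -- once through many small
// writes and once through a single large one -- so it is the write path
// itself, rather than the disabled replica scanner, that nudges the range onto
// the raftLogQueue.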
func TestSnapshotLogTruncationConstraints(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	r := &Replica{}
	var storeID roachpb.StoreID
	id1, id2 := uuid.MakeV4(), uuid.MakeV4()
	const (
		index1 = 50
		index2 = 60
	)

	// Add first constraint.
	r.addSnapshotLogTruncationConstraintLocked(ctx, id1, index1, storeID)
	exp1 := map[uuid.UUID]snapTruncationInfo{id1: {index: index1}}

	// Make sure it registered.
	assert.Equal(t, r.mu.snapshotLogTruncationConstraints, exp1)

	// Add another constraint with the same id. Extremely unlikely in practice
	// but we want to make sure it doesn't blow anything up. Collisions are
	// handled by ignoring the colliding update.
	r.addSnapshotLogTruncationConstraintLocked(ctx, id1, index2, storeID)
	assert.Equal(t, r.mu.snapshotLogTruncationConstraints, exp1)

	// Helper that grabs the min constraint index (which can trigger GC as a
	// byproduct) and asserts.
	assertMin := func(exp uint64, now time.Time) {
		t.Helper()
		const anyRecipientStore roachpb.StoreID = 0
		if minIndex := r.getAndGCSnapshotLogTruncationConstraintsLocked(now, anyRecipientStore); minIndex != exp {
			t.Fatalf("unexpected min index %d, wanted %d", minIndex, exp)
		}
	}

	// Queue should be told index1 is the highest pending one. Note that the
	// colliding update at index2 is not represented.
	assertMin(index1, time.Time{})

	// Add another, higher, index. We're not going to notice it's around
	// until the lower one disappears.
	r.addSnapshotLogTruncationConstraintLocked(ctx, id2, index2, storeID)

	now := timeutil.Now()
	// The colliding snapshot comes back. Or the original, we can't tell.
	r.completeSnapshotLogTruncationConstraint(ctx, id1, now)
	// The index should show up when its deadline isn't hit.
	assertMin(index1, now)
	assertMin(index1, now.Add(raftLogQueuePendingSnapshotGracePeriod))
	assertMin(index1, now.Add(raftLogQueuePendingSnapshotGracePeriod))
	// Once we're over deadline, the index returned so far disappears.
	assertMin(index2, now.Add(raftLogQueuePendingSnapshotGracePeriod+1))
	assertMin(index2, time.Time{})
	assertMin(index2, now.Add(10*raftLogQueuePendingSnapshotGracePeriod))

	r.completeSnapshotLogTruncationConstraint(ctx, id2, now)
	assertMin(index2, now)
	assertMin(index2, now.Add(raftLogQueuePendingSnapshotGracePeriod))
	assertMin(0, now.Add(2*raftLogQueuePendingSnapshotGracePeriod))

	assert.Equal(t, r.mu.snapshotLogTruncationConstraints, map[uuid.UUID]snapTruncationInfo(nil))
}
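// Editor's note (not part of the original file): as the sequence above shows,
// completing a constraint does not drop it immediately. The completed entry
// keeps protecting its index until raftLogQueuePendingSnapshotGracePeriod has
// elapsed past its completion time, after which the GC pass inside
// getAndGCSnapshotLogTruncationConstraintsLocked removes it and the next
// pending index (or zero, if none remain) takes over.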
// TestTruncateLog verifies that the TruncateLog command removes a
// prefix of the raft logs (modifying FirstIndex() and making them
// inaccessible via Entries()).
func TestTruncateLog(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	cfg := TestStoreConfig(nil)
	cfg.TestingKnobs.DisableRaftLogQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.StartWithStoreConfig(t, stopper, cfg)

	// Populate the log with 10 entries. Save the LastIndex after each write.
	var indexes []uint64
	for i := 0; i < 10; i++ {
		args := incrementArgs([]byte("a"), int64(i))

		if _, pErr := tc.SendWrapped(args); pErr != nil {
			t.Fatal(pErr)
		}
		idx, err := tc.repl.GetLastIndex()
		if err != nil {
			t.Fatal(err)
		}
		indexes = append(indexes, idx)
	}

	rangeID := tc.repl.RangeID

	// Discard the first half of the log.
	truncateArgs := truncateLogArgs(indexes[5], rangeID)
	if _, pErr := tc.SendWrappedWith(roachpb.Header{RangeID: 1}, &truncateArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// FirstIndex has changed.
	firstIndex, err := tc.repl.GetFirstIndex()
	if err != nil {
		t.Fatal(err)
	}
	if firstIndex != indexes[5] {
		t.Errorf("expected firstIndex == %d, got %d", indexes[5], firstIndex)
	}

	// We can still get what remains of the log.
	tc.repl.mu.Lock()
	entries, err := tc.repl.raftEntriesLocked(indexes[5], indexes[9], math.MaxUint64)
	tc.repl.mu.Unlock()
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) != int(indexes[9]-indexes[5]) {
		t.Errorf("expected %d entries, got %d", indexes[9]-indexes[5], len(entries))
	}

	// But any range that includes the truncated entries returns an error.
	tc.repl.mu.Lock()
	_, err = tc.repl.raftEntriesLocked(indexes[4], indexes[9], math.MaxUint64)
	tc.repl.mu.Unlock()
	if !errors.Is(err, raft.ErrCompacted) {
		t.Errorf("expected ErrCompacted, got %s", err)
	}

	// The term of the last truncated entry is still available.
	tc.repl.mu.Lock()
	term, err := tc.repl.raftTermRLocked(indexes[4])
	tc.repl.mu.Unlock()
	if err != nil {
		t.Fatal(err)
	}
	if term == 0 {
		t.Errorf("invalid term 0 for truncated entry")
	}

	// The terms of older entries are gone.
	tc.repl.mu.Lock()
	_, err = tc.repl.raftTermRLocked(indexes[3])
	tc.repl.mu.Unlock()
	if !errors.Is(err, raft.ErrCompacted) {
		t.Errorf("expected ErrCompacted, got %s", err)
	}

	// Truncating logs that have already been truncated should not return an
	// error.
	truncateArgs = truncateLogArgs(indexes[3], rangeID)
	if _, pErr := tc.SendWrapped(&truncateArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Truncating logs that have the wrong rangeID included should not return
	// an error but should not truncate any logs.
	truncateArgs = truncateLogArgs(indexes[9], rangeID+1)
	if _, pErr := tc.SendWrapped(&truncateArgs); pErr != nil {
		t.Fatal(pErr)
	}

	tc.repl.mu.Lock()
	// The term of the last truncated entry is still available.
	term, err = tc.repl.raftTermRLocked(indexes[4])
	tc.repl.mu.Unlock()
	if err != nil {
		t.Fatal(err)
	}
	if term == 0 {
		t.Errorf("invalid term 0 for truncated entry")
	}
}
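// Editor's note (not part of the original file): the asymmetry checked above
// mirrors etcd/raft's Storage contract -- Entries() must fail with
// ErrCompacted for anything below FirstIndex(), while Term() must still answer
// for FirstIndex()-1, because raft needs the (index, term) pair at the
// truncation boundary to validate log matching for the first surviving entry.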
func TestRaftLogQueueShouldQueueRecompute(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	var rlq *raftLogQueue

	// NB: Cases for which decision.ShouldTruncate() is true are tested in
	// TestComputeTruncateDecision, so here the decision itself is never
	// positive.
	var decision truncateDecision
	decision.Input.LogSizeTrusted = true
	decision.Input.LogSize = 12
	decision.Input.MaxLogSize = 1000

	verify := func(shouldQ bool, recompute bool, prio float64) {
		t.Helper()
		isQ, isR, isP := rlq.shouldQueueImpl(ctx, decision)
		assert.Equal(t, shouldQ, isQ)
		assert.Equal(t, recompute, isR)
		assert.Equal(t, prio, isP)
	}

	verify(false, false, 0)

	// Check all the boxes: unknown log size, leader, and non-empty log.
	decision.Input.LogSize = 123
	decision.Input.LogSizeTrusted = false
	decision.Input.FirstIndex = 10
	decision.Input.LastIndex = 20

	verify(true, true, 1+float64(decision.Input.MaxLogSize)/2)

	golden := decision

	// Check all boxes except that log is empty.
	decision = golden
	decision.Input.LastIndex = decision.Input.FirstIndex
	verify(false, false, 0)
}
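// Editor's note (derived from the expectations above, not part of the original
// file): when the log size is untrusted, the queue asks for a recomputation
// and queues the replica at priority 1 + float64(MaxLogSize)/2, but only while
// the log is non-empty -- an empty log (LastIndex == FirstIndex) suppresses
// both the queueing and the recomputation.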
// TestTruncateLogRecompute checks that if raftLogSize is not trusted, the raft
// log queue picks up the replica, recomputes the log size, and considers a
// truncation.
func TestTruncateLogRecompute(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	dir, cleanup := testutils.TempDir(t)
	defer cleanup()

	cache := storage.NewRocksDBCache(1 << 20)
	defer cache.Release()
	eng, err := storage.NewRocksDB(storage.RocksDBConfig{StorageConfig: base.StorageConfig{Dir: dir}}, cache)
	if err != nil {
		t.Fatal(err)
	}
	defer eng.Close()

	tc := testContext{
		engine: eng,
	}
	cfg := TestStoreConfig(nil)
	cfg.TestingKnobs.DisableRaftLogQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.StartWithStoreConfig(t, stopper, cfg)

	key := roachpb.Key("a")
	repl := tc.store.LookupReplica(keys.MustAddr(key))

	trusted := func() bool {
		repl.mu.Lock()
		defer repl.mu.Unlock()
		return repl.mu.raftLogSizeTrusted
	}

	put := func() {
		var v roachpb.Value
		v.SetBytes(bytes.Repeat([]byte("x"), RaftLogQueueStaleSize*5))
		put := roachpb.NewPut(key, v)
		var ba roachpb.BatchRequest
		ba.Add(put)
		ba.RangeID = repl.RangeID

		if _, pErr := tc.store.Send(ctx, ba); pErr != nil {
			t.Fatal(pErr)
		}
	}

	put()

	decision, err := newTruncateDecision(ctx, repl)
	assert.NoError(t, err)
	assert.True(t, decision.ShouldTruncate())
	// Should never trust initially, until recomputed at least once.
	assert.False(t, trusted())

	repl.mu.Lock()
	repl.mu.raftLogSizeTrusted = false
	repl.mu.raftLogSize += 12          // garbage
	repl.mu.raftLogLastCheckSize += 12 // garbage
	repl.mu.Unlock()

	// Force a raft log queue run. The result should be a nonzero Raft log of
	// size below the threshold (though we won't check that, since the log could
	// have grown back over the threshold; instead we verify that the recomputed
	// size is in sync with what the replica claims).
	tc.store.SetRaftLogQueueActive(true)
	tc.store.MustForceRaftLogScanAndProcess()

	for i := 0; i < 2; i++ {
		verifyLogSizeInSync(t, repl)
		assert.True(t, trusted())
		put() // make sure we remain trusted and in sync
	}
}