github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/raft_log_queue_test.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/assert"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/tracker"
)

func TestShouldTruncate(t *testing.T) {
	defer leaktest.AfterTest(t)()

	testCases := []struct {
		truncatableIndexes uint64
		raftLogSize        int64
		expected           bool
	}{
		{RaftLogQueueStaleThreshold - 1, 0, false},
		{RaftLogQueueStaleThreshold, 0, true},
		{0, RaftLogQueueStaleSize, false},
		{1, RaftLogQueueStaleSize - 1, false},
		{1, RaftLogQueueStaleSize, true},
	}
	for _, c := range testCases {
		t.Run("", func(t *testing.T) {
			var d truncateDecision
			d.Input.LogSize = c.raftLogSize
			d.Input.FirstIndex = 123
			d.NewFirstIndex = d.Input.FirstIndex + c.truncatableIndexes
			v := d.ShouldTruncate()
			if c.expected != v {
				t.Fatalf("expected %v, but found %v", c.expected, v)
			}
		})
	}
}
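// Editor's sketch (not part of the original file; exampleShouldTruncateTriggers
// is a hypothetical helper): the two independent triggers that the table in
// TestShouldTruncate exercises, isolated for readability. A truncation is
// suggested either once RaftLogQueueStaleThreshold entries are truncatable, or
// once at least one entry is truncatable and the log has reached
// RaftLogQueueStaleSize bytes.
func exampleShouldTruncateTriggers() (byCount, bySize bool) {
	// Trigger 1: enough stale entries, regardless of log size.
	var d1 truncateDecision
	d1.Input.FirstIndex = 123
	d1.NewFirstIndex = d1.Input.FirstIndex + RaftLogQueueStaleThreshold
	byCount = d1.ShouldTruncate() // true

	// Trigger 2: only one truncatable entry, but the log is large.
	var d2 truncateDecision
	d2.Input.LogSize = RaftLogQueueStaleSize
	d2.Input.FirstIndex = 123
	d2.NewFirstIndex = d2.Input.FirstIndex + 1
	bySize = d2.ShouldTruncate() // true

	return byCount, bySize
}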
func TestComputeTruncateDecision(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()

	const targetSize = 1000

	// NB: all tests here have a truncateDecision which starts with "should
	// truncate: false", because these tests don't simulate enough data to be
	// over the truncation threshold.
	testCases := []struct {
		commit          uint64
		progress        []uint64
		raftLogSize     int64
		firstIndex      uint64
		lastIndex       uint64
		pendingSnapshot uint64
		exp             string
	}{
		{
			// Nothing to truncate.
			1, []uint64{1, 2}, 100, 1, 1, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: last index)]",
		},
		{
			// Nothing to truncate on this replica, though a quorum elsewhere has more progress.
			// NB: this couldn't happen if we're truly the Raft leader, unless we appended to our
			// own log asynchronously.
			1, []uint64{1, 5, 5}, 100, 1, 1, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: last index)]",
		},
		{
			// We're not truncating anything, but one follower is already cut off. There's no pending
			// snapshot so we shouldn't be causing any additional snapshots.
			2, []uint64{1, 5, 5}, 100, 2, 2, 0,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
		{
			// The happy case.
			5, []uint64{5, 5, 5}, 100, 2, 5, 0,
			"should truncate: false [truncate 3 entries to first index 5 (chosen via: last index)]",
		},
		{
			// No truncation, but the outstanding snapshot is made obsolete by the truncation. However
			// it was already obsolete before. (This example is also not one you could manufacture in
			// a real system).
			2, []uint64{5, 5, 5}, 100, 2, 2, 1,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
		{
			// Respecting the pending snapshot.
			5, []uint64{5, 5, 5}, 100, 2, 5, 3,
			"should truncate: false [truncate 1 entries to first index 3 (chosen via: pending snapshot)]",
		},
		{
			// Log is below target size, so respecting the slowest follower.
			3, []uint64{1, 2, 3, 4}, 100, 1, 5, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: followers)]",
		},
		{
			// Truncating since local log starts at 2. One follower is already cut off without a pending
			// snapshot.
			2, []uint64{1, 2, 3, 4}, 100, 2, 2, 0,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
		// Don't truncate off active followers, even if over targetSize.
		{
			3, []uint64{1, 3, 3, 4}, 2000, 1, 3, 0,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: followers); log too large (2.0 KiB > 1000 B)]",
		},
		// Don't truncate away pending snapshot, even when log too large.
		{
			100, []uint64{100, 100}, 2000, 1, 100, 50,
			"should truncate: false [truncate 49 entries to first index 50 (chosen via: pending snapshot); log too large (2.0 KiB > 1000 B)]",
		},
		{
			3, []uint64{1, 3, 3, 4}, 2000, 2, 3, 0,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index); log too large (2.0 KiB > 1000 B)]",
		},
		{
			3, []uint64{1, 3, 3, 4}, 2000, 3, 3, 0,
			"should truncate: false [truncate 0 entries to first index 3 (chosen via: first index); log too large (2.0 KiB > 1000 B)]",
		},
		// Respecting the pending snapshot.
		{
			7, []uint64{4}, 2000, 1, 7, 1,
			"should truncate: false [truncate 0 entries to first index 1 (chosen via: pending snapshot); log too large (2.0 KiB > 1000 B)]",
		},
		// Never truncate past the commit index.
		{
			3, []uint64{3, 3, 6}, 100, 2, 7, 0,
			"should truncate: false [truncate 1 entries to first index 3 (chosen via: commit)]",
		},
		// Never truncate past the last index.
		{
			3, []uint64{5}, 100, 1, 3, 0,
			"should truncate: false [truncate 2 entries to first index 3 (chosen via: last index)]",
		},
		// Never truncate "before the first index".
		{
			3, []uint64{5}, 100, 2, 3, 1,
			"should truncate: false [truncate 0 entries to first index 2 (chosen via: first index)]",
		},
	}
	for i, c := range testCases {
		t.Run("", func(t *testing.T) {
			status := raft.Status{
				Progress: make(map[uint64]tracker.Progress),
			}
			for j, v := range c.progress {
				status.Progress[uint64(j)] = tracker.Progress{
					RecentActive: true,
					State:        tracker.StateReplicate,
					Match:        v,
					Next:         v + 1,
				}
			}
			status.Commit = c.commit
			input := truncateDecisionInput{
				RaftStatus:           status,
				LogSize:              c.raftLogSize,
				MaxLogSize:           targetSize,
				LogSizeTrusted:       true,
				FirstIndex:           c.firstIndex,
				LastIndex:            c.lastIndex,
				PendingSnapshotIndex: c.pendingSnapshot,
			}
			decision := computeTruncateDecision(input)
			if act, exp := decision.String(), c.exp; act != exp {
				t.Errorf("%d:\ngot:\n%s\nwanted:\n%s", i, act, exp)
			}

			// Verify the triggers that queue a range for recomputation. In
			// essence, when the raft log size is not trusted we want to suggest
			// a truncation and also a recomputation. If the size *is* trusted,
			// we'll just see the decision play out as before.
			// The real tests for this are in TestRaftLogQueueShouldQueueRecompute,
			// but this is some nice extra coverage.
			should, recompute, prio := (*raftLogQueue)(nil).shouldQueueImpl(ctx, decision)
			assert.Equal(t, decision.ShouldTruncate(), should)
			assert.False(t, recompute)
			assert.Equal(t, decision.ShouldTruncate(), prio != 0)
			input.LogSizeTrusted = false
			input.RaftStatus.RaftState = raft.StateLeader
			if input.LastIndex <= input.FirstIndex {
				input.LastIndex = input.FirstIndex + 1
			}
			decision = computeTruncateDecision(input)
			should, recompute, prio = (*raftLogQueue)(nil).shouldQueueImpl(ctx, decision)
			assert.True(t, should)
			assert.True(t, prio > 0)
			assert.True(t, recompute)
		})
	}
}
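// Editor's note (summarizing the table above, not part of the original file):
// computeTruncateDecision takes the smallest index permitted by each active
// constraint -- the slowest replicating follower, any pending snapshot, the
// commit index, and the local last index -- and then clamps the result so it
// never falls below the replica's current first index. Active followers and
// pending snapshots are respected even when the log is over its target size;
// see TestComputeTruncateDecisionProgressStatusProbe below for how inactive
// followers are treated once the log grows too large.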
// TestComputeTruncateDecisionProgressStatusProbe verifies that when a follower
// is marked as active and is being probed for its log index, we don't truncate
// the log out from under it.
func TestComputeTruncateDecisionProgressStatusProbe(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// NB: most tests here have a truncateDecision which starts with "should
	// truncate: false", because these tests don't simulate enough data to be
	// over the truncation threshold.
	exp := map[bool]map[bool]string{ // (tooLarge, active)
		false: {
			true:  "should truncate: false [truncate 0 entries to first index 10 (chosen via: probing follower)]",
			false: "should truncate: false [truncate 0 entries to first index 10 (chosen via: first index)]",
		},
		true: {
			true:  "should truncate: false [truncate 0 entries to first index 10 (chosen via: probing follower); log too large (2.0 KiB > 1.0 KiB)]",
			false: "should truncate: true [truncate 190 entries to first index 200 (chosen via: followers); log too large (2.0 KiB > 1.0 KiB)]",
		},
	}

	testutils.RunTrueAndFalse(t, "tooLarge", func(t *testing.T, tooLarge bool) {
		testutils.RunTrueAndFalse(t, "active", func(t *testing.T, active bool) {
			status := raft.Status{
				Progress: make(map[uint64]tracker.Progress),
			}
			progress := []uint64{100, 200, 300, 400, 500}
			lastIndex := uint64(500)
			status.Commit = 300

			for i, v := range progress {
				var pr tracker.Progress
				if v == 100 {
					// A probing follower is probed with some index (Next) but
					// it has a zero Match (i.e. no idea how much of its log
					// agrees with ours).
					pr = tracker.Progress{
						RecentActive: active,
						State:        tracker.StateProbe,
						Match:        0,
						Next:         v,
					}
				} else { // everyone else
					pr = tracker.Progress{
						Match:        v,
						Next:         v + 1,
						RecentActive: true,
						State:        tracker.StateReplicate,
					}
				}
				status.Progress[uint64(i)] = pr
			}

			input := truncateDecisionInput{
				RaftStatus:     status,
				MaxLogSize:     1024,
				FirstIndex:     10,
				LastIndex:      lastIndex,
				LogSizeTrusted: true,
			}
			if tooLarge {
				input.LogSize += 2 * input.MaxLogSize
			}

			decision := computeTruncateDecision(input)
			if s, exp := decision.String(), exp[tooLarge][active]; s != exp {
				t.Errorf("expected %q, got %q", exp, s)
			}
		})
	})
}
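// Editor's sketch (hypothetical variable, not part of the original file): the
// Progress shape the test above builds for a probing follower. The leader
// probes optimistically at Next while Match stays at zero until the follower
// acknowledges a matching log prefix, which is why an active probe forces the
// truncation decision to stay at the current first index.
var exampleProbingFollower = tracker.Progress{
	State:        tracker.StateProbe,
	Match:        0,   // no confirmed shared prefix yet
	Next:         100, // index currently being probed
	RecentActive: true,
}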
func TestTruncateDecisionZeroValue(t *testing.T) {
	defer leaktest.AfterTest(t)()

	var decision truncateDecision
	assert.False(t, decision.ShouldTruncate())
	assert.Zero(t, decision.NumNewRaftSnapshots())
	assert.Zero(t, decision.NumTruncatableIndexes())
	assert.Equal(t, "should truncate: false [truncate 0 entries to first index 0 (chosen via: ); log size untrusted]", decision.String())
}

func TestTruncateDecisionNumSnapshots(t *testing.T) {
	defer leaktest.AfterTest(t)()

	status := raft.Status{
		Progress: map[uint64]tracker.Progress{
			// Fully caught up.
			5: {State: tracker.StateReplicate, Match: 11, Next: 12},
			// Behind.
			6: {State: tracker.StateReplicate, Match: 10, Next: 11},
			// Last MsgApp in flight, so basically caught up.
			7: {State: tracker.StateReplicate, Match: 10, Next: 12},
			8: {State: tracker.StateProbe},    // irrelevant
			9: {State: tracker.StateSnapshot}, // irrelevant
		},
	}

	decision := truncateDecision{Input: truncateDecisionInput{RaftStatus: status}}
	assert.Equal(t, 0, decision.raftSnapshotsForIndex(10))
	assert.Equal(t, 1, decision.raftSnapshotsForIndex(11))
	assert.Equal(t, 3, decision.raftSnapshotsForIndex(12))
	assert.Equal(t, 3, decision.raftSnapshotsForIndex(13))
}
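// Editor's note (reading off the assertions above, not part of the original
// file): a replicating follower starts to need a snapshot once the proposed
// first index passes the next entry the leader would send it. Truncating to
// index 11 only strands the follower at Next: 11, while the follower whose
// last MsgApp is still in flight (Match: 10, Next: 12) is unaffected until
// index 12. Probing and snapshotting followers never add to the count here.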
func verifyLogSizeInSync(t *testing.T, r *Replica) {
	t.Helper()
	r.raftMu.Lock()
	defer r.raftMu.Unlock()
	r.mu.Lock()
	raftLogSize := r.mu.raftLogSize
	r.mu.Unlock()
	actualRaftLogSize, err := ComputeRaftLogSize(context.Background(), r.RangeID, r.Engine(), r.SideloadedRaftMuLocked())
	if err != nil {
		t.Fatal(err)
	}
	if actualRaftLogSize != raftLogSize {
		t.Fatalf("replica claims raft log size %d, but computed %d", raftLogSize, actualRaftLogSize)
	}
}

func TestUpdateRaftStatusActivity(t *testing.T) {
	defer leaktest.AfterTest(t)()

	type testCase struct {
		prs        []tracker.Progress
		replicas   []roachpb.ReplicaDescriptor
		lastUpdate lastUpdateTimesMap
		now        time.Time

		exp []tracker.Progress
	}

	now := timeutil.Now()

	inactivityThreshold := time.Second

	tcs := []testCase{
		// No data, no crash.
		{},
		// No knowledge = no update.
		{prs: []tracker.Progress{{RecentActive: true}}, exp: []tracker.Progress{{RecentActive: true}}},
		{prs: []tracker.Progress{{RecentActive: false}}, exp: []tracker.Progress{{RecentActive: false}}},
		// See replica in descriptor but then don't find it in the map. Assumes the follower is not
		// active.
		{
			replicas: []roachpb.ReplicaDescriptor{{ReplicaID: 1}},
			prs:      []tracker.Progress{{RecentActive: true}},
			exp:      []tracker.Progress{{RecentActive: false}},
		},
		// Three replicas in descriptor. The first one responded recently, the second didn't,
		// the third did but it doesn't have a Progress.
		{
			replicas: []roachpb.ReplicaDescriptor{{ReplicaID: 1}, {ReplicaID: 2}, {ReplicaID: 3}},
			prs:      []tracker.Progress{{RecentActive: false}, {RecentActive: true}},
			lastUpdate: map[roachpb.ReplicaID]time.Time{
				1: now.Add(-1 * inactivityThreshold / 2),
				2: now.Add(-1 - inactivityThreshold),
				3: now,
			},
			now: now,

			exp: []tracker.Progress{{RecentActive: true}, {RecentActive: false}},
		},
	}

	ctx := context.Background()

	for _, tc := range tcs {
		t.Run("", func(t *testing.T) {
			prs := make(map[uint64]tracker.Progress)
			for i, pr := range tc.prs {
				prs[uint64(i+1)] = pr
			}
			expPRs := make(map[uint64]tracker.Progress)
			for i, pr := range tc.exp {
				expPRs[uint64(i+1)] = pr
			}
			updateRaftProgressFromActivity(ctx, prs, tc.replicas,
				func(replicaID roachpb.ReplicaID) bool {
					return tc.lastUpdate.isFollowerActiveSince(ctx, replicaID, tc.now, inactivityThreshold)
				},
			)

			assert.Equal(t, expPRs, prs)
		})
	}
}
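// Editor's sketch (hypothetical helper, not part of the original file): the
// activity predicate the test above feeds to updateRaftProgressFromActivity.
// A follower counts as recently active only if the last-update map has an
// entry for it that is no older than the inactivity threshold; the exact
// boundary behavior is defined by isFollowerActiveSince.
func exampleIsFollowerActive(
	lastUpdate lastUpdateTimesMap, id roachpb.ReplicaID, now time.Time, threshold time.Duration,
) bool {
	last, ok := lastUpdate[id]
	// Unknown followers are assumed inactive, matching the test case in which
	// a replica appears in the descriptor but not in the map.
	return ok && !last.Before(now.Add(-threshold))
}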
func TestNewTruncateDecisionMaxSize(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	cfg := TestStoreConfig(hlc.NewClock(hlc.NewManualClock(123).UnixNano, time.Nanosecond))
	const exp = 1881
	cfg.RaftLogTruncationThreshold = exp
	store := createTestStoreWithConfig(
		t, stopper, testStoreOpts{createSystemRanges: true}, &cfg,
	)

	repl, err := store.GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	ctx := context.Background()
	td, err := newTruncateDecision(ctx, repl)
	if err != nil {
		t.Fatal(err)
	}

	if td.Input.MaxLogSize != exp {
		t.Fatalf("MaxLogSize %d is unexpected, wanted %d", td.Input.MaxLogSize, exp)
	}
}

// TestNewTruncateDecision verifies that old raft log entries are correctly
// removed.
func TestNewTruncateDecision(t *testing.T) {
	defer leaktest.AfterTest(t)()

	t.Skip("https://github.com/cockroachdb/cockroach/issues/38584")

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	store, _ := createTestStore(t,
		testStoreOpts{
			// This test was written before test stores could start with more than one
			// range and was not adapted.
			createSystemRanges: false,
		},
		stopper)
	store.SetRaftLogQueueActive(false)

	r, err := store.GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	getIndexes := func() (uint64, int, uint64, error) {
		d, err := newTruncateDecision(context.Background(), r)
		if err != nil {
			return 0, 0, 0, err
		}
		return d.Input.FirstIndex, d.NumTruncatableIndexes(), d.NewFirstIndex, nil
	}

	aFirst, aTruncatable, aOldest, err := getIndexes()
	if err != nil {
		t.Fatal(err)
	}
	if aFirst == 0 {
		t.Errorf("expected first index to be greater than 0, got %d", aFirst)
	}

	// Write a few keys to the range.
	for i := 0; i < RaftLogQueueStaleThreshold+1; i++ {
		key := roachpb.Key(fmt.Sprintf("key%02d", i))
		args := putArgs(key, []byte(fmt.Sprintf("value%02d", i)))
		if _, err := kv.SendWrapped(context.Background(), store.TestSender(), &args); err != nil {
			t.Fatal(err)
		}
	}

	bFirst, bTruncatable, bOldest, err := getIndexes()
	if err != nil {
		t.Fatal(err)
	}
	if aFirst != bFirst {
		t.Fatalf("expected firstIndex to not change, instead it changed from %d -> %d", aFirst, bFirst)
	}
	if aTruncatable >= bTruncatable {
		t.Fatalf("expected truncatableIndexes to increase, instead it changed from %d -> %d", aTruncatable, bTruncatable)
	}
	if aOldest >= bOldest {
		t.Fatalf("expected oldestIndex to increase, instead it changed from %d -> %d", aOldest, bOldest)
	}

	// Enable the raft log scanner and force a truncation.
	store.SetRaftLogQueueActive(true)
	store.MustForceRaftLogScanAndProcess()
	store.SetRaftLogQueueActive(false)

	// There can be a delay from when the truncation command is issued and the
	// indexes updating.
	var cFirst, cOldest uint64
	var numTruncatable int
	testutils.SucceedsSoon(t, func() error {
		var err error
		cFirst, numTruncatable, cOldest, err = getIndexes()
		if err != nil {
			t.Fatal(err)
		}
		if bFirst == cFirst {
			return errors.Errorf("truncation did not occur, expected firstIndex to change, instead it remained at %d", cFirst)
		}
		return nil
	})
	if bTruncatable < numTruncatable {
		t.Errorf("expected numTruncatable to decrease, instead it changed from %d -> %d", bTruncatable, numTruncatable)
	}
	if bOldest >= cOldest {
		t.Errorf("expected oldestIndex to increase, instead it changed from %d -> %d", bOldest, cOldest)
	}

	verifyLogSizeInSync(t, r)

	// Again, enable the raft log scanner and force a truncation. This time
	// we expect no truncation to occur.
	store.SetRaftLogQueueActive(true)
	store.MustForceRaftLogScanAndProcess()
	store.SetRaftLogQueueActive(false)

	// Unlike the last iteration, where we expect a truncation and can wait on
	// it with SucceedsSoon, we can't do that here. This check is fragile in
	// that the truncation triggered here may lose the race against the call to
	// GetFirstIndex or newTruncateDecision, giving a false negative. Fixing
	// this requires additional instrumentation of the queues, which was deemed
	// to require too much work at the time of this writing.
	dFirst, dTruncatable, dOldest, err := getIndexes()
	if err != nil {
		t.Fatal(err)
	}
	if cFirst != dFirst {
		t.Errorf("truncation should not have occurred, but firstIndex changed from %d -> %d", cFirst, dFirst)
	}
	if numTruncatable != dTruncatable {
		t.Errorf("truncation should not have occurred, but truncatableIndexes changed from %d -> %d", numTruncatable, dTruncatable)
	}
	if cOldest != dOldest {
		t.Errorf("truncation should not have occurred, but oldestIndex changed from %d -> %d", cOldest, dOldest)
	}
}
// TestProactiveRaftLogTruncate verifies that we proactively truncate the raft
// log even when replica scanning is disabled.
func TestProactiveRaftLogTruncate(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()

	testCases := []struct {
		count     int
		valueSize int
	}{
		// Lots of small KVs.
		{RaftLogQueueStaleSize / 100, 5},
		// One big KV.
		{1, RaftLogQueueStaleSize},
	}
	for _, c := range testCases {
		t.Run("", func(t *testing.T) {
			stopper := stop.NewStopper()
			defer stopper.Stop(ctx)
			store, _ := createTestStore(t,
				testStoreOpts{
					// This test was written before test stores could start with more than one
					// range and was not adapted.
					createSystemRanges: false,
				},
				stopper)

			// Note that turning off the replica scanner does not prevent the queues
			// from processing entries (in this case specifically the raftLogQueue),
			// just that the scanner will not try to push all replicas onto the queues.
			store.SetReplicaScannerActive(false)

			r, err := store.GetReplica(1)
			if err != nil {
				t.Fatal(err)
			}

			oldFirstIndex, err := r.GetFirstIndex()
			if err != nil {
				t.Fatal(err)
			}

			for i := 0; i < c.count; i++ {
				key := roachpb.Key(fmt.Sprintf("key%02d", i))
				args := putArgs(key, []byte(fmt.Sprintf("%s%02d", strings.Repeat("v", c.valueSize), i)))
				if _, err := kv.SendWrapped(ctx, store.TestSender(), &args); err != nil {
					t.Fatal(err)
				}
			}

			// Log truncation is an asynchronous process and while it will usually occur
			// fairly quickly, there is a slight race between this check and the
			// truncation, especially when under stress.
			testutils.SucceedsSoon(t, func() error {
				newFirstIndex, err := r.GetFirstIndex()
				if err != nil {
					t.Fatal(err)
				}
				if newFirstIndex <= oldFirstIndex {
					return errors.Errorf("log was not correctly truncated, old first index:%d, current first index:%d",
						oldFirstIndex, newFirstIndex)
				}
				return nil
			})
		})
	}
}
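// Editor's note (not part of the original file): both cases above are sized to
// push the raft log past RaftLogQueueStaleSize -- once through many small
// writes and once through a single large one -- so it is the write path
// itself, rather than the disabled replica scanner, that nudges the range onto
// the raftLogQueue.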
func TestSnapshotLogTruncationConstraints(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	r := &Replica{}
	var storeID roachpb.StoreID
	id1, id2 := uuid.MakeV4(), uuid.MakeV4()
	const (
		index1 = 50
		index2 = 60
	)

	// Add first constraint.
	r.addSnapshotLogTruncationConstraintLocked(ctx, id1, index1, storeID)
	exp1 := map[uuid.UUID]snapTruncationInfo{id1: {index: index1}}

	// Make sure it registered.
	assert.Equal(t, r.mu.snapshotLogTruncationConstraints, exp1)

	// Add another constraint with the same id. Extremely unlikely in practice
	// but we want to make sure it doesn't blow anything up. Collisions are
	// handled by ignoring the colliding update.
	r.addSnapshotLogTruncationConstraintLocked(ctx, id1, index2, storeID)
	assert.Equal(t, r.mu.snapshotLogTruncationConstraints, exp1)

	// Helper that grabs the min constraint index (which can trigger GC as a
	// byproduct) and asserts.
	assertMin := func(exp uint64, now time.Time) {
		t.Helper()
		const anyRecipientStore roachpb.StoreID = 0
		if minIndex := r.getAndGCSnapshotLogTruncationConstraintsLocked(now, anyRecipientStore); minIndex != exp {
			t.Fatalf("unexpected min index %d, wanted %d", minIndex, exp)
		}
	}

	// Queue should be told index1 is the highest pending one. Note that the
	// colliding update at index2 is not represented.
	assertMin(index1, time.Time{})

	// Add another, higher, index. We're not going to notice it's around
	// until the lower one disappears.
	r.addSnapshotLogTruncationConstraintLocked(ctx, id2, index2, storeID)

	now := timeutil.Now()
	// The colliding snapshot comes back. Or the original, we can't tell.
	r.completeSnapshotLogTruncationConstraint(ctx, id1, now)
	// The index should show up when its deadline isn't hit.
	assertMin(index1, now)
	assertMin(index1, now.Add(raftLogQueuePendingSnapshotGracePeriod))
	assertMin(index1, now.Add(raftLogQueuePendingSnapshotGracePeriod))
	// Once we're over deadline, the index returned so far disappears.
	assertMin(index2, now.Add(raftLogQueuePendingSnapshotGracePeriod+1))
	assertMin(index2, time.Time{})
	assertMin(index2, now.Add(10*raftLogQueuePendingSnapshotGracePeriod))

	r.completeSnapshotLogTruncationConstraint(ctx, id2, now)
	assertMin(index2, now)
	assertMin(index2, now.Add(raftLogQueuePendingSnapshotGracePeriod))
	assertMin(0, now.Add(2*raftLogQueuePendingSnapshotGracePeriod))

	assert.Equal(t, r.mu.snapshotLogTruncationConstraints, map[uuid.UUID]snapTruncationInfo(nil))
}
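// Editor's note (not part of the original file): as the sequence above shows,
// completing a constraint does not drop it immediately. The completed entry
// keeps protecting its index until raftLogQueuePendingSnapshotGracePeriod has
// elapsed past its completion time, after which the GC pass inside
// getAndGCSnapshotLogTruncationConstraintsLocked removes it and the next
// pending index (or zero, if none remain) takes over.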
// TestTruncateLog verifies that the TruncateLog command removes a
// prefix of the raft logs (modifying FirstIndex() and making them
// inaccessible via Entries()).
func TestTruncateLog(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	cfg := TestStoreConfig(nil)
	cfg.TestingKnobs.DisableRaftLogQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.StartWithStoreConfig(t, stopper, cfg)

	// Populate the log with 10 entries. Save the LastIndex after each write.
	var indexes []uint64
	for i := 0; i < 10; i++ {
		args := incrementArgs([]byte("a"), int64(i))

		if _, pErr := tc.SendWrapped(args); pErr != nil {
			t.Fatal(pErr)
		}
		idx, err := tc.repl.GetLastIndex()
		if err != nil {
			t.Fatal(err)
		}
		indexes = append(indexes, idx)
	}

	rangeID := tc.repl.RangeID

	// Discard the first half of the log.
	truncateArgs := truncateLogArgs(indexes[5], rangeID)
	if _, pErr := tc.SendWrappedWith(roachpb.Header{RangeID: 1}, &truncateArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// FirstIndex has changed.
	firstIndex, err := tc.repl.GetFirstIndex()
	if err != nil {
		t.Fatal(err)
	}
	if firstIndex != indexes[5] {
		t.Errorf("expected firstIndex == %d, got %d", indexes[5], firstIndex)
	}

	// We can still get what remains of the log.
	tc.repl.mu.Lock()
	entries, err := tc.repl.raftEntriesLocked(indexes[5], indexes[9], math.MaxUint64)
	tc.repl.mu.Unlock()
	if err != nil {
		t.Fatal(err)
	}
	if len(entries) != int(indexes[9]-indexes[5]) {
		t.Errorf("expected %d entries, got %d", indexes[9]-indexes[5], len(entries))
	}

	// But any range that includes the truncated entries returns an error.
	tc.repl.mu.Lock()
	_, err = tc.repl.raftEntriesLocked(indexes[4], indexes[9], math.MaxUint64)
	tc.repl.mu.Unlock()
	if !errors.Is(err, raft.ErrCompacted) {
		t.Errorf("expected ErrCompacted, got %s", err)
	}

	// The term of the last truncated entry is still available.
	tc.repl.mu.Lock()
	term, err := tc.repl.raftTermRLocked(indexes[4])
	tc.repl.mu.Unlock()
	if err != nil {
		t.Fatal(err)
	}
	if term == 0 {
		t.Errorf("invalid term 0 for truncated entry")
	}

	// The terms of older entries are gone.
	tc.repl.mu.Lock()
	_, err = tc.repl.raftTermRLocked(indexes[3])
	tc.repl.mu.Unlock()
	if !errors.Is(err, raft.ErrCompacted) {
		t.Errorf("expected ErrCompacted, got %s", err)
	}

	// Truncating logs that have already been truncated should not return an
	// error.
	truncateArgs = truncateLogArgs(indexes[3], rangeID)
	if _, pErr := tc.SendWrapped(&truncateArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Truncating logs that have the wrong rangeID included should not return
	// an error but should not truncate any logs.
	truncateArgs = truncateLogArgs(indexes[9], rangeID+1)
	if _, pErr := tc.SendWrapped(&truncateArgs); pErr != nil {
		t.Fatal(pErr)
	}

	tc.repl.mu.Lock()
	// The term of the last truncated entry is still available.
	term, err = tc.repl.raftTermRLocked(indexes[4])
	tc.repl.mu.Unlock()
	if err != nil {
		t.Fatal(err)
	}
	if term == 0 {
		t.Errorf("invalid term 0 for truncated entry")
	}
}
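// Editor's note (not part of the original file): the asymmetry checked above
// mirrors etcd/raft's Storage contract -- Entries() must fail with
// ErrCompacted for anything below FirstIndex(), while Term() must still answer
// for FirstIndex()-1, because raft needs the (index, term) pair at the
// truncation boundary to validate log matching for the first surviving entry.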
func TestRaftLogQueueShouldQueueRecompute(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	var rlq *raftLogQueue

	// NB: Cases for which decision.ShouldTruncate() is true are tested in
	// TestComputeTruncateDecision, so here the decision itself is never
	// positive.
	var decision truncateDecision
	decision.Input.LogSizeTrusted = true
	decision.Input.LogSize = 12
	decision.Input.MaxLogSize = 1000

	verify := func(shouldQ bool, recompute bool, prio float64) {
		t.Helper()
		isQ, isR, isP := rlq.shouldQueueImpl(ctx, decision)
		assert.Equal(t, shouldQ, isQ)
		assert.Equal(t, recompute, isR)
		assert.Equal(t, prio, isP)
	}

	verify(false, false, 0)

	// Check all the boxes: unknown log size, leader, and non-empty log.
	decision.Input.LogSize = 123
	decision.Input.LogSizeTrusted = false
	decision.Input.FirstIndex = 10
	decision.Input.LastIndex = 20

	verify(true, true, 1+float64(decision.Input.MaxLogSize)/2)

	golden := decision

	// Check all boxes except that log is empty.
	decision = golden
	decision.Input.LastIndex = decision.Input.FirstIndex
	verify(false, false, 0)
}
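// Editor's note (derived from the expectations above, not part of the original
// file): when the log size is untrusted, the queue asks for a recomputation
// and queues the replica at priority 1 + float64(MaxLogSize)/2, but only while
// the log is non-empty -- an empty log (LastIndex == FirstIndex) suppresses
// both the queueing and the recomputation.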
// TestTruncateLogRecompute checks that if raftLogSize is not trusted, the raft
// log queue picks up the replica, recomputes the log size, and considers a
// truncation.
func TestTruncateLogRecompute(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	dir, cleanup := testutils.TempDir(t)
	defer cleanup()

	cache := storage.NewRocksDBCache(1 << 20)
	defer cache.Release()
	eng, err := storage.NewRocksDB(storage.RocksDBConfig{StorageConfig: base.StorageConfig{Dir: dir}}, cache)
	if err != nil {
		t.Fatal(err)
	}
	defer eng.Close()

	tc := testContext{
		engine: eng,
	}
	cfg := TestStoreConfig(nil)
	cfg.TestingKnobs.DisableRaftLogQueue = true
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.StartWithStoreConfig(t, stopper, cfg)

	key := roachpb.Key("a")
	repl := tc.store.LookupReplica(keys.MustAddr(key))

	trusted := func() bool {
		repl.mu.Lock()
		defer repl.mu.Unlock()
		return repl.mu.raftLogSizeTrusted
	}

	put := func() {
		var v roachpb.Value
		v.SetBytes(bytes.Repeat([]byte("x"), RaftLogQueueStaleSize*5))
		put := roachpb.NewPut(key, v)
		var ba roachpb.BatchRequest
		ba.Add(put)
		ba.RangeID = repl.RangeID

		if _, pErr := tc.store.Send(ctx, ba); pErr != nil {
			t.Fatal(pErr)
		}
	}

	put()

	decision, err := newTruncateDecision(ctx, repl)
	assert.NoError(t, err)
	assert.True(t, decision.ShouldTruncate())
	// Should never trust initially, until recomputed at least once.
	assert.False(t, trusted())

	repl.mu.Lock()
	repl.mu.raftLogSizeTrusted = false
	repl.mu.raftLogSize += 12          // garbage
	repl.mu.raftLogLastCheckSize += 12 // garbage
	repl.mu.Unlock()

	// Force a raft log queue run. The result should be a nonzero Raft log of
	// size below the threshold (though we won't check that, since the log could
	// have grown back over the threshold; instead we verify that the recomputed
	// size is in sync with what the replica claims).
	tc.store.SetRaftLogQueueActive(true)
	tc.store.MustForceRaftLogScanAndProcess()

	for i := 0; i < 2; i++ {
		verifyLogSizeInSync(t, repl)
		assert.True(t, trusted())
		put() // make sure we remain trusted and in sync
	}
}