github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_sideload_test.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 package kvserver 11 12 import ( 13 "bytes" 14 "context" 15 "fmt" 16 "io" 17 "math" 18 "math/rand" 19 "os" 20 "path/filepath" 21 "reflect" 22 "regexp" 23 "sort" 24 "strconv" 25 "strings" 26 "testing" 27 28 "github.com/cockroachdb/cockroach/pkg/base" 29 "github.com/cockroachdb/cockroach/pkg/kv" 30 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" 31 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" 32 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftentry" 33 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader" 34 "github.com/cockroachdb/cockroach/pkg/roachpb" 35 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 36 "github.com/cockroachdb/cockroach/pkg/storage" 37 "github.com/cockroachdb/cockroach/pkg/testutils" 38 "github.com/cockroachdb/cockroach/pkg/util/hlc" 39 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 40 "github.com/cockroachdb/cockroach/pkg/util/log" 41 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 42 "github.com/cockroachdb/cockroach/pkg/util/stop" 43 "github.com/cockroachdb/cockroach/pkg/util/tracing" 44 "github.com/cockroachdb/errors" 45 "github.com/kr/pretty" 46 "go.etcd.io/etcd/raft/raftpb" 47 "golang.org/x/time/rate" 48 ) 49 50 func entryEq(l, r raftpb.Entry) error { 51 if reflect.DeepEqual(l, r) { 52 return nil 53 } 54 _, lData := DecodeRaftCommand(l.Data) 55 _, rData := DecodeRaftCommand(r.Data) 56 var lc, rc kvserverpb.RaftCommand 57 if err := protoutil.Unmarshal(lData, &lc); err != nil { 58 return errors.Wrap(err, "unmarshalling LHS") 59 } 60 if err := protoutil.Unmarshal(rData, &rc); err != nil { 61 return errors.Wrap(err, "unmarshalling RHS") 62 } 63 if !reflect.DeepEqual(lc, rc) { 64 return errors.Newf("unexpected:\n%s", strings.Join(pretty.Diff(lc, rc), "\n")) 65 } 66 return nil 67 } 68 69 func mkEnt( 70 v raftCommandEncodingVersion, index, term uint64, as *kvserverpb.ReplicatedEvalResult_AddSSTable, 71 ) raftpb.Entry { 72 cmdIDKey := strings.Repeat("x", raftCommandIDLen) 73 var cmd kvserverpb.RaftCommand 74 cmd.ReplicatedEvalResult.AddSSTable = as 75 b, err := protoutil.Marshal(&cmd) 76 if err != nil { 77 panic(err) 78 } 79 var ent raftpb.Entry 80 ent.Index, ent.Term = index, term 81 ent.Data = encodeRaftCommand(v, kvserverbase.CmdIDKey(cmdIDKey), b) 82 return ent 83 } 84 85 func TestSideloadingSideloadedStorage(t *testing.T) { 86 defer leaktest.AfterTest(t)() 87 t.Run("Mem", func(t *testing.T) { 88 testSideloadingSideloadedStorage(t, newInMemSideloadStorage) 89 }) 90 t.Run("Disk", func(t *testing.T) { 91 maker := func( 92 s *cluster.Settings, rangeID roachpb.RangeID, rep roachpb.ReplicaID, name string, eng storage.Engine, 93 ) (SideloadStorage, error) { 94 return newDiskSideloadStorage(s, rangeID, rep, name, rate.NewLimiter(rate.Inf, math.MaxInt64), eng) 95 } 96 testSideloadingSideloadedStorage(t, maker) 97 }) 98 } 99 100 func testSideloadingSideloadedStorage( 101 t *testing.T, 102 maker func(*cluster.Settings, roachpb.RangeID, roachpb.ReplicaID, string, storage.Engine) (SideloadStorage, error), 103 ) { 104 dir, cleanup := testutils.TempDir(t) 105 defer cleanup() 106 107 ctx := context.Background() 108 st := cluster.MakeTestingClusterSettings() 109 110 cleanup, eng := newEngine(t) 111 defer cleanup() 112 defer eng.Close() 113 114 ss, err := maker(st, 1, 2, dir, eng) 115 if err != nil { 116 t.Fatal(err) 117 } 118 _, isInMem := ss.(*inMemSideloadStorage) // some things don't make sense for inMem 119 120 assertCreated := func(isCreated bool) { 121 if isInMem { 122 return 123 } 124 if is := ss.(*diskSideloadStorage).dirCreated; is != isCreated { 125 t.Fatalf("assertion failed: expected dirCreated=%t, got %t", isCreated, is) 126 } 127 } 128 129 assertCreated(false) 130 131 const ( 132 lowTerm = 1 133 highTerm 134 ) 135 136 file := func(i uint64) []byte { // take uint64 for convenience 137 return []byte("content-" + strconv.Itoa(int(i))) 138 } 139 140 if err := ss.Put(ctx, 1, highTerm, file(1)); err != nil { 141 t.Fatal(err) 142 } 143 144 assertCreated(true) 145 146 if c, err := ss.Get(ctx, 1, highTerm); err != nil { 147 t.Fatal(err) 148 } else if exp := file(1); !bytes.Equal(c, exp) { 149 t.Fatalf("got %q, wanted %q", c, exp) 150 } 151 152 // Overwrites the occupied slot. 153 if err := ss.Put(ctx, 1, highTerm, file(12345)); err != nil { 154 t.Fatal(err) 155 } 156 157 // ... consequently the old entry is gone. 158 if c, err := ss.Get(ctx, 1, highTerm); err != nil { 159 t.Fatal(err) 160 } else if exp := file(12345); !bytes.Equal(c, exp) { 161 t.Fatalf("got %q, wanted %q", c, exp) 162 } 163 164 if err := ss.Clear(ctx); err != nil { 165 t.Fatal(err) 166 } 167 168 assertCreated(false) 169 170 for n, test := range []struct { 171 fun func() error 172 err error 173 }{ 174 { 175 err: errSideloadedFileNotFound, 176 fun: func() error { 177 _, err = ss.Get(ctx, 123, 456) 178 return err 179 }, 180 }, 181 { 182 err: errSideloadedFileNotFound, 183 fun: func() error { 184 _, err := ss.Purge(ctx, 123, 456) 185 return err 186 }, 187 }, 188 { 189 err: nil, 190 fun: func() error { 191 _, _, err := ss.TruncateTo(ctx, 123) 192 return err 193 }, 194 }, 195 { 196 err: nil, 197 fun: func() error { 198 _, err = ss.Filename(ctx, 123, 456) 199 return err 200 }, 201 }, 202 } { 203 if err := test.fun(); !errors.Is(err, test.err) { 204 t.Fatalf("%d: expected %v, got %v", n, test.err, err) 205 } 206 if err := ss.Clear(ctx); err != nil { 207 t.Fatalf("%d: %+v", n, err) 208 } 209 assertCreated(false) 210 } 211 212 // Write some payloads at various indexes. Note that this tests Put 213 // on a recently Clear()ed storage. Randomize order for fun. 214 payloads := []uint64{3, 5, 7, 9, 10} 215 for n := range rand.Perm(len(payloads)) { 216 i := payloads[n] 217 if err := ss.Put(ctx, i, highTerm, file(i*highTerm)); err != nil { 218 t.Fatalf("%d: %+v", i, err) 219 } 220 } 221 222 assertCreated(true) 223 224 // Write some more payloads, overlapping, at the past term. 225 pastPayloads := append([]uint64{81}, payloads...) 226 for _, i := range pastPayloads { 227 if err := ss.Put(ctx, i, lowTerm, file(i*lowTerm)); err != nil { 228 t.Fatal(err) 229 } 230 } 231 232 // Just for fun, recreate the original storage (unless it's the in-memory 233 // one), which shouldn't change anything about its state. 234 if !isInMem { 235 var err error 236 ss, err = maker(st, 1, 2, dir, eng) 237 if err != nil { 238 t.Fatal(err) 239 } 240 assertCreated(false) 241 } 242 243 // Just a sanity check that for the overlapping terms, we see both entries. 244 for _, term := range []uint64{lowTerm, highTerm} { 245 index := payloads[0] // exists at both lowTerm and highTerm 246 if c, err := ss.Get(ctx, index, term); err != nil { 247 t.Fatal(err) 248 } else if exp := file(term * index); !bytes.Equal(c, exp) { 249 t.Fatalf("got %q, wanted %q", c, exp) 250 } 251 } 252 assertCreated(false) // Get() doesn't recreated nor check 253 254 for n := range payloads { 255 // Truncate indexes <= payloads[n] (payloads is sorted in increasing order). 256 if _, _, err := ss.TruncateTo(ctx, payloads[n]); err != nil { 257 t.Fatalf("%d: %+v", n, err) 258 } 259 // Index payloads[n] and above are still there (truncation is exclusive) 260 // at both terms. 261 for _, term := range []uint64{lowTerm, highTerm} { 262 for _, i := range payloads[n:] { 263 if _, err := ss.Get(ctx, i, term); err != nil { 264 t.Fatalf("%d.%d: %+v", n, i, err) 265 } 266 } 267 // Indexes below are gone. 268 for _, i := range payloads[:n] { 269 if _, err := ss.Get(ctx, i, term); !errors.Is(err, errSideloadedFileNotFound) { 270 t.Fatalf("%d.%d: %+v", n, i, err) 271 } 272 } 273 } 274 } 275 276 func() { 277 if isInMem { 278 return 279 } 280 // First add a file that shouldn't be in the sideloaded storage to ensure 281 // sane behavior when directory can't be removed after full truncate. 282 nonRemovableFile := filepath.Join(ss.(*diskSideloadStorage).dir, "cantremove.xx") 283 f, err := os.Create(nonRemovableFile) 284 if err != nil { 285 t.Fatalf("could not create non i*.t* file in sideloaded storage: %+v", err) 286 } 287 defer f.Close() 288 289 _, _, err = ss.TruncateTo(ctx, math.MaxUint64) 290 if err == nil { 291 t.Fatalf("sideloaded directory should not have been removable due to extra file %s", nonRemovableFile) 292 } 293 expectedTruncateError := "while purging %q: remove %s: directory not empty" 294 if err.Error() != fmt.Sprintf(expectedTruncateError, ss.(*diskSideloadStorage).dir, ss.(*diskSideloadStorage).dir) { 295 t.Fatalf("error truncating sideloaded storage: %+v", err) 296 } 297 // Now remove extra file and let truncation proceed to remove directory. 298 err = os.Remove(nonRemovableFile) 299 if err != nil { 300 t.Fatalf("could not remove %s: %+v", nonRemovableFile, err) 301 } 302 303 // Test that directory is removed when filepath.Glob returns 0 matches. 304 if _, _, err := ss.TruncateTo(ctx, math.MaxUint64); err != nil { 305 t.Fatal(err) 306 } 307 // Ensure directory is removed, now that all files should be gone. 308 _, err = os.Stat(ss.(*diskSideloadStorage).dir) 309 if err == nil { 310 t.Fatalf("expected %q to be removed after truncating full range", ss.(*diskSideloadStorage).dir) 311 } 312 if err != nil { 313 if !os.IsNotExist(err) { 314 t.Fatalf("expected %q to be removed: %+v", ss.(*diskSideloadStorage).dir, err) 315 } 316 } 317 318 // Repopulate with some random indexes to test deletion when there are a 319 // non-zero number of filepath.Glob matches. 320 payloads := []uint64{3, 5, 7, 9, 10} 321 for n := range rand.Perm(len(payloads)) { 322 i := payloads[n] 323 if err := ss.Put(ctx, i, highTerm, file(i*highTerm)); err != nil { 324 t.Fatalf("%d: %+v", i, err) 325 } 326 } 327 assertCreated(true) 328 if _, _, err := ss.TruncateTo(ctx, math.MaxUint64); err != nil { 329 t.Fatal(err) 330 } 331 // Ensure directory is removed when all records are removed. 332 _, err = os.Stat(ss.(*diskSideloadStorage).dir) 333 if err == nil { 334 t.Fatalf("expected %q to be removed after truncating full range", ss.(*diskSideloadStorage).dir) 335 } 336 if err != nil { 337 if !os.IsNotExist(err) { 338 t.Fatalf("expected %q to be removed: %+v", ss.(*diskSideloadStorage).dir, err) 339 } 340 } 341 }() 342 343 if err := ss.Clear(ctx); err != nil { 344 t.Fatal(err) 345 } 346 347 assertCreated(false) 348 349 // Sanity check that we can call TruncateTo without the directory existing. 350 if _, _, err := ss.TruncateTo(ctx, 1); err != nil { 351 t.Fatal(err) 352 } 353 354 assertCreated(false) 355 356 // Repopulate with a few entries at indexes=1,2,4 and term 10 to test `maybePurgeSideloaded` 357 // with. 358 for index := uint64(1); index < 5; index++ { 359 if index == 3 { 360 continue 361 } 362 payload := []byte(strings.Repeat("x", 1+int(index))) 363 if err := ss.Put(ctx, index, 10, payload); err != nil { 364 t.Fatalf("%d: %+v", index, err) 365 } 366 } 367 368 // Term too high and too low, respectively. Shouldn't delete anything. 369 for _, term := range []uint64{9, 11} { 370 if size, err := maybePurgeSideloaded(ctx, ss, 1, 10, term); err != nil || size != 0 { 371 t.Fatalf("expected noop for term %d, got (%d, %v)", term, size, err) 372 } 373 } 374 // This should delete 2 and 4. Index == size+1, so expect 6. 375 if size, err := maybePurgeSideloaded(ctx, ss, 2, 4, 10); err != nil || size != 8 { 376 t.Fatalf("unexpectedly got (%d, %v)", size, err) 377 } 378 // This should delete 1 (the lone survivor). 379 if size, err := maybePurgeSideloaded(ctx, ss, 0, 100, 10); err != nil || size != 2 { 380 t.Fatalf("unexpectedly got (%d, %v)", size, err) 381 } 382 // Nothing left. 383 if size, err := maybePurgeSideloaded(ctx, ss, 0, 100, 10); err != nil || size != 0 { 384 t.Fatalf("expected noop, got (%d, %v)", size, err) 385 } 386 } 387 388 func TestRaftSSTableSideloadingInline(t *testing.T) { 389 defer leaktest.AfterTest(t)() 390 391 v1, v2 := raftVersionStandard, raftVersionSideloaded 392 rangeID := roachpb.RangeID(1) 393 394 type testCase struct { 395 // Entry passed into maybeInlineSideloadedRaftCommand and the entry 396 // after having (perhaps) been modified. 397 thin, fat raftpb.Entry 398 // Populate the raft entry cache and sideload storage before running the test. 399 setup func(*raftentry.Cache, SideloadStorage) 400 // If nonempty, the error expected from maybeInlineSideloadedRaftCommand. 401 expErr string 402 // If nonempty, a regex that the recorded trace span must match. 403 expTrace string 404 } 405 406 sstFat := kvserverpb.ReplicatedEvalResult_AddSSTable{ 407 Data: []byte("foo"), 408 CRC32: 0, // not checked 409 } 410 sstThin := kvserverpb.ReplicatedEvalResult_AddSSTable{ 411 CRC32: 0, // not checked 412 } 413 414 putOnDisk := func(ec *raftentry.Cache, ss SideloadStorage) { 415 if err := ss.Put(context.Background(), 5, 6, sstFat.Data); err != nil { 416 t.Fatal(err) 417 } 418 } 419 420 testCases := map[string]testCase{ 421 // Plain old v1 Raft command without payload. Don't touch. 422 "v1-no-payload": {thin: mkEnt(v1, 5, 6, &sstThin), fat: mkEnt(v1, 5, 6, &sstThin)}, 423 // With payload, but command is v1. Don't touch. Note that the 424 // first of the two shouldn't happen in practice or we have a 425 // huge problem once we try to apply this entry. 426 "v1-slim-with-payload": {thin: mkEnt(v1, 5, 6, &sstThin), fat: mkEnt(v1, 5, 6, &sstThin)}, 427 "v1-with-payload": {thin: mkEnt(v1, 5, 6, &sstFat), fat: mkEnt(v1, 5, 6, &sstFat)}, 428 // v2 with payload, but payload is AWOL. This would be fatal in practice. 429 "v2-with-payload-missing-file": { 430 thin: mkEnt(v2, 5, 6, &sstThin), fat: mkEnt(v2, 5, 6, &sstThin), 431 expErr: "not found", 432 }, 433 // v2 with payload that's actually there. The request we'll see in 434 // practice. 435 "v2-with-payload-with-file-no-cache": { 436 thin: mkEnt(v2, 5, 6, &sstThin), fat: mkEnt(v2, 5, 6, &sstFat), 437 setup: putOnDisk, expTrace: "inlined entry not cached", 438 }, 439 "v2-with-payload-with-file-with-cache": { 440 thin: mkEnt(v2, 5, 6, &sstThin), fat: mkEnt(v2, 5, 6, &sstFat), 441 setup: func(ec *raftentry.Cache, ss SideloadStorage) { 442 putOnDisk(ec, ss) 443 ec.Add(rangeID, []raftpb.Entry{mkEnt(v2, 5, 6, &sstFat)}, true) 444 }, expTrace: "using cache hit", 445 }, 446 "v2-fat-without-file": { 447 thin: mkEnt(v2, 5, 6, &sstFat), fat: mkEnt(v2, 5, 6, &sstFat), 448 setup: func(ec *raftentry.Cache, ss SideloadStorage) {}, 449 expTrace: "already inlined", 450 }, 451 } 452 453 runOne := func(k string, test testCase) { 454 ctx, collect, cancel := tracing.ContextWithRecordingSpan(context.Background(), "test-recording") 455 defer cancel() 456 457 ec := raftentry.NewCache(1024) // large enough 458 ss := mustNewInMemSideloadStorage(rangeID, roachpb.ReplicaID(1), ".") 459 if test.setup != nil { 460 test.setup(ec, ss) 461 } 462 463 thinCopy := *(protoutil.Clone(&test.thin).(*raftpb.Entry)) 464 newEnt, err := maybeInlineSideloadedRaftCommand(ctx, rangeID, thinCopy, ss, ec) 465 if err != nil { 466 if test.expErr == "" || !testutils.IsError(err, test.expErr) { 467 t.Fatalf("%s: %+v", k, err) 468 } 469 } else if test.expErr != "" { 470 t.Fatalf("%s: success, but expected error: %s", k, test.expErr) 471 } else if err := entryEq(thinCopy, test.thin); err != nil { 472 t.Fatalf("%s: mutated the original entry: %s", k, pretty.Diff(thinCopy, test.thin)) 473 } 474 475 if newEnt == nil { 476 newEnt = &thinCopy 477 } 478 if err := entryEq(*newEnt, test.fat); err != nil { 479 t.Fatalf("%s: %+v", k, err) 480 } 481 482 if dump := collect().String(); test.expTrace != "" { 483 if ok, err := regexp.MatchString(test.expTrace, dump); err != nil { 484 t.Fatalf("%s: %+v", k, err) 485 } else if !ok { 486 t.Fatalf("%s: expected trace matching:\n%s\n\nbut got\n%s", k, test.expTrace, dump) 487 } 488 } 489 } 490 491 keys := make([]string, 0, len(testCases)) 492 for k := range testCases { 493 keys = append(keys, k) 494 } 495 sort.Strings(keys) 496 for _, k := range keys { 497 runOne(k, testCases[k]) 498 } 499 } 500 501 func TestRaftSSTableSideloadingSideload(t *testing.T) { 502 defer leaktest.AfterTest(t)() 503 504 addSST := kvserverpb.ReplicatedEvalResult_AddSSTable{ 505 Data: []byte("foo"), CRC32: 0, // not checked 506 } 507 508 addSSTStripped := addSST 509 addSSTStripped.Data = nil 510 511 entV1Reg := mkEnt(raftVersionStandard, 10, 99, nil) 512 entV1SST := mkEnt(raftVersionStandard, 11, 99, &addSST) 513 entV2Reg := mkEnt(raftVersionSideloaded, 12, 99, nil) 514 entV2SST := mkEnt(raftVersionSideloaded, 13, 99, &addSST) 515 entV2SSTStripped := mkEnt(raftVersionSideloaded, 13, 99, &addSSTStripped) 516 517 type tc struct { 518 name string 519 preEnts, postEnts []raftpb.Entry 520 ss []string 521 size int64 522 } 523 524 // Intentionally ignore the fact that real calls would always have an 525 // unbroken run of `entry.Index`. 526 testCases := []tc{ 527 { 528 name: "empty", 529 preEnts: nil, 530 postEnts: nil, 531 ss: nil, 532 size: 0, 533 }, 534 { 535 name: "v1", 536 preEnts: []raftpb.Entry{entV1Reg, entV1SST}, 537 postEnts: []raftpb.Entry{entV1Reg, entV1SST}, 538 size: 0, 539 }, 540 { 541 name: "v2", 542 preEnts: []raftpb.Entry{entV2SST, entV2Reg}, 543 postEnts: []raftpb.Entry{entV2SSTStripped, entV2Reg}, 544 ss: []string{"i13t99"}, 545 size: int64(len(addSST.Data)), 546 }, 547 { 548 name: "mixed", 549 preEnts: []raftpb.Entry{entV1Reg, entV1SST, entV2Reg, entV2SST}, 550 postEnts: []raftpb.Entry{entV1Reg, entV1SST, entV2Reg, entV2SSTStripped}, 551 ss: []string{"i13t99"}, 552 size: int64(len(addSST.Data)), 553 }, 554 } 555 556 for _, test := range testCases { 557 t.Run(test.name, func(t *testing.T) { 558 ctx := context.Background() 559 sideloaded := mustNewInMemSideloadStorage(roachpb.RangeID(3), roachpb.ReplicaID(17), ".") 560 postEnts, size, err := maybeSideloadEntriesImpl(ctx, test.preEnts, sideloaded) 561 if err != nil { 562 t.Fatal(err) 563 } 564 if len(addSST.Data) == 0 { 565 t.Fatal("invocation mutated original AddSSTable struct in memory") 566 } 567 if !reflect.DeepEqual(postEnts, test.postEnts) { 568 t.Fatalf("result differs from expected: %s", pretty.Diff(postEnts, test.postEnts)) 569 } 570 if test.size != size { 571 t.Fatalf("expected %d sideloadedSize, but found %d", test.size, size) 572 } 573 var actKeys []string 574 for k := range sideloaded.(*inMemSideloadStorage).m { 575 actKeys = append(actKeys, fmt.Sprintf("i%dt%d", k.index, k.term)) 576 } 577 sort.Strings(actKeys) 578 if !reflect.DeepEqual(actKeys, test.ss) { 579 t.Fatalf("expected %v, got %v", test.ss, actKeys) 580 } 581 }) 582 } 583 } 584 585 func makeInMemSideloaded(repl *Replica) { 586 repl.raftMu.Lock() 587 repl.raftMu.sideloaded = mustNewInMemSideloadStorage(repl.RangeID, 0, repl.store.engine.GetAuxiliaryDir()) 588 repl.raftMu.Unlock() 589 } 590 591 // TestRaftSSTableSideloadingProposal runs a straightforward application of an `AddSSTable` command. 592 func TestRaftSSTableSideloadingProposal(t *testing.T) { 593 defer leaktest.AfterTest(t)() 594 595 testutils.RunTrueAndFalse(t, "engineInMem", func(t *testing.T, engineInMem bool) { 596 testutils.RunTrueAndFalse(t, "mockSideloaded", func(t *testing.T, mockSideloaded bool) { 597 if engineInMem && !mockSideloaded { 598 t.Skip("https://github.com/cockroachdb/cockroach/issues/31913") 599 } 600 testRaftSSTableSideloadingProposal(t, engineInMem, mockSideloaded) 601 }) 602 }) 603 } 604 605 // TestRaftSSTableSideloadingProposal runs a straightforward application of an `AddSSTable` command. 606 func testRaftSSTableSideloadingProposal(t *testing.T, engineInMem, mockSideloaded bool) { 607 defer leaktest.AfterTest(t)() 608 defer SetMockAddSSTable()() 609 610 dir, cleanup := testutils.TempDir(t) 611 defer cleanup() 612 stopper := stop.NewStopper() 613 tc := testContext{} 614 if !engineInMem { 615 cfg := storage.RocksDBConfig{ 616 StorageConfig: base.StorageConfig{ 617 Dir: dir, 618 Settings: cluster.MakeTestingClusterSettings(), 619 }, 620 } 621 var err error 622 cache := storage.NewRocksDBCache(1 << 20) 623 defer cache.Release() 624 tc.engine, err = storage.NewRocksDB(cfg, cache) 625 if err != nil { 626 t.Fatal(err) 627 } 628 stopper.AddCloser(tc.engine) 629 } 630 defer stopper.Stop(context.Background()) 631 tc.Start(t, stopper) 632 633 ctx, collect, cancel := tracing.ContextWithRecordingSpan(context.Background(), "test-recording") 634 defer cancel() 635 636 const ( 637 key = "foo" 638 entrySize = 128 639 ) 640 val := strings.Repeat("x", entrySize) 641 642 if mockSideloaded { 643 makeInMemSideloaded(tc.repl) 644 } 645 646 ts := hlc.Timestamp{Logical: 1} 647 648 if err := ProposeAddSSTable(ctx, key, val, ts, tc.store); err != nil { 649 t.Fatal(err) 650 } 651 652 { 653 var ba roachpb.BatchRequest 654 get := getArgs(roachpb.Key(key)) 655 ba.Add(&get) 656 ba.Header.RangeID = tc.repl.RangeID 657 658 br, pErr := tc.store.Send(ctx, ba) 659 if pErr != nil { 660 t.Fatal(pErr) 661 } 662 v := br.Responses[0].GetInner().(*roachpb.GetResponse).Value 663 if v == nil { 664 t.Fatal("expected to read a value") 665 } 666 if valBytes, err := v.GetBytes(); err != nil { 667 t.Fatal(err) 668 } else if !bytes.Equal(valBytes, []byte(val)) { 669 t.Fatalf("expected to read '%s', but found '%s'", val, valBytes) 670 } 671 } 672 673 func() { 674 tc.repl.raftMu.Lock() 675 defer tc.repl.raftMu.Unlock() 676 if ss, ok := tc.repl.raftMu.sideloaded.(*inMemSideloadStorage); ok && len(ss.m) < 1 { 677 t.Fatal("sideloaded storage is empty") 678 } 679 680 if err := testutils.MatchInOrder( 681 collect().String(), "sideloadable proposal detected", "ingested SSTable", 682 ); err != nil { 683 t.Fatal(err) 684 } 685 686 if n := tc.store.metrics.AddSSTableProposals.Count(); n == 0 { 687 t.Fatalf("expected metric to show at least one AddSSTable proposal, but got %d", n) 688 } 689 690 if n := tc.store.metrics.AddSSTableApplications.Count(); n == 0 { 691 t.Fatalf("expected metric to show at least one AddSSTable application, but got %d", n) 692 } 693 // We usually don't see copies because we hardlink and ingest the original SST. However, this 694 // depends on luck and the file system, so don't try to assert it. We should, however, see 695 // no more than one. 696 expMaxCopies := int64(1) 697 if engineInMem { 698 // We don't count in-memory env SST writes as copies. 699 expMaxCopies = 0 700 } 701 if n := tc.store.metrics.AddSSTableApplicationCopies.Count(); n > expMaxCopies { 702 t.Fatalf("expected metric to show <= %d AddSSTable copies, but got %d", expMaxCopies, n) 703 } 704 }() 705 706 // Force a log truncation followed by verification of the tracked raft log size. This exercises a 707 // former bug in which the raft log size took the sideloaded payload into account when adding 708 // to the log, but not when truncating. 709 710 // Write enough keys to the range to make sure that a truncation will happen. 711 for i := 0; i < RaftLogQueueStaleThreshold+1; i++ { 712 key := roachpb.Key(fmt.Sprintf("key%02d", i)) 713 args := putArgs(key, []byte(fmt.Sprintf("value%02d", i))) 714 if _, err := kv.SendWrapped(context.Background(), tc.store.TestSender(), &args); err != nil { 715 t.Fatal(err) 716 } 717 } 718 719 if _, err := tc.store.raftLogQueue.testingAdd(ctx, tc.repl, 99.99 /* priority */); err != nil { 720 t.Fatal(err) 721 } 722 tc.store.MustForceRaftLogScanAndProcess() 723 // SST is definitely truncated now, so recomputing the Raft log keys should match up with 724 // the tracked size. 725 verifyLogSizeInSync(t, tc.repl) 726 } 727 728 type mockSender struct { 729 logEntries [][]byte 730 done bool 731 } 732 733 func (mr *mockSender) Send(req *SnapshotRequest) error { 734 if req.LogEntries != nil { 735 if mr.logEntries != nil { 736 return errors.New("already have log entries") 737 } 738 mr.logEntries = req.LogEntries 739 } 740 return nil 741 } 742 743 func (mr *mockSender) Recv() (*SnapshotResponse, error) { 744 if mr.done { 745 return nil, io.EOF 746 } 747 status := SnapshotResponse_ACCEPTED 748 if len(mr.logEntries) > 0 { 749 status = SnapshotResponse_APPLIED 750 mr.done = true 751 } 752 return &SnapshotResponse{Status: status}, nil 753 } 754 755 func newEngine(t *testing.T) (func(), storage.Engine) { 756 dir, cleanup := testutils.TempDir(t) 757 eng, err := storage.NewDefaultEngine( 758 1<<20, 759 base.StorageConfig{ 760 Dir: dir, 761 MustExist: false, 762 }) 763 if err != nil { 764 t.Fatal(err) 765 } 766 return cleanup, eng 767 } 768 769 // This test verifies that when a snapshot is sent, sideloaded proposals are 770 // inlined. 771 func TestRaftSSTableSideloadingSnapshot(t *testing.T) { 772 defer leaktest.AfterTest(t)() 773 defer SetMockAddSSTable()() 774 775 ctx := context.Background() 776 tc := testContext{} 777 778 cleanup, eng := newEngine(t) 779 tc.engine = eng 780 defer cleanup() 781 defer eng.Close() 782 783 stopper := stop.NewStopper() 784 defer stopper.Stop(ctx) 785 tc.Start(t, stopper) 786 787 var ba roachpb.BatchRequest 788 ba.RangeID = tc.repl.RangeID 789 790 // Disable log truncation as we want to be sure that we get to create 791 // snapshots that have our sideloaded proposal in them. 792 tc.store.SetRaftLogQueueActive(false) 793 794 // Put a sideloaded proposal on the Range. 795 key, val := "don't", "care" 796 origSSTData, _ := MakeSSTable(key, val, hlc.Timestamp{}.Add(0, 1)) 797 { 798 799 var addReq roachpb.AddSSTableRequest 800 addReq.Data = origSSTData 801 addReq.Key = roachpb.Key(key) 802 addReq.EndKey = addReq.Key.Next() 803 ba.Add(&addReq) 804 805 _, pErr := tc.store.Send(ctx, ba) 806 if pErr != nil { 807 t.Fatal(pErr) 808 } 809 } 810 811 // Run a happy case snapshot. Check that it properly inlines the payload in 812 // the contained log entries. 813 inlinedEntry := func() raftpb.Entry { 814 os, err := tc.repl.GetSnapshot(ctx, SnapshotRequest_RAFT, tc.store.StoreID()) 815 if err != nil { 816 t.Fatal(err) 817 } 818 defer os.Close() 819 820 mockSender := &mockSender{} 821 if err := sendSnapshot( 822 ctx, 823 &tc.store.cfg.RaftConfig, 824 tc.store.cfg.Settings, 825 mockSender, 826 &fakeStorePool{}, 827 SnapshotRequest_Header{State: os.State, Priority: SnapshotRequest_RECOVERY}, 828 os, 829 tc.repl.store.Engine().NewBatch, 830 func() {}, 831 ); err != nil { 832 t.Fatal(err) 833 } 834 835 var ent raftpb.Entry 836 var cmd kvserverpb.RaftCommand 837 var finalEnt raftpb.Entry 838 for _, entryBytes := range mockSender.logEntries { 839 if err := protoutil.Unmarshal(entryBytes, &ent); err != nil { 840 t.Fatal(err) 841 } 842 if sniffSideloadedRaftCommand(ent.Data) { 843 _, cmdBytes := DecodeRaftCommand(ent.Data) 844 if err := protoutil.Unmarshal(cmdBytes, &cmd); err != nil { 845 t.Fatal(err) 846 } 847 if as := cmd.ReplicatedEvalResult.AddSSTable; as == nil { 848 t.Fatalf("no AddSSTable found in sideloaded command %+v", cmd) 849 } else if len(as.Data) == 0 { 850 t.Fatalf("empty payload in sideloaded command: %+v", cmd) 851 } 852 finalEnt = ent 853 } 854 } 855 if finalEnt.Index == 0 { 856 t.Fatal("no sideloaded command found") 857 } 858 return finalEnt 859 }() 860 861 sideloadedIndex := inlinedEntry.Index 862 863 // This happens to be a good point in time to check the `entries()` method 864 // which has special handling to accommodate `term()`: when an empty 865 // sideload storage is passed in, `entries()` should not inline, and in turn 866 // also not populate the entries cache (since its contents must always be 867 // fully inlined). 868 func() { 869 tc.repl.raftMu.Lock() 870 defer tc.repl.raftMu.Unlock() 871 tc.repl.mu.Lock() 872 defer tc.repl.mu.Unlock() 873 for _, withSS := range []bool{false, true} { 874 tc.store.raftEntryCache.Clear(tc.repl.RangeID, sideloadedIndex+1) 875 876 var ss SideloadStorage 877 if withSS { 878 ss = tc.repl.raftMu.sideloaded 879 } 880 rsl := stateloader.Make(tc.repl.RangeID) 881 entries, err := entries( 882 ctx, rsl, tc.store.Engine(), tc.repl.RangeID, tc.store.raftEntryCache, 883 ss, sideloadedIndex, sideloadedIndex+1, 1<<20, 884 ) 885 if err != nil { 886 t.Fatal(err) 887 } 888 if len(entries) != 1 { 889 t.Fatalf("no or too many entries returned from cache: %+v", entries) 890 } 891 ents, _, _, _ := tc.store.raftEntryCache.Scan(nil, tc.repl.RangeID, sideloadedIndex, sideloadedIndex+1, 1<<20) 892 if withSS { 893 // We passed the sideload storage, so we expect to get our 894 // inlined index back from the cache. 895 if len(ents) != 1 { 896 t.Fatalf("no or too many entries returned from cache: %+v", ents) 897 } 898 if err := entryEq(inlinedEntry, ents[0]); err != nil { 899 t.Fatalf("withSS=%t: %+v", withSS, err) 900 } 901 } else { 902 // Without sideload storage, expect the cache to remain 903 // unpopulated and the entry returned from entries() to not have 904 // been inlined. 905 if len(ents) != 0 { 906 t.Fatalf("expected no cached entries, but got %+v", ents) 907 } 908 if expErr, err := `ReplicatedEvalResult.AddSSTable.Data: \[\]uint8\[\d+\] != \[\]uint8\[0\]`, 909 entryEq(inlinedEntry, entries[0]); !testutils.IsError( 910 err, 911 expErr, 912 ) { 913 t.Fatalf("expected specific mismatch on `Data` field, but got %v\nwanted: %s", err, expErr) 914 } 915 } 916 } 917 }() 918 919 // Now run a snapshot that will fail since it doesn't find one of its on-disk 920 // payloads. This can happen if the Raft log queue runs between the time the 921 // (engine) snapshot is taken and the log entries are actually read from the 922 // (engine) snapshot. We didn't run this before because we wanted the file 923 // to stay in sideloaded storage for the previous test. 924 func() { 925 failingOS, err := tc.repl.GetSnapshot(ctx, SnapshotRequest_RAFT, tc.store.StoreID()) 926 if err != nil { 927 t.Fatal(err) 928 } 929 defer failingOS.Close() 930 931 // Remove the actual file. 932 tc.repl.raftMu.Lock() 933 if err := tc.repl.raftMu.sideloaded.Clear(ctx); err != nil { 934 tc.repl.raftMu.Unlock() 935 t.Fatal(err) 936 } 937 tc.repl.raftMu.Unlock() 938 // Additionally we need to clear out the entry from the cache because 939 // that would still save the day. 940 tc.store.raftEntryCache.Clear(tc.repl.RangeID, sideloadedIndex+1) 941 942 mockSender := &mockSender{} 943 err = sendSnapshot( 944 ctx, 945 &tc.store.cfg.RaftConfig, 946 tc.store.cfg.Settings, 947 mockSender, 948 &fakeStorePool{}, 949 SnapshotRequest_Header{State: failingOS.State, Priority: SnapshotRequest_RECOVERY}, 950 failingOS, 951 tc.repl.store.Engine().NewBatch, 952 func() {}, 953 ) 954 if !errors.HasType(err, (*errMustRetrySnapshotDueToTruncation)(nil)) { 955 t.Fatal(err) 956 } 957 }() 958 } 959 960 func TestRaftSSTableSideloadingTruncation(t *testing.T) { 961 defer leaktest.AfterTest(t)() 962 defer SetMockAddSSTable()() 963 964 tc := testContext{} 965 stopper := stop.NewStopper() 966 defer stopper.Stop(context.Background()) 967 tc.Start(t, stopper) 968 makeInMemSideloaded(tc.repl) 969 ctx := context.Background() 970 971 const count = 10 972 973 var indexes []uint64 974 addLastIndex := func() { 975 lastIndex, err := tc.repl.GetLastIndex() 976 if err != nil { 977 t.Fatal(err) 978 } 979 indexes = append(indexes, lastIndex) 980 } 981 for i := 0; i < count; i++ { 982 addLastIndex() 983 key := fmt.Sprintf("key-%d", i) 984 val := fmt.Sprintf("val-%d", i) 985 if err := ProposeAddSSTable(ctx, key, val, tc.Clock().Now(), tc.store); err != nil { 986 t.Fatalf("%d: %+v", i, err) 987 } 988 } 989 // Append an extra entry which, if we truncate it, should definitely also 990 // remove any leftover files (ok, unless the last one is reproposed but 991 // that's *very* unlikely to happen for the last one) 992 addLastIndex() 993 994 fmtSideloaded := func() []string { 995 var r []string 996 tc.repl.raftMu.Lock() 997 defer tc.repl.raftMu.Unlock() 998 for k := range tc.repl.raftMu.sideloaded.(*inMemSideloadStorage).m { 999 r = append(r, fmt.Sprintf("%v", k)) 1000 } 1001 sort.Strings(r) 1002 return r 1003 } 1004 1005 // Check that when we truncate, the number of on-disk files changes in ways 1006 // we expect. Intentionally not too strict due to the possibility of 1007 // reproposals, etc; it could be made stricter, but this should give enough 1008 // confidence already that we're calling `PurgeTo` correctly, and for the 1009 // remainder unit testing on each impl's PurgeTo is more useful. 1010 for i := range indexes { 1011 const rangeID = 1 1012 newFirstIndex := indexes[i] + 1 1013 truncateArgs := truncateLogArgs(newFirstIndex, rangeID) 1014 log.Eventf(ctx, "truncating to index < %d", newFirstIndex) 1015 if _, pErr := kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{RangeID: rangeID}, &truncateArgs); pErr != nil { 1016 t.Fatal(pErr) 1017 } 1018 sideloadStrings := fmtSideloaded() 1019 if minFiles := count - i; len(sideloadStrings) < minFiles { 1020 t.Fatalf("after truncation at %d (i=%d), expected at least %d files left, but have:\n%v", 1021 indexes[i], i, minFiles, sideloadStrings) 1022 } 1023 } 1024 1025 if sideloadStrings := fmtSideloaded(); len(sideloadStrings) != 0 { 1026 t.Fatalf("expected all files to be cleaned up, but found %v", sideloadStrings) 1027 } 1028 1029 }