github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/compaction_test.go (about) 1 // Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "context" 10 crand "crypto/rand" 11 "fmt" 12 "math" 13 "math/rand" 14 "path/filepath" 15 "reflect" 16 "regexp" 17 "runtime" 18 "slices" 19 "sort" 20 "strconv" 21 "strings" 22 "sync/atomic" 23 "testing" 24 "time" 25 26 "github.com/cockroachdb/datadriven" 27 "github.com/cockroachdb/errors" 28 "github.com/cockroachdb/errors/oserror" 29 "github.com/cockroachdb/pebble/internal/base" 30 "github.com/cockroachdb/pebble/internal/keyspan" 31 "github.com/cockroachdb/pebble/internal/manifest" 32 "github.com/cockroachdb/pebble/internal/testkeys" 33 "github.com/cockroachdb/pebble/objstorage" 34 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 35 "github.com/cockroachdb/pebble/objstorage/remote" 36 "github.com/cockroachdb/pebble/sstable" 37 "github.com/cockroachdb/pebble/vfs" 38 "github.com/cockroachdb/pebble/vfs/errorfs" 39 "github.com/stretchr/testify/require" 40 ) 41 42 func newVersion(opts *Options, files [numLevels][]*fileMetadata) *version { 43 return manifest.NewVersion( 44 opts.Comparer.Compare, 45 opts.Comparer.FormatKey, 46 opts.FlushSplitBytes, 47 files) 48 } 49 50 type compactionPickerForTesting struct { 51 score float64 52 level int 53 baseLevel int 54 opts *Options 55 vers *manifest.Version 56 maxLevelBytes [7]int64 57 } 58 59 var _ compactionPicker = &compactionPickerForTesting{} 60 61 func (p *compactionPickerForTesting) getScores([]compactionInfo) [numLevels]float64 { 62 return [numLevels]float64{} 63 } 64 65 func (p *compactionPickerForTesting) getBaseLevel() int { 66 return p.baseLevel 67 } 68 69 func (p *compactionPickerForTesting) estimatedCompactionDebt(l0ExtraSize uint64) uint64 { 70 return 0 71 } 72 73 func (p *compactionPickerForTesting) forceBaseLevel1() {} 74 75 func (p *compactionPickerForTesting) pickAuto(env compactionEnv) (pc *pickedCompaction) { 76 if p.score < 1 { 77 return nil 78 } 79 outputLevel := p.level + 1 80 if p.level == 0 { 81 outputLevel = p.baseLevel 82 } 83 iter := p.vers.Levels[p.level].Iter() 84 iter.First() 85 cInfo := candidateLevelInfo{ 86 level: p.level, 87 outputLevel: outputLevel, 88 file: iter.Take(), 89 } 90 if cInfo.level == 0 { 91 return pickL0(env, p.opts, p.vers, p.baseLevel) 92 } 93 return pickAutoLPositive(env, p.opts, p.vers, cInfo, p.baseLevel, p.maxLevelBytes) 94 } 95 96 func (p *compactionPickerForTesting) pickElisionOnlyCompaction( 97 env compactionEnv, 98 ) (pc *pickedCompaction) { 99 return nil 100 } 101 102 func (p *compactionPickerForTesting) pickRewriteCompaction( 103 env compactionEnv, 104 ) (pc *pickedCompaction) { 105 return nil 106 } 107 108 func (p *compactionPickerForTesting) pickReadTriggeredCompaction( 109 env compactionEnv, 110 ) (pc *pickedCompaction) { 111 return nil 112 } 113 114 func TestPickCompaction(t *testing.T) { 115 fileNums := func(files manifest.LevelSlice) string { 116 var ss []string 117 files.Each(func(meta *fileMetadata) { 118 ss = append(ss, strconv.Itoa(int(meta.FileNum))) 119 }) 120 sort.Strings(ss) 121 return strings.Join(ss, ",") 122 } 123 124 opts := (*Options)(nil).EnsureDefaults() 125 newFileMeta := func(fileNum FileNum, size uint64, smallest, largest base.InternalKey) *fileMetadata { 126 m := (&fileMetadata{ 127 FileNum: fileNum, 128 Size: size, 129 }).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest) 130 m.InitPhysicalBacking() 131 return m 132 } 133 134 testCases := []struct { 135 desc string 136 version *version 137 picker compactionPickerForTesting 138 want string 139 wantMulti bool 140 }{ 141 { 142 desc: "no compaction", 143 version: newVersion(opts, [numLevels][]*fileMetadata{ 144 0: { 145 newFileMeta( 146 100, 147 1, 148 base.ParseInternalKey("i.SET.101"), 149 base.ParseInternalKey("j.SET.102"), 150 ), 151 }, 152 }), 153 want: "", 154 }, 155 156 { 157 desc: "1 L0 file", 158 version: newVersion(opts, [numLevels][]*fileMetadata{ 159 0: { 160 newFileMeta( 161 100, 162 1, 163 base.ParseInternalKey("i.SET.101"), 164 base.ParseInternalKey("j.SET.102"), 165 ), 166 }, 167 }), 168 picker: compactionPickerForTesting{ 169 score: 99, 170 level: 0, 171 baseLevel: 1, 172 }, 173 want: "100 ", 174 }, 175 176 { 177 desc: "2 L0 files (0 overlaps)", 178 version: newVersion(opts, [numLevels][]*fileMetadata{ 179 0: { 180 newFileMeta( 181 100, 182 1, 183 base.ParseInternalKey("i.SET.101"), 184 base.ParseInternalKey("j.SET.102"), 185 ), 186 newFileMeta( 187 110, 188 1, 189 base.ParseInternalKey("k.SET.111"), 190 base.ParseInternalKey("l.SET.112"), 191 ), 192 }, 193 }), 194 picker: compactionPickerForTesting{ 195 score: 99, 196 level: 0, 197 baseLevel: 1, 198 }, 199 want: "100,110 ", 200 }, 201 202 { 203 desc: "2 L0 files, with ikey overlap", 204 version: newVersion(opts, [numLevels][]*fileMetadata{ 205 0: { 206 newFileMeta( 207 100, 208 1, 209 base.ParseInternalKey("i.SET.101"), 210 base.ParseInternalKey("p.SET.102"), 211 ), 212 newFileMeta( 213 110, 214 1, 215 base.ParseInternalKey("j.SET.111"), 216 base.ParseInternalKey("q.SET.112"), 217 ), 218 }, 219 }), 220 picker: compactionPickerForTesting{ 221 score: 99, 222 level: 0, 223 baseLevel: 1, 224 }, 225 want: "100,110 ", 226 }, 227 228 { 229 desc: "2 L0 files, with ukey overlap", 230 version: newVersion(opts, [numLevels][]*fileMetadata{ 231 0: { 232 newFileMeta( 233 100, 234 1, 235 base.ParseInternalKey("i.SET.101"), 236 base.ParseInternalKey("i.SET.102"), 237 ), 238 newFileMeta( 239 110, 240 1, 241 base.ParseInternalKey("i.SET.111"), 242 base.ParseInternalKey("i.SET.112"), 243 ), 244 }, 245 }), 246 picker: compactionPickerForTesting{ 247 score: 99, 248 level: 0, 249 baseLevel: 1, 250 }, 251 want: "100,110 ", 252 }, 253 254 { 255 desc: "1 L0 file, 2 L1 files (0 overlaps)", 256 version: newVersion(opts, [numLevels][]*fileMetadata{ 257 0: { 258 newFileMeta( 259 100, 260 1, 261 base.ParseInternalKey("i.SET.101"), 262 base.ParseInternalKey("i.SET.102"), 263 ), 264 }, 265 1: { 266 newFileMeta( 267 200, 268 1, 269 base.ParseInternalKey("a.SET.201"), 270 base.ParseInternalKey("b.SET.202"), 271 ), 272 newFileMeta( 273 210, 274 1, 275 base.ParseInternalKey("y.SET.211"), 276 base.ParseInternalKey("z.SET.212"), 277 ), 278 }, 279 }), 280 picker: compactionPickerForTesting{ 281 score: 99, 282 level: 0, 283 baseLevel: 1, 284 }, 285 want: "100 ", 286 }, 287 288 { 289 desc: "1 L0 file, 2 L1 files (1 overlap), 4 L2 files (3 overlaps)", 290 version: newVersion(opts, [numLevels][]*fileMetadata{ 291 0: { 292 newFileMeta( 293 100, 294 1, 295 base.ParseInternalKey("i.SET.101"), 296 base.ParseInternalKey("t.SET.102"), 297 ), 298 }, 299 1: { 300 newFileMeta( 301 200, 302 1, 303 base.ParseInternalKey("a.SET.201"), 304 base.ParseInternalKey("e.SET.202"), 305 ), 306 newFileMeta( 307 210, 308 1, 309 base.ParseInternalKey("f.SET.211"), 310 base.ParseInternalKey("j.SET.212"), 311 ), 312 }, 313 2: { 314 newFileMeta( 315 300, 316 1, 317 base.ParseInternalKey("a.SET.301"), 318 base.ParseInternalKey("b.SET.302"), 319 ), 320 newFileMeta( 321 310, 322 1, 323 base.ParseInternalKey("c.SET.311"), 324 base.ParseInternalKey("g.SET.312"), 325 ), 326 newFileMeta( 327 320, 328 1, 329 base.ParseInternalKey("h.SET.321"), 330 base.ParseInternalKey("m.SET.322"), 331 ), 332 newFileMeta( 333 330, 334 1, 335 base.ParseInternalKey("n.SET.331"), 336 base.ParseInternalKey("z.SET.332"), 337 ), 338 }, 339 }), 340 picker: compactionPickerForTesting{ 341 score: 99, 342 level: 0, 343 baseLevel: 1, 344 }, 345 want: "100 210 310,320,330", 346 }, 347 348 { 349 desc: "4 L1 files, 2 L2 files, can grow", 350 version: newVersion(opts, [numLevels][]*fileMetadata{ 351 1: { 352 newFileMeta( 353 200, 354 1, 355 base.ParseInternalKey("i1.SET.201"), 356 base.ParseInternalKey("i2.SET.202"), 357 ), 358 newFileMeta( 359 210, 360 1, 361 base.ParseInternalKey("j1.SET.211"), 362 base.ParseInternalKey("j2.SET.212"), 363 ), 364 newFileMeta( 365 220, 366 1, 367 base.ParseInternalKey("k1.SET.221"), 368 base.ParseInternalKey("k2.SET.222"), 369 ), 370 newFileMeta( 371 230, 372 1, 373 base.ParseInternalKey("l1.SET.231"), 374 base.ParseInternalKey("l2.SET.232"), 375 ), 376 }, 377 2: { 378 newFileMeta( 379 300, 380 1, 381 base.ParseInternalKey("a0.SET.301"), 382 base.ParseInternalKey("l0.SET.302"), 383 ), 384 newFileMeta( 385 310, 386 1, 387 base.ParseInternalKey("l2.SET.311"), 388 base.ParseInternalKey("z2.SET.312"), 389 ), 390 }, 391 }), 392 picker: compactionPickerForTesting{ 393 score: 99, 394 level: 1, 395 baseLevel: 1, 396 }, 397 want: "200,210,220 300 ", 398 wantMulti: true, 399 }, 400 401 { 402 desc: "4 L1 files, 2 L2 files, can't grow (range)", 403 version: newVersion(opts, [numLevels][]*fileMetadata{ 404 1: { 405 newFileMeta( 406 200, 407 1, 408 base.ParseInternalKey("i1.SET.201"), 409 base.ParseInternalKey("i2.SET.202"), 410 ), 411 newFileMeta( 412 210, 413 1, 414 base.ParseInternalKey("j1.SET.211"), 415 base.ParseInternalKey("j2.SET.212"), 416 ), 417 newFileMeta( 418 220, 419 1, 420 base.ParseInternalKey("k1.SET.221"), 421 base.ParseInternalKey("k2.SET.222"), 422 ), 423 newFileMeta( 424 230, 425 1, 426 base.ParseInternalKey("l1.SET.231"), 427 base.ParseInternalKey("l2.SET.232"), 428 ), 429 }, 430 2: { 431 newFileMeta( 432 300, 433 1, 434 base.ParseInternalKey("a0.SET.301"), 435 base.ParseInternalKey("j0.SET.302"), 436 ), 437 newFileMeta( 438 310, 439 1, 440 base.ParseInternalKey("j2.SET.311"), 441 base.ParseInternalKey("z2.SET.312"), 442 ), 443 }, 444 }), 445 picker: compactionPickerForTesting{ 446 score: 99, 447 level: 1, 448 baseLevel: 1, 449 }, 450 want: "200 300 ", 451 wantMulti: true, 452 }, 453 454 { 455 desc: "4 L1 files, 2 L2 files, can't grow (size)", 456 version: newVersion(opts, [numLevels][]*fileMetadata{ 457 1: { 458 newFileMeta( 459 200, 460 expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, 461 base.ParseInternalKey("i1.SET.201"), 462 base.ParseInternalKey("i2.SET.202"), 463 ), 464 newFileMeta( 465 210, 466 expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, 467 base.ParseInternalKey("j1.SET.211"), 468 base.ParseInternalKey("j2.SET.212"), 469 ), 470 newFileMeta( 471 220, 472 expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, 473 base.ParseInternalKey("k1.SET.221"), 474 base.ParseInternalKey("k2.SET.222"), 475 ), 476 newFileMeta( 477 230, 478 expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, 479 base.ParseInternalKey("l1.SET.231"), 480 base.ParseInternalKey("l2.SET.232"), 481 ), 482 }, 483 2: { 484 newFileMeta( 485 300, 486 expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1, 487 base.ParseInternalKey("a0.SET.301"), 488 base.ParseInternalKey("l0.SET.302"), 489 ), 490 newFileMeta( 491 310, 492 expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1, 493 base.ParseInternalKey("l2.SET.311"), 494 base.ParseInternalKey("z2.SET.312"), 495 ), 496 }, 497 }), 498 picker: compactionPickerForTesting{ 499 score: 99, 500 level: 1, 501 baseLevel: 1, 502 }, 503 want: "200 300 ", 504 }, 505 } 506 507 for _, tc := range testCases { 508 vs := &versionSet{ 509 opts: opts, 510 cmp: DefaultComparer.Compare, 511 cmpName: DefaultComparer.Name, 512 } 513 vs.versions.Init(nil) 514 vs.append(tc.version) 515 tc.picker.opts = opts 516 tc.picker.vers = tc.version 517 vs.picker = &tc.picker 518 pc, got := vs.picker.pickAuto(compactionEnv{diskAvailBytes: math.MaxUint64}), "" 519 if pc != nil { 520 c := newCompaction(pc, opts, time.Now(), nil /* provider */) 521 522 gotStart := fileNums(c.startLevel.files) 523 gotML := "" 524 observedMulti := len(c.extraLevels) > 0 525 if observedMulti { 526 gotML = " " + fileNums(c.extraLevels[0].files) 527 } 528 gotOutput := " " + fileNums(c.outputLevel.files) 529 gotGrandparents := " " + fileNums(c.grandparents) 530 got = gotStart + gotML + gotOutput + gotGrandparents 531 if tc.wantMulti != observedMulti { 532 t.Fatalf("Expected Multi %t; Observed Multi %t, for %s", tc.wantMulti, observedMulti, got) 533 } 534 535 } 536 if got != tc.want { 537 t.Fatalf("%s:\ngot %q\nwant %q", tc.desc, got, tc.want) 538 } 539 } 540 } 541 542 func TestElideTombstone(t *testing.T) { 543 var d *DB 544 defer func() { 545 if d != nil { 546 require.NoError(t, d.Close()) 547 } 548 }() 549 var buf bytes.Buffer 550 datadriven.RunTest(t, "testdata/compaction_elide_tombstone", 551 func(t *testing.T, td *datadriven.TestData) string { 552 switch td.Cmd { 553 case "define": 554 if d != nil { 555 if err := d.Close(); err != nil { 556 return err.Error() 557 } 558 } 559 var err error 560 if d, err = runDBDefineCmd(td, (&Options{ 561 FS: vfs.NewMem(), 562 DebugCheck: DebugCheckLevels, 563 FormatMajorVersion: FormatNewest, 564 DisableAutomaticCompactions: true, 565 }).WithFSDefaults()); err != nil { 566 return err.Error() 567 } 568 if td.HasArg("verbose") { 569 return d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) 570 } 571 return d.mu.versions.currentVersion().String() 572 case "elide": 573 buf.Reset() 574 var startLevel int 575 td.ScanArgs(t, "start-level", &startLevel) 576 c := compaction{ 577 cmp: testkeys.Comparer.Compare, 578 comparer: testkeys.Comparer, 579 version: d.mu.versions.currentVersion(), 580 inputs: []compactionLevel{{level: startLevel}, {level: startLevel + 1}}, 581 smallest: base.ParseInternalKey("a.SET.0"), 582 largest: base.ParseInternalKey("z.SET.0"), 583 } 584 c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] 585 c.setupInuseKeyRanges() 586 for _, ukey := range strings.Split(td.Input, "\n") { 587 fmt.Fprintf(&buf, "elideTombstone(%q) = %t\n", ukey, c.elideTombstone([]byte(ukey))) 588 } 589 return buf.String() 590 default: 591 return fmt.Sprintf("unknown command: %s", td.Cmd) 592 } 593 }) 594 } 595 596 func TestElideRangeTombstone(t *testing.T) { 597 opts := (*Options)(nil).EnsureDefaults() 598 599 newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata { 600 m := (&fileMetadata{}).ExtendPointKeyBounds( 601 opts.Comparer.Compare, smallest, largest, 602 ) 603 m.InitPhysicalBacking() 604 return m 605 } 606 607 type want struct { 608 key string 609 endKey string 610 expected bool 611 } 612 613 testCases := []struct { 614 desc string 615 level int 616 version *version 617 wants []want 618 flushing flushableList 619 }{ 620 { 621 desc: "empty", 622 level: 1, 623 version: newVersion(opts, [numLevels][]*fileMetadata{}), 624 wants: []want{ 625 {"x", "y", true}, 626 }, 627 }, 628 { 629 desc: "non-empty", 630 level: 1, 631 version: newVersion(opts, [numLevels][]*fileMetadata{ 632 1: { 633 newFileMeta( 634 base.ParseInternalKey("c.SET.801"), 635 base.ParseInternalKey("g.SET.800"), 636 ), 637 newFileMeta( 638 base.ParseInternalKey("x.SET.701"), 639 base.ParseInternalKey("y.SET.700"), 640 ), 641 }, 642 2: { 643 newFileMeta( 644 base.ParseInternalKey("d.SET.601"), 645 base.ParseInternalKey("h.SET.600"), 646 ), 647 newFileMeta( 648 base.ParseInternalKey("r.SET.501"), 649 base.ParseInternalKey("t.SET.500"), 650 ), 651 }, 652 3: { 653 newFileMeta( 654 base.ParseInternalKey("f.SET.401"), 655 base.ParseInternalKey("g.SET.400"), 656 ), 657 newFileMeta( 658 base.ParseInternalKey("w.SET.301"), 659 base.ParseInternalKey("x.SET.300"), 660 ), 661 }, 662 4: { 663 newFileMeta( 664 base.ParseInternalKey("f.SET.201"), 665 base.ParseInternalKey("m.SET.200"), 666 ), 667 newFileMeta( 668 base.ParseInternalKey("t.SET.101"), 669 base.ParseInternalKey("t.SET.100"), 670 ), 671 }, 672 }), 673 wants: []want{ 674 {"b", "c", true}, 675 {"c", "d", true}, 676 {"d", "e", true}, 677 {"e", "f", false}, 678 {"f", "g", false}, 679 {"g", "h", false}, 680 {"h", "i", false}, 681 {"l", "m", false}, 682 {"m", "n", false}, 683 {"n", "o", true}, 684 {"q", "r", true}, 685 {"r", "s", true}, 686 {"s", "t", false}, 687 {"t", "u", false}, 688 {"u", "v", true}, 689 {"v", "w", false}, 690 {"w", "x", false}, 691 {"x", "y", false}, 692 {"y", "z", true}, 693 }, 694 }, 695 { 696 desc: "flushing", 697 level: -1, 698 version: newVersion(opts, [numLevels][]*fileMetadata{ 699 0: { 700 newFileMeta( 701 base.ParseInternalKey("h.SET.901"), 702 base.ParseInternalKey("j.SET.900"), 703 ), 704 }, 705 1: { 706 newFileMeta( 707 base.ParseInternalKey("c.SET.801"), 708 base.ParseInternalKey("g.SET.800"), 709 ), 710 newFileMeta( 711 base.ParseInternalKey("x.SET.701"), 712 base.ParseInternalKey("y.SET.700"), 713 ), 714 }, 715 }), 716 wants: []want{ 717 {"m", "n", false}, 718 }, 719 // Pretend one memtable is being flushed 720 flushing: flushableList{nil}, 721 }, 722 } 723 724 for _, tc := range testCases { 725 c := compaction{ 726 cmp: DefaultComparer.Compare, 727 comparer: DefaultComparer, 728 version: tc.version, 729 inputs: []compactionLevel{{level: tc.level}, {level: tc.level + 1}}, 730 smallest: base.ParseInternalKey("a.SET.0"), 731 largest: base.ParseInternalKey("z.SET.0"), 732 flushing: tc.flushing, 733 } 734 c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] 735 c.setupInuseKeyRanges() 736 for _, w := range tc.wants { 737 if got := c.elideRangeTombstone([]byte(w.key), []byte(w.endKey)); got != w.expected { 738 t.Errorf("%s: keys=%q-%q: got %v, want %v", tc.desc, w.key, w.endKey, got, w.expected) 739 } 740 } 741 } 742 } 743 744 func TestCompactionTransform(t *testing.T) { 745 datadriven.RunTest(t, "testdata/compaction_transform", func(t *testing.T, td *datadriven.TestData) string { 746 switch td.Cmd { 747 case "transform": 748 var snapshots []uint64 749 var keyRanges []manifest.UserKeyRange 750 disableElision := td.HasArg("disable-elision") 751 td.MaybeScanArgs(t, "snapshots", &snapshots) 752 if arg, ok := td.Arg("in-use-key-ranges"); ok { 753 for _, keyRange := range arg.Vals { 754 parts := strings.SplitN(keyRange, "-", 2) 755 start := []byte(strings.TrimSpace(parts[0])) 756 end := []byte(strings.TrimSpace(parts[1])) 757 keyRanges = append(keyRanges, manifest.UserKeyRange{ 758 Start: start, 759 End: end, 760 }) 761 } 762 } 763 span := keyspan.ParseSpan(td.Input) 764 for i := range span.Keys { 765 if i > 0 { 766 if span.Keys[i-1].Trailer < span.Keys[i].Trailer { 767 return "span keys not sorted" 768 } 769 } 770 } 771 var outSpan keyspan.Span 772 c := compaction{ 773 cmp: base.DefaultComparer.Compare, 774 comparer: base.DefaultComparer, 775 disableSpanElision: disableElision, 776 inuseKeyRanges: keyRanges, 777 } 778 transformer := rangeKeyCompactionTransform(base.DefaultComparer.Equal, snapshots, c.elideRangeTombstone) 779 if err := transformer.Transform(base.DefaultComparer.Compare, span, &outSpan); err != nil { 780 return fmt.Sprintf("error: %s", err) 781 } 782 return outSpan.String() 783 default: 784 return fmt.Sprintf("unknown command: %s", td.Cmd) 785 } 786 }) 787 } 788 789 type cpuPermissionGranter struct { 790 // requestCount is used to confirm that every GetPermission function call 791 // has a corresponding CPUWorkDone function call. 792 requestCount int 793 used bool 794 permit bool 795 } 796 797 type cpuWorkHandle struct { 798 permit bool 799 } 800 801 func (c cpuWorkHandle) Permitted() bool { 802 return c.permit 803 } 804 805 func (t *cpuPermissionGranter) GetPermission(dur time.Duration) CPUWorkHandle { 806 t.requestCount++ 807 t.used = true 808 return cpuWorkHandle{t.permit} 809 } 810 811 func (t *cpuPermissionGranter) CPUWorkDone(_ CPUWorkHandle) { 812 t.requestCount-- 813 } 814 815 // Simple test to check if compactions are using the granter, and if exactly 816 // the acquired handles are returned. 817 func TestCompactionCPUGranter(t *testing.T) { 818 mem := vfs.NewMem() 819 opts := (&Options{FS: mem}).WithFSDefaults() 820 g := &cpuPermissionGranter{permit: true} 821 opts.Experimental.CPUWorkPermissionGranter = g 822 d, err := Open("", opts) 823 if err != nil { 824 t.Fatalf("Open: %v", err) 825 } 826 defer d.Close() 827 828 d.Set([]byte{'a'}, []byte{'a'}, nil) 829 err = d.Compact([]byte{'a'}, []byte{'b'}, true) 830 if err != nil { 831 t.Fatalf("Compact: %v", err) 832 } 833 require.True(t, g.used) 834 require.Equal(t, g.requestCount, 0) 835 } 836 837 // Tests that there's no errors or panics when the default CPU granter is used. 838 func TestCompactionCPUGranterDefault(t *testing.T) { 839 mem := vfs.NewMem() 840 opts := (&Options{FS: mem}).WithFSDefaults() 841 d, err := Open("", opts) 842 if err != nil { 843 t.Fatalf("Open: %v", err) 844 } 845 defer d.Close() 846 847 d.Set([]byte{'a'}, []byte{'a'}, nil) 848 err = d.Compact([]byte{'a'}, []byte{'b'}, true) 849 if err != nil { 850 t.Fatalf("Compact: %v", err) 851 } 852 } 853 854 func TestCompaction(t *testing.T) { 855 const memTableSize = 10000 856 // Tuned so that 2 values can reside in the memtable before a flush, but a 857 // 3rd value will cause a flush. Needs to account for the max skiplist node 858 // size. 859 const valueSize = 3500 860 861 mem := vfs.NewMem() 862 opts := &Options{ 863 FS: mem, 864 MemTableSize: memTableSize, 865 DebugCheck: DebugCheckLevels, 866 L0CompactionThreshold: 8, 867 } 868 opts.testingRandomized(t).WithFSDefaults() 869 d, err := Open("", opts) 870 if err != nil { 871 t.Fatalf("Open: %v", err) 872 } 873 874 get1 := func(iter internalIterator) (ret string) { 875 b := &bytes.Buffer{} 876 for key, _ := iter.First(); key != nil; key, _ = iter.Next() { 877 b.Write(key.UserKey) 878 } 879 if err := iter.Close(); err != nil { 880 t.Fatalf("iterator Close: %v", err) 881 } 882 return b.String() 883 } 884 getAll := func() (gotMem, gotDisk string, err error) { 885 d.mu.Lock() 886 defer d.mu.Unlock() 887 888 if d.mu.mem.mutable != nil { 889 gotMem = get1(d.mu.mem.mutable.newIter(nil)) 890 } 891 ss := []string(nil) 892 v := d.mu.versions.currentVersion() 893 provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, "" /* dirName */)) 894 if err != nil { 895 t.Fatalf("%v", err) 896 } 897 defer provider.Close() 898 for _, levelMetadata := range v.Levels { 899 iter := levelMetadata.Iter() 900 for meta := iter.First(); meta != nil; meta = iter.Next() { 901 if meta.Virtual { 902 continue 903 } 904 f, err := provider.OpenForReading(context.Background(), base.FileTypeTable, meta.FileBacking.DiskFileNum, objstorage.OpenOptions{}) 905 if err != nil { 906 return "", "", errors.WithStack(err) 907 } 908 r, err := sstable.NewReader(f, sstable.ReaderOptions{}) 909 if err != nil { 910 return "", "", errors.WithStack(err) 911 } 912 defer r.Close() 913 iter, err := r.NewIter(nil /* lower */, nil /* upper */) 914 if err != nil { 915 return "", "", errors.WithStack(err) 916 } 917 ss = append(ss, get1(iter)+".") 918 } 919 } 920 sort.Strings(ss) 921 return gotMem, strings.Join(ss, ""), nil 922 } 923 924 value := bytes.Repeat([]byte("x"), valueSize) 925 testCases := []struct { 926 key, wantMem, wantDisk string 927 }{ 928 {"+A", "A", ""}, 929 {"+a", "Aa", ""}, 930 {"+B", "B", "Aa."}, 931 {"+b", "Bb", "Aa."}, 932 // The next level-0 table overwrites the B key. 933 {"+C", "C", "Aa.Bb."}, 934 {"+B", "BC", "Aa.Bb."}, 935 // The next level-0 table deletes the a key. 936 {"+D", "D", "Aa.BC.Bb."}, 937 {"-a", "Da", "Aa.BC.Bb."}, 938 {"+d", "Dad", "Aa.BC.Bb."}, 939 {"+E", "E", "Aa.BC.Bb.Dad."}, 940 {"+e", "Ee", "Aa.BC.Bb.Dad."}, 941 // The next addition creates the fourth level-0 table, and l0CompactionTrigger == 8, 942 // but since the sublevel count is doubled when comparing with l0CompactionTrigger, 943 // the addition of the 4th sublevel triggers a non-trivial compaction into one level-1 table. 944 // Note that the keys in this one larger table are interleaved from the four smaller ones. 945 {"+F", "F", "ABCDEbde."}, 946 } 947 for _, tc := range testCases { 948 if key := tc.key[1:]; tc.key[0] == '+' { 949 if err := d.Set([]byte(key), value, nil); err != nil { 950 t.Errorf("%q: Set: %v", key, err) 951 break 952 } 953 } else { 954 if err := d.Delete([]byte(key), nil); err != nil { 955 t.Errorf("%q: Delete: %v", key, err) 956 break 957 } 958 } 959 960 // try backs off to allow any writes to the memfs to complete. 961 err := try(100*time.Microsecond, 20*time.Second, func() error { 962 gotMem, gotDisk, err := getAll() 963 if err != nil { 964 return err 965 } 966 if testing.Verbose() { 967 fmt.Printf("mem=%s (%s) disk=%s (%s)\n", gotMem, tc.wantMem, gotDisk, tc.wantDisk) 968 } 969 970 if gotMem != tc.wantMem { 971 return errors.Errorf("mem: got %q, want %q", gotMem, tc.wantMem) 972 } 973 if gotDisk != tc.wantDisk { 974 return errors.Errorf("ldb: got %q, want %q", gotDisk, tc.wantDisk) 975 } 976 return nil 977 }) 978 if err != nil { 979 t.Errorf("%q: %v", tc.key, err) 980 } 981 } 982 if err := d.Close(); err != nil { 983 t.Fatalf("db Close: %v", err) 984 } 985 } 986 987 func TestValidateVersionEdit(t *testing.T) { 988 const badKey = "malformed-key" 989 990 errValidationFailed := errors.New("validation failed") 991 validateFn := func(key []byte) error { 992 if string(key) == badKey { 993 return errValidationFailed 994 } 995 return nil 996 } 997 998 cmp := DefaultComparer.Compare 999 newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata { 1000 m := (&fileMetadata{}).ExtendPointKeyBounds(cmp, smallest, largest) 1001 m.InitPhysicalBacking() 1002 return m 1003 } 1004 1005 testCases := []struct { 1006 desc string 1007 ve *versionEdit 1008 vFunc func([]byte) error 1009 wantErr error 1010 }{ 1011 { 1012 desc: "single new file; start key", 1013 ve: &versionEdit{ 1014 NewFiles: []manifest.NewFileEntry{ 1015 { 1016 Meta: newFileMeta( 1017 manifest.InternalKey{UserKey: []byte(badKey)}, 1018 manifest.InternalKey{UserKey: []byte("z")}, 1019 ), 1020 }, 1021 }, 1022 }, 1023 vFunc: validateFn, 1024 wantErr: errValidationFailed, 1025 }, 1026 { 1027 desc: "single new file; end key", 1028 ve: &versionEdit{ 1029 NewFiles: []manifest.NewFileEntry{ 1030 { 1031 Meta: newFileMeta( 1032 manifest.InternalKey{UserKey: []byte("a")}, 1033 manifest.InternalKey{UserKey: []byte(badKey)}, 1034 ), 1035 }, 1036 }, 1037 }, 1038 vFunc: validateFn, 1039 wantErr: errValidationFailed, 1040 }, 1041 { 1042 desc: "multiple new files", 1043 ve: &versionEdit{ 1044 NewFiles: []manifest.NewFileEntry{ 1045 { 1046 Meta: newFileMeta( 1047 manifest.InternalKey{UserKey: []byte("a")}, 1048 manifest.InternalKey{UserKey: []byte("c")}, 1049 ), 1050 }, 1051 { 1052 Meta: newFileMeta( 1053 manifest.InternalKey{UserKey: []byte(badKey)}, 1054 manifest.InternalKey{UserKey: []byte("z")}, 1055 ), 1056 }, 1057 }, 1058 }, 1059 vFunc: validateFn, 1060 wantErr: errValidationFailed, 1061 }, 1062 { 1063 desc: "single deleted file; start key", 1064 ve: &versionEdit{ 1065 DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ 1066 deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta( 1067 manifest.InternalKey{UserKey: []byte(badKey)}, 1068 manifest.InternalKey{UserKey: []byte("z")}, 1069 ), 1070 }, 1071 }, 1072 vFunc: validateFn, 1073 wantErr: errValidationFailed, 1074 }, 1075 { 1076 desc: "single deleted file; end key", 1077 ve: &versionEdit{ 1078 DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ 1079 deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta( 1080 manifest.InternalKey{UserKey: []byte("a")}, 1081 manifest.InternalKey{UserKey: []byte(badKey)}, 1082 ), 1083 }, 1084 }, 1085 vFunc: validateFn, 1086 wantErr: errValidationFailed, 1087 }, 1088 { 1089 desc: "multiple deleted files", 1090 ve: &versionEdit{ 1091 DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ 1092 deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta( 1093 manifest.InternalKey{UserKey: []byte("a")}, 1094 manifest.InternalKey{UserKey: []byte("c")}, 1095 ), 1096 deletedFileEntry{Level: 0, FileNum: 1}: newFileMeta( 1097 manifest.InternalKey{UserKey: []byte(badKey)}, 1098 manifest.InternalKey{UserKey: []byte("z")}, 1099 ), 1100 }, 1101 }, 1102 vFunc: validateFn, 1103 wantErr: errValidationFailed, 1104 }, 1105 { 1106 desc: "no errors", 1107 ve: &versionEdit{ 1108 NewFiles: []manifest.NewFileEntry{ 1109 { 1110 Level: 0, 1111 Meta: newFileMeta( 1112 manifest.InternalKey{UserKey: []byte("b")}, 1113 manifest.InternalKey{UserKey: []byte("c")}, 1114 ), 1115 }, 1116 { 1117 Level: 0, 1118 Meta: newFileMeta( 1119 manifest.InternalKey{UserKey: []byte("d")}, 1120 manifest.InternalKey{UserKey: []byte("g")}, 1121 ), 1122 }, 1123 }, 1124 DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ 1125 deletedFileEntry{Level: 6, FileNum: 0}: newFileMeta( 1126 manifest.InternalKey{UserKey: []byte("a")}, 1127 manifest.InternalKey{UserKey: []byte("d")}, 1128 ), 1129 deletedFileEntry{Level: 6, FileNum: 1}: newFileMeta( 1130 manifest.InternalKey{UserKey: []byte("x")}, 1131 manifest.InternalKey{UserKey: []byte("z")}, 1132 ), 1133 }, 1134 }, 1135 vFunc: validateFn, 1136 }, 1137 } 1138 1139 for _, tc := range testCases { 1140 t.Run(tc.desc, func(t *testing.T) { 1141 err := validateVersionEdit(tc.ve, tc.vFunc, base.DefaultFormatter) 1142 if tc.wantErr != nil { 1143 if !errors.Is(err, tc.wantErr) { 1144 t.Fatalf("got: %s; want: %s", err, tc.wantErr) 1145 } 1146 return 1147 } 1148 if err != nil { 1149 t.Fatalf("got %s; wanted no error", err) 1150 } 1151 }) 1152 } 1153 } 1154 1155 func TestManualCompaction(t *testing.T) { 1156 var mem vfs.FS 1157 var d *DB 1158 defer func() { 1159 if d != nil { 1160 require.NoError(t, closeAllSnapshots(d)) 1161 require.NoError(t, d.Close()) 1162 } 1163 }() 1164 1165 seed := time.Now().UnixNano() 1166 rng := rand.New(rand.NewSource(seed)) 1167 t.Logf("seed: %d", seed) 1168 1169 randVersion := func(min, max FormatMajorVersion) FormatMajorVersion { 1170 return FormatMajorVersion(int(min) + rng.Intn(int(max)-int(min)+1)) 1171 } 1172 1173 var compactionLog bytes.Buffer 1174 compactionLogEventListener := &EventListener{ 1175 CompactionEnd: func(info CompactionInfo) { 1176 // Ensure determinism. 1177 info.JobID = 1 1178 info.Duration = time.Second 1179 info.TotalDuration = time.Second 1180 fmt.Fprintln(&compactionLog, info.String()) 1181 }, 1182 } 1183 reset := func(minVersion, maxVersion FormatMajorVersion) { 1184 compactionLog.Reset() 1185 if d != nil { 1186 require.NoError(t, closeAllSnapshots(d)) 1187 require.NoError(t, d.Close()) 1188 } 1189 mem = vfs.NewMem() 1190 require.NoError(t, mem.MkdirAll("ext", 0755)) 1191 1192 opts := (&Options{ 1193 FS: mem, 1194 DebugCheck: DebugCheckLevels, 1195 DisableAutomaticCompactions: true, 1196 EventListener: compactionLogEventListener, 1197 FormatMajorVersion: randVersion(minVersion, maxVersion), 1198 }).WithFSDefaults() 1199 1200 var err error 1201 d, err = Open("", opts) 1202 require.NoError(t, err) 1203 } 1204 1205 // d.mu must be held when calling. 1206 createOngoingCompaction := func(start, end []byte, startLevel, outputLevel int) (ongoingCompaction *compaction) { 1207 ongoingCompaction = &compaction{ 1208 inputs: []compactionLevel{{level: startLevel}, {level: outputLevel}}, 1209 smallest: InternalKey{UserKey: start}, 1210 largest: InternalKey{UserKey: end}, 1211 } 1212 ongoingCompaction.startLevel = &ongoingCompaction.inputs[0] 1213 ongoingCompaction.outputLevel = &ongoingCompaction.inputs[1] 1214 // Mark files as compacting. 1215 curr := d.mu.versions.currentVersion() 1216 ongoingCompaction.startLevel.files = curr.Overlaps(startLevel, d.cmp, start, end, false) 1217 ongoingCompaction.outputLevel.files = curr.Overlaps(outputLevel, d.cmp, start, end, false) 1218 for _, cl := range ongoingCompaction.inputs { 1219 iter := cl.files.Iter() 1220 for f := iter.First(); f != nil; f = iter.Next() { 1221 f.CompactionState = manifest.CompactionStateCompacting 1222 } 1223 } 1224 d.mu.compact.inProgress[ongoingCompaction] = struct{}{} 1225 d.mu.compact.compactingCount++ 1226 return 1227 } 1228 1229 // d.mu must be held when calling. 1230 deleteOngoingCompaction := func(ongoingCompaction *compaction) { 1231 for _, cl := range ongoingCompaction.inputs { 1232 iter := cl.files.Iter() 1233 for f := iter.First(); f != nil; f = iter.Next() { 1234 f.CompactionState = manifest.CompactionStateNotCompacting 1235 } 1236 } 1237 delete(d.mu.compact.inProgress, ongoingCompaction) 1238 d.mu.compact.compactingCount-- 1239 } 1240 1241 runTest := func(t *testing.T, testData string, minVersion, maxVersion FormatMajorVersion, verbose bool) { 1242 reset(minVersion, maxVersion) 1243 var ongoingCompaction *compaction 1244 datadriven.RunTest(t, testData, func(t *testing.T, td *datadriven.TestData) string { 1245 switch td.Cmd { 1246 case "reset": 1247 reset(minVersion, maxVersion) 1248 return "" 1249 1250 case "batch": 1251 b := d.NewIndexedBatch() 1252 if err := runBatchDefineCmd(td, b); err != nil { 1253 return err.Error() 1254 } 1255 require.NoError(t, b.Commit(nil)) 1256 return "" 1257 1258 case "build": 1259 if err := runBuildCmd(td, d, mem); err != nil { 1260 return err.Error() 1261 } 1262 return "" 1263 1264 case "compact": 1265 if err := runCompactCmd(td, d); err != nil { 1266 return err.Error() 1267 } 1268 d.mu.Lock() 1269 s := d.mu.versions.currentVersion().String() 1270 if verbose { 1271 s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) 1272 } 1273 d.mu.Unlock() 1274 if td.HasArg("hide-file-num") { 1275 re := regexp.MustCompile(`([0-9]*):\[`) 1276 s = re.ReplaceAllString(s, "[") 1277 } 1278 return s 1279 1280 case "define": 1281 if d != nil { 1282 if err := closeAllSnapshots(d); err != nil { 1283 return err.Error() 1284 } 1285 if err := d.Close(); err != nil { 1286 return err.Error() 1287 } 1288 } 1289 1290 mem = vfs.NewMem() 1291 opts := (&Options{ 1292 FS: mem, 1293 DebugCheck: DebugCheckLevels, 1294 EventListener: compactionLogEventListener, 1295 FormatMajorVersion: randVersion(minVersion, maxVersion), 1296 DisableAutomaticCompactions: true, 1297 }).WithFSDefaults() 1298 1299 var err error 1300 if d, err = runDBDefineCmd(td, opts); err != nil { 1301 return err.Error() 1302 } 1303 1304 s := d.mu.versions.currentVersion().String() 1305 if verbose { 1306 s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) 1307 } 1308 return s 1309 1310 case "file-sizes": 1311 return runTableFileSizesCmd(td, d) 1312 1313 case "flush": 1314 if err := d.Flush(); err != nil { 1315 return err.Error() 1316 } 1317 d.mu.Lock() 1318 s := d.mu.versions.currentVersion().String() 1319 if verbose { 1320 s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) 1321 } 1322 d.mu.Unlock() 1323 return s 1324 1325 case "ingest": 1326 if err := runIngestCmd(td, d, mem); err != nil { 1327 return err.Error() 1328 } 1329 d.mu.Lock() 1330 s := d.mu.versions.currentVersion().String() 1331 if verbose { 1332 s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) 1333 } 1334 d.mu.Unlock() 1335 return s 1336 1337 case "iter": 1338 // TODO(peter): runDBDefineCmd doesn't properly update the visible 1339 // sequence number. So we have to use a snapshot with a very large 1340 // sequence number, otherwise the DB appears empty. 1341 snap := Snapshot{ 1342 db: d, 1343 seqNum: InternalKeySeqNumMax, 1344 } 1345 iter, _ := snap.NewIter(nil) 1346 return runIterCmd(td, iter, true) 1347 1348 case "lsm": 1349 return runLSMCmd(td, d) 1350 1351 case "populate": 1352 b := d.NewBatch() 1353 runPopulateCmd(t, td, b) 1354 count := b.Count() 1355 require.NoError(t, b.Commit(nil)) 1356 return fmt.Sprintf("wrote %d keys\n", count) 1357 1358 case "async-compact": 1359 var s string 1360 ch := make(chan error, 1) 1361 go func() { 1362 if err := runCompactCmd(td, d); err != nil { 1363 ch <- err 1364 close(ch) 1365 return 1366 } 1367 d.mu.Lock() 1368 s = d.mu.versions.currentVersion().String() 1369 d.mu.Unlock() 1370 close(ch) 1371 }() 1372 1373 manualDone := func() bool { 1374 select { 1375 case <-ch: 1376 return true 1377 default: 1378 return false 1379 } 1380 } 1381 1382 err := try(100*time.Microsecond, 20*time.Second, func() error { 1383 if manualDone() { 1384 return nil 1385 } 1386 1387 d.mu.Lock() 1388 defer d.mu.Unlock() 1389 if len(d.mu.compact.manual) == 0 { 1390 return errors.New("no manual compaction queued") 1391 } 1392 manual := d.mu.compact.manual[0] 1393 if manual.retries == 0 { 1394 return errors.New("manual compaction has not been retried") 1395 } 1396 return nil 1397 }) 1398 if err != nil { 1399 return err.Error() 1400 } 1401 1402 if manualDone() { 1403 return "manual compaction did not block for ongoing\n" + s 1404 } 1405 1406 d.mu.Lock() 1407 deleteOngoingCompaction(ongoingCompaction) 1408 ongoingCompaction = nil 1409 d.maybeScheduleCompaction() 1410 d.mu.Unlock() 1411 if err := <-ch; err != nil { 1412 return err.Error() 1413 } 1414 return "manual compaction blocked until ongoing finished\n" + s 1415 1416 case "add-ongoing-compaction": 1417 var startLevel int 1418 var outputLevel int 1419 var start string 1420 var end string 1421 td.ScanArgs(t, "startLevel", &startLevel) 1422 td.ScanArgs(t, "outputLevel", &outputLevel) 1423 td.ScanArgs(t, "start", &start) 1424 td.ScanArgs(t, "end", &end) 1425 d.mu.Lock() 1426 ongoingCompaction = createOngoingCompaction([]byte(start), []byte(end), startLevel, outputLevel) 1427 d.mu.Unlock() 1428 return "" 1429 1430 case "remove-ongoing-compaction": 1431 d.mu.Lock() 1432 deleteOngoingCompaction(ongoingCompaction) 1433 ongoingCompaction = nil 1434 d.mu.Unlock() 1435 return "" 1436 1437 case "set-concurrent-compactions": 1438 var concurrentCompactions int 1439 td.ScanArgs(t, "num", &concurrentCompactions) 1440 d.opts.MaxConcurrentCompactions = func() int { 1441 return concurrentCompactions 1442 } 1443 return "" 1444 1445 case "sstable-properties": 1446 return runSSTablePropertiesCmd(t, td, d) 1447 1448 case "wait-pending-table-stats": 1449 return runTableStatsCmd(td, d) 1450 1451 case "close-snapshots": 1452 d.mu.Lock() 1453 // Re-enable automatic compactions if they were disabled so that 1454 // closing snapshots can trigger elision-only compactions if 1455 // necessary. 1456 d.opts.DisableAutomaticCompactions = false 1457 1458 var ss []*Snapshot 1459 l := &d.mu.snapshots 1460 for i := l.root.next; i != &l.root; i = i.next { 1461 ss = append(ss, i) 1462 } 1463 d.mu.Unlock() 1464 for i := range ss { 1465 if err := ss[i].Close(); err != nil { 1466 return err.Error() 1467 } 1468 } 1469 return "" 1470 1471 case "compaction-log": 1472 defer compactionLog.Reset() 1473 return compactionLog.String() 1474 1475 default: 1476 return fmt.Sprintf("unknown command: %s", td.Cmd) 1477 } 1478 }) 1479 } 1480 1481 testCases := []struct { 1482 testData string 1483 minVersion FormatMajorVersion 1484 maxVersion FormatMajorVersion // inclusive 1485 verbose bool 1486 }{ 1487 { 1488 testData: "testdata/manual_compaction", 1489 minVersion: FormatMostCompatible, 1490 maxVersion: FormatSetWithDelete - 1, 1491 }, 1492 { 1493 testData: "testdata/manual_compaction_set_with_del", 1494 minVersion: FormatBlockPropertyCollector, 1495 // This test exercises split user keys. 1496 maxVersion: FormatSplitUserKeysMarkedCompacted - 1, 1497 }, 1498 { 1499 testData: "testdata/singledel_manual_compaction", 1500 minVersion: FormatMostCompatible, 1501 maxVersion: FormatSetWithDelete - 1, 1502 }, 1503 { 1504 testData: "testdata/singledel_manual_compaction_set_with_del", 1505 minVersion: FormatSetWithDelete, 1506 maxVersion: internalFormatNewest, 1507 }, 1508 { 1509 testData: "testdata/manual_compaction_range_keys", 1510 minVersion: FormatRangeKeys, 1511 maxVersion: internalFormatNewest, 1512 verbose: true, 1513 }, 1514 { 1515 testData: "testdata/manual_compaction_file_boundaries", 1516 minVersion: FormatBlockPropertyCollector, 1517 // This test exercises split user keys. 1518 maxVersion: FormatSplitUserKeysMarkedCompacted - 1, 1519 }, 1520 { 1521 testData: "testdata/manual_compaction_file_boundaries_delsized", 1522 minVersion: FormatDeleteSizedAndObsolete, 1523 maxVersion: internalFormatNewest, 1524 }, 1525 { 1526 testData: "testdata/manual_compaction_set_with_del_sstable_Pebblev4", 1527 minVersion: FormatDeleteSizedAndObsolete, 1528 maxVersion: internalFormatNewest, 1529 }, 1530 { 1531 testData: "testdata/manual_compaction_multilevel", 1532 minVersion: FormatMostCompatible, 1533 maxVersion: internalFormatNewest, 1534 }, 1535 } 1536 1537 for _, tc := range testCases { 1538 t.Run(tc.testData, func(t *testing.T) { 1539 runTest(t, tc.testData, tc.minVersion, tc.maxVersion, tc.verbose) 1540 }) 1541 } 1542 } 1543 1544 func TestCompactionFindGrandparentLimit(t *testing.T) { 1545 cmp := DefaultComparer.Compare 1546 var grandparents []*fileMetadata 1547 1548 var fileNum base.FileNum 1549 parseMeta := func(s string) *fileMetadata { 1550 parts := strings.Split(s, "-") 1551 if len(parts) != 2 { 1552 t.Fatalf("malformed table spec: %s", s) 1553 } 1554 fileNum++ 1555 m := (&fileMetadata{ 1556 FileNum: fileNum, 1557 }).ExtendPointKeyBounds( 1558 cmp, 1559 InternalKey{UserKey: []byte(parts[0])}, 1560 InternalKey{UserKey: []byte(parts[1])}, 1561 ) 1562 m.InitPhysicalBacking() 1563 return m 1564 } 1565 1566 datadriven.RunTest(t, "testdata/compaction_find_grandparent_limit", 1567 func(t *testing.T, d *datadriven.TestData) string { 1568 switch d.Cmd { 1569 case "define": 1570 grandparents = nil 1571 if len(d.Input) == 0 { 1572 return "" 1573 } 1574 for _, data := range strings.Split(d.Input, "\n") { 1575 parts := strings.Fields(data) 1576 if len(parts) != 2 { 1577 return fmt.Sprintf("malformed test:\n%s", d.Input) 1578 } 1579 1580 meta := parseMeta(parts[0]) 1581 var err error 1582 meta.Size, err = strconv.ParseUint(parts[1], 10, 64) 1583 if err != nil { 1584 return err.Error() 1585 } 1586 grandparents = append(grandparents, meta) 1587 } 1588 return "" 1589 1590 case "compact": 1591 c := &compaction{ 1592 cmp: cmp, 1593 equal: DefaultComparer.Equal, 1594 comparer: DefaultComparer, 1595 grandparents: manifest.NewLevelSliceKeySorted(cmp, grandparents), 1596 } 1597 if len(d.CmdArgs) != 1 { 1598 return fmt.Sprintf("%s expects 1 argument", d.Cmd) 1599 } 1600 if len(d.CmdArgs[0].Vals) != 1 { 1601 return fmt.Sprintf("%s expects 1 value", d.CmdArgs[0].Key) 1602 } 1603 var err error 1604 c.maxOverlapBytes, err = strconv.ParseUint(d.CmdArgs[0].Vals[0], 10, 64) 1605 if err != nil { 1606 return err.Error() 1607 } 1608 1609 var buf bytes.Buffer 1610 var smallest, largest string 1611 var grandparentLimit []byte 1612 for i, key := range strings.Fields(d.Input) { 1613 if i == 0 { 1614 smallest = key 1615 grandparentLimit = c.findGrandparentLimit([]byte(key)) 1616 } 1617 if grandparentLimit != nil && c.cmp(grandparentLimit, []byte(key)) < 0 { 1618 fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) 1619 smallest = key 1620 grandparentLimit = c.findGrandparentLimit([]byte(key)) 1621 } 1622 largest = key 1623 } 1624 fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) 1625 return buf.String() 1626 1627 default: 1628 return fmt.Sprintf("unknown command: %s", d.Cmd) 1629 } 1630 }) 1631 } 1632 1633 func TestCompactionFindL0Limit(t *testing.T) { 1634 cmp := DefaultComparer.Compare 1635 1636 fileNumCounter := 1 1637 parseMeta := func(s string) (*fileMetadata, error) { 1638 fields := strings.Fields(s) 1639 parts := strings.Split(fields[0], "-") 1640 if len(parts) != 2 { 1641 return nil, errors.Errorf("malformed table spec: %s", s) 1642 } 1643 m := (&fileMetadata{ 1644 FileNum: base.FileNum(fileNumCounter), 1645 }).ExtendPointKeyBounds( 1646 cmp, 1647 base.ParseInternalKey(strings.TrimSpace(parts[0])), 1648 base.ParseInternalKey(strings.TrimSpace(parts[1])), 1649 ) 1650 fileNumCounter++ 1651 m.SmallestSeqNum = m.Smallest.SeqNum() 1652 m.LargestSeqNum = m.Largest.SeqNum() 1653 1654 for _, field := range fields[1:] { 1655 parts := strings.Split(field, "=") 1656 switch parts[0] { 1657 case "size": 1658 size, err := strconv.ParseUint(parts[1], 10, 64) 1659 if err != nil { 1660 t.Fatal(err) 1661 } 1662 m.Size = size 1663 } 1664 } 1665 m.InitPhysicalBacking() 1666 return m, nil 1667 } 1668 1669 var vers *version 1670 flushSplitBytes := int64(0) 1671 1672 datadriven.RunTest(t, "testdata/compaction_find_l0_limit", 1673 func(t *testing.T, d *datadriven.TestData) string { 1674 switch d.Cmd { 1675 case "define": 1676 fileMetas := [manifest.NumLevels][]*fileMetadata{} 1677 baseLevel := manifest.NumLevels - 1 1678 level := 0 1679 d.MaybeScanArgs(t, "flush_split_bytes", &flushSplitBytes) 1680 1681 var err error 1682 for _, data := range strings.Split(d.Input, "\n") { 1683 data = strings.TrimSpace(data) 1684 switch data { 1685 case "L0", "L1", "L2", "L3", "L4", "L5", "L6": 1686 level, err = strconv.Atoi(data[1:]) 1687 if err != nil { 1688 return err.Error() 1689 } 1690 default: 1691 meta, err := parseMeta(data) 1692 if err != nil { 1693 return err.Error() 1694 } 1695 if level != 0 && level < baseLevel { 1696 baseLevel = level 1697 } 1698 fileMetas[level] = append(fileMetas[level], meta) 1699 } 1700 } 1701 1702 vers = manifest.NewVersion(DefaultComparer.Compare, base.DefaultFormatter, flushSplitBytes, fileMetas) 1703 flushSplitKeys := vers.L0Sublevels.FlushSplitKeys() 1704 1705 var buf strings.Builder 1706 buf.WriteString(vers.String()) 1707 buf.WriteString("flush split keys:\n") 1708 for _, key := range flushSplitKeys { 1709 fmt.Fprintf(&buf, "\t%s\n", base.DefaultFormatter(key)) 1710 } 1711 1712 return buf.String() 1713 1714 case "flush": 1715 c := &compaction{ 1716 cmp: cmp, 1717 equal: DefaultComparer.Equal, 1718 comparer: DefaultComparer, 1719 version: vers, 1720 l0Limits: vers.L0Sublevels.FlushSplitKeys(), 1721 inputs: []compactionLevel{{level: -1}, {level: 0}}, 1722 } 1723 c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] 1724 1725 var buf bytes.Buffer 1726 var smallest, largest string 1727 var l0Limit []byte 1728 for i, key := range strings.Fields(d.Input) { 1729 if i == 0 { 1730 smallest = key 1731 l0Limit = c.findL0Limit([]byte(key)) 1732 } 1733 if l0Limit != nil && c.cmp(l0Limit, []byte(key)) < 0 { 1734 fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) 1735 smallest = key 1736 l0Limit = c.findL0Limit([]byte(key)) 1737 } 1738 largest = key 1739 } 1740 fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) 1741 return buf.String() 1742 1743 default: 1744 return fmt.Sprintf("unknown command: %s", d.Cmd) 1745 } 1746 }) 1747 } 1748 1749 func TestCompactionOutputLevel(t *testing.T) { 1750 opts := (*Options)(nil).EnsureDefaults() 1751 version := &version{} 1752 1753 datadriven.RunTest(t, "testdata/compaction_output_level", 1754 func(t *testing.T, d *datadriven.TestData) (res string) { 1755 defer func() { 1756 if r := recover(); r != nil { 1757 res = fmt.Sprintln(r) 1758 } 1759 }() 1760 1761 switch d.Cmd { 1762 case "compact": 1763 var start, base int 1764 d.ScanArgs(t, "start", &start) 1765 d.ScanArgs(t, "base", &base) 1766 pc := newPickedCompaction(opts, version, start, defaultOutputLevel(start, base), base) 1767 c := newCompaction(pc, opts, time.Now(), nil /* provider */) 1768 return fmt.Sprintf("output=%d\nmax-output-file-size=%d\n", 1769 c.outputLevel.level, c.maxOutputFileSize) 1770 1771 default: 1772 return fmt.Sprintf("unknown command: %s", d.Cmd) 1773 } 1774 }) 1775 } 1776 1777 func TestCompactionAtomicUnitBounds(t *testing.T) { 1778 cmp := DefaultComparer.Compare 1779 var files manifest.LevelSlice 1780 1781 parseMeta := func(s string) *fileMetadata { 1782 parts := strings.Split(s, "-") 1783 if len(parts) != 2 { 1784 t.Fatalf("malformed table spec: %s", s) 1785 } 1786 m := (&fileMetadata{}).ExtendPointKeyBounds( 1787 cmp, 1788 base.ParseInternalKey(parts[0]), 1789 base.ParseInternalKey(parts[1]), 1790 ) 1791 m.InitPhysicalBacking() 1792 return m 1793 } 1794 1795 datadriven.RunTest(t, "testdata/compaction_atomic_unit_bounds", 1796 func(t *testing.T, d *datadriven.TestData) string { 1797 switch d.Cmd { 1798 case "define": 1799 files = manifest.LevelSlice{} 1800 if len(d.Input) == 0 { 1801 return "" 1802 } 1803 var ff []*fileMetadata 1804 for _, data := range strings.Split(d.Input, "\n") { 1805 meta := parseMeta(data) 1806 meta.FileNum = FileNum(len(ff)) 1807 ff = append(ff, meta) 1808 } 1809 files = manifest.NewLevelSliceKeySorted(cmp, ff) 1810 return "" 1811 1812 case "atomic-unit-bounds": 1813 c := &compaction{ 1814 cmp: cmp, 1815 equal: DefaultComparer.Equal, 1816 comparer: DefaultComparer, 1817 inputs: []compactionLevel{{files: files}, {}}, 1818 } 1819 c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] 1820 if len(d.CmdArgs) != 1 { 1821 return fmt.Sprintf("%s expects 1 argument", d.Cmd) 1822 } 1823 index, err := strconv.ParseInt(d.CmdArgs[0].String(), 10, 64) 1824 if err != nil { 1825 return err.Error() 1826 } 1827 iter := files.Iter() 1828 // Advance iter to `index`. 1829 _ = iter.First() 1830 for i := int64(0); i < index; i++ { 1831 _ = iter.Next() 1832 } 1833 atomicUnit, _ := expandToAtomicUnit(c.cmp, iter.Take().Slice(), true /* disableIsCompacting */) 1834 lower, upper := manifest.KeyRange(c.cmp, atomicUnit.Iter()) 1835 return fmt.Sprintf("%s-%s\n", lower.UserKey, upper.UserKey) 1836 1837 default: 1838 return fmt.Sprintf("unknown command: %s", d.Cmd) 1839 } 1840 }) 1841 } 1842 1843 func TestCompactionDeleteOnlyHints(t *testing.T) { 1844 parseUint64 := func(s string) uint64 { 1845 v, err := strconv.ParseUint(s, 10, 64) 1846 require.NoError(t, err) 1847 return v 1848 } 1849 var d *DB 1850 defer func() { 1851 if d != nil { 1852 require.NoError(t, closeAllSnapshots(d)) 1853 require.NoError(t, d.Close()) 1854 } 1855 }() 1856 1857 var compactInfo *CompactionInfo // protected by d.mu 1858 reset := func() (*Options, error) { 1859 if d != nil { 1860 compactInfo = nil 1861 if err := closeAllSnapshots(d); err != nil { 1862 return nil, err 1863 } 1864 if err := d.Close(); err != nil { 1865 return nil, err 1866 } 1867 } 1868 opts := (&Options{ 1869 FS: vfs.NewMem(), 1870 DebugCheck: DebugCheckLevels, 1871 EventListener: &EventListener{ 1872 CompactionEnd: func(info CompactionInfo) { 1873 if compactInfo != nil { 1874 return 1875 } 1876 compactInfo = &info 1877 }, 1878 }, 1879 FormatMajorVersion: internalFormatNewest, 1880 }).WithFSDefaults() 1881 1882 // Collection of table stats can trigger compactions. As we want full 1883 // control over when compactions are run, disable stats by default. 1884 opts.private.disableTableStats = true 1885 1886 return opts, nil 1887 } 1888 1889 compactionString := func() string { 1890 for d.mu.compact.compactingCount > 0 { 1891 d.mu.compact.cond.Wait() 1892 } 1893 1894 s := "(none)" 1895 if compactInfo != nil { 1896 // Fix the job ID and durations for determinism. 1897 compactInfo.JobID = 100 1898 compactInfo.Duration = time.Second 1899 compactInfo.TotalDuration = 2 * time.Second 1900 s = compactInfo.String() 1901 compactInfo = nil 1902 } 1903 return s 1904 } 1905 1906 var err error 1907 var opts *Options 1908 datadriven.RunTest(t, "testdata/compaction_delete_only_hints", 1909 func(t *testing.T, td *datadriven.TestData) string { 1910 switch td.Cmd { 1911 case "define": 1912 opts, err = reset() 1913 if err != nil { 1914 return err.Error() 1915 } 1916 d, err = runDBDefineCmd(td, opts) 1917 if err != nil { 1918 return err.Error() 1919 } 1920 d.mu.Lock() 1921 s := d.mu.versions.currentVersion().String() 1922 d.mu.Unlock() 1923 return s 1924 1925 case "force-set-hints": 1926 d.mu.Lock() 1927 defer d.mu.Unlock() 1928 d.mu.compact.deletionHints = d.mu.compact.deletionHints[:0] 1929 var buf bytes.Buffer 1930 for _, data := range strings.Split(td.Input, "\n") { 1931 parts := strings.FieldsFunc(strings.TrimSpace(data), 1932 func(r rune) bool { return r == '-' || r == ' ' || r == '.' }) 1933 1934 start, end := []byte(parts[2]), []byte(parts[3]) 1935 1936 var tombstoneFile *fileMetadata 1937 tombstoneLevel := int(parseUint64(parts[0][1:])) 1938 1939 // Set file number to the value provided in the input. 1940 tombstoneFile = &fileMetadata{ 1941 FileNum: base.FileNum(parseUint64(parts[1])), 1942 } 1943 1944 var hintType deleteCompactionHintType 1945 switch typ := parts[7]; typ { 1946 case "point_key_only": 1947 hintType = deleteCompactionHintTypePointKeyOnly 1948 case "range_key_only": 1949 hintType = deleteCompactionHintTypeRangeKeyOnly 1950 case "point_and_range_key": 1951 hintType = deleteCompactionHintTypePointAndRangeKey 1952 default: 1953 return fmt.Sprintf("unknown hint type: %s", typ) 1954 } 1955 1956 h := deleteCompactionHint{ 1957 hintType: hintType, 1958 start: start, 1959 end: end, 1960 fileSmallestSeqNum: parseUint64(parts[4]), 1961 tombstoneLevel: tombstoneLevel, 1962 tombstoneFile: tombstoneFile, 1963 tombstoneSmallestSeqNum: parseUint64(parts[5]), 1964 tombstoneLargestSeqNum: parseUint64(parts[6]), 1965 } 1966 d.mu.compact.deletionHints = append(d.mu.compact.deletionHints, h) 1967 fmt.Fprintln(&buf, h.String()) 1968 } 1969 return buf.String() 1970 1971 case "get-hints": 1972 d.mu.Lock() 1973 defer d.mu.Unlock() 1974 1975 // Force collection of table stats. This requires re-enabling the 1976 // collection flag. We also do not want compactions to run as part of 1977 // the stats collection job, so we disable it temporarily. 1978 d.opts.private.disableTableStats = false 1979 d.opts.DisableAutomaticCompactions = true 1980 defer func() { 1981 d.opts.private.disableTableStats = true 1982 d.opts.DisableAutomaticCompactions = false 1983 }() 1984 1985 // NB: collectTableStats attempts to acquire the lock. Temporarily 1986 // unlock here to avoid a deadlock. 1987 d.mu.Unlock() 1988 didRun := d.collectTableStats() 1989 d.mu.Lock() 1990 1991 if !didRun { 1992 // If a job was already running, wait for the results. 1993 d.waitTableStats() 1994 } 1995 1996 hints := d.mu.compact.deletionHints 1997 if len(hints) == 0 { 1998 return "(none)" 1999 } 2000 var buf bytes.Buffer 2001 for _, h := range hints { 2002 buf.WriteString(h.String() + "\n") 2003 } 2004 return buf.String() 2005 2006 case "maybe-compact": 2007 d.mu.Lock() 2008 d.maybeScheduleCompaction() 2009 2010 var buf bytes.Buffer 2011 fmt.Fprintf(&buf, "Deletion hints:\n") 2012 for _, h := range d.mu.compact.deletionHints { 2013 fmt.Fprintf(&buf, " %s\n", h.String()) 2014 } 2015 if len(d.mu.compact.deletionHints) == 0 { 2016 fmt.Fprintf(&buf, " (none)\n") 2017 } 2018 fmt.Fprintf(&buf, "Compactions:\n") 2019 fmt.Fprintf(&buf, " %s", compactionString()) 2020 d.mu.Unlock() 2021 return buf.String() 2022 2023 case "compact": 2024 if err := runCompactCmd(td, d); err != nil { 2025 return err.Error() 2026 } 2027 d.mu.Lock() 2028 compactInfo = nil 2029 s := d.mu.versions.currentVersion().String() 2030 d.mu.Unlock() 2031 return s 2032 2033 case "close-snapshot": 2034 seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64) 2035 if err != nil { 2036 return err.Error() 2037 } 2038 d.mu.Lock() 2039 var s *Snapshot 2040 l := &d.mu.snapshots 2041 for i := l.root.next; i != &l.root; i = i.next { 2042 if i.seqNum == seqNum { 2043 s = i 2044 } 2045 } 2046 d.mu.Unlock() 2047 if s == nil { 2048 return "(not found)" 2049 } else if err := s.Close(); err != nil { 2050 return err.Error() 2051 } 2052 2053 d.mu.Lock() 2054 // Closing the snapshot may have triggered a compaction. 2055 str := compactionString() 2056 d.mu.Unlock() 2057 return str 2058 2059 case "iter": 2060 snap := Snapshot{ 2061 db: d, 2062 seqNum: InternalKeySeqNumMax, 2063 } 2064 iter, _ := snap.NewIter(nil) 2065 return runIterCmd(td, iter, true) 2066 2067 case "reset": 2068 opts, err = reset() 2069 if err != nil { 2070 return err.Error() 2071 } 2072 d, err = Open("", opts) 2073 if err != nil { 2074 return err.Error() 2075 } 2076 return "" 2077 2078 case "ingest": 2079 if err = runBuildCmd(td, d, d.opts.FS); err != nil { 2080 return err.Error() 2081 } 2082 if err = runIngestCmd(td, d, d.opts.FS); err != nil { 2083 return err.Error() 2084 } 2085 return "OK" 2086 2087 case "describe-lsm": 2088 d.mu.Lock() 2089 s := d.mu.versions.currentVersion().String() 2090 d.mu.Unlock() 2091 return s 2092 2093 default: 2094 return fmt.Sprintf("unknown command: %s", td.Cmd) 2095 } 2096 }) 2097 } 2098 2099 func TestCompactionTombstones(t *testing.T) { 2100 var d *DB 2101 defer func() { 2102 if d != nil { 2103 require.NoError(t, closeAllSnapshots(d)) 2104 require.NoError(t, d.Close()) 2105 } 2106 }() 2107 2108 var compactInfo *CompactionInfo // protected by d.mu 2109 2110 compactionString := func() string { 2111 for d.mu.compact.compactingCount > 0 { 2112 d.mu.compact.cond.Wait() 2113 } 2114 2115 s := "(none)" 2116 if compactInfo != nil { 2117 // Fix the job ID and durations for determinism. 2118 compactInfo.JobID = 100 2119 compactInfo.Duration = time.Second 2120 compactInfo.TotalDuration = 2 * time.Second 2121 s = compactInfo.String() 2122 compactInfo = nil 2123 } 2124 return s 2125 } 2126 2127 datadriven.RunTest(t, "testdata/compaction_tombstones", 2128 func(t *testing.T, td *datadriven.TestData) string { 2129 switch td.Cmd { 2130 case "define": 2131 if d != nil { 2132 compactInfo = nil 2133 require.NoError(t, closeAllSnapshots(d)) 2134 if err := d.Close(); err != nil { 2135 return err.Error() 2136 } 2137 } 2138 opts := (&Options{ 2139 FS: vfs.NewMem(), 2140 DebugCheck: DebugCheckLevels, 2141 EventListener: &EventListener{ 2142 CompactionEnd: func(info CompactionInfo) { 2143 compactInfo = &info 2144 }, 2145 }, 2146 FormatMajorVersion: internalFormatNewest, 2147 }).WithFSDefaults() 2148 var err error 2149 d, err = runDBDefineCmd(td, opts) 2150 if err != nil { 2151 return err.Error() 2152 } 2153 d.mu.Lock() 2154 s := d.mu.versions.currentVersion().String() 2155 d.mu.Unlock() 2156 return s 2157 2158 case "maybe-compact": 2159 d.mu.Lock() 2160 d.opts.DisableAutomaticCompactions = false 2161 d.maybeScheduleCompaction() 2162 s := compactionString() 2163 d.mu.Unlock() 2164 return s 2165 2166 case "wait-pending-table-stats": 2167 return runTableStatsCmd(td, d) 2168 2169 case "close-snapshot": 2170 seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64) 2171 if err != nil { 2172 return err.Error() 2173 } 2174 d.mu.Lock() 2175 var s *Snapshot 2176 l := &d.mu.snapshots 2177 for i := l.root.next; i != &l.root; i = i.next { 2178 if i.seqNum == seqNum { 2179 s = i 2180 } 2181 } 2182 d.mu.Unlock() 2183 if s == nil { 2184 return "(not found)" 2185 } else if err := s.Close(); err != nil { 2186 return err.Error() 2187 } 2188 2189 d.mu.Lock() 2190 // Closing the snapshot may have triggered a compaction. 2191 str := compactionString() 2192 d.mu.Unlock() 2193 return str 2194 2195 case "close": 2196 if err := d.Close(); err != nil { 2197 return err.Error() 2198 } 2199 d = nil 2200 return "" 2201 2202 case "version": 2203 d.mu.Lock() 2204 s := d.mu.versions.currentVersion().String() 2205 d.mu.Unlock() 2206 return s 2207 2208 default: 2209 return fmt.Sprintf("unknown command: %s", td.Cmd) 2210 } 2211 }) 2212 } 2213 2214 func closeAllSnapshots(d *DB) error { 2215 d.mu.Lock() 2216 var ss []*Snapshot 2217 l := &d.mu.snapshots 2218 for i := l.root.next; i != &l.root; i = i.next { 2219 ss = append(ss, i) 2220 } 2221 d.mu.Unlock() 2222 for i := range ss { 2223 if err := ss[i].Close(); err != nil { 2224 return err 2225 } 2226 } 2227 return nil 2228 } 2229 2230 func TestCompactionReadTriggeredQueue(t *testing.T) { 2231 2232 // Convert a read compaction to a string which this test 2233 // understands. 2234 showRC := func(rc *readCompaction) string { 2235 return fmt.Sprintf( 2236 "L%d: %s-%s %d\n", rc.level, string(rc.start), string(rc.end), rc.fileNum, 2237 ) 2238 } 2239 2240 var queue *readCompactionQueue 2241 2242 datadriven.RunTest(t, "testdata/read_compaction_queue", 2243 func(t *testing.T, td *datadriven.TestData) string { 2244 switch td.Cmd { 2245 case "create": 2246 queue = &readCompactionQueue{} 2247 return "(success)" 2248 case "add-compaction": 2249 for _, line := range strings.Split(td.Input, "\n") { 2250 if line == "" { 2251 continue 2252 } 2253 parts := strings.Split(line, " ") 2254 2255 if len(parts) != 3 { 2256 return "error: malformed data for add-compaction. usage: <level>: <start>-<end> <filenum>" 2257 } 2258 if l, err := strconv.Atoi(parts[0][1:2]); err == nil { 2259 keys := strings.Split(parts[1], "-") 2260 fileNum, _ := strconv.Atoi(parts[2]) 2261 rc := readCompaction{ 2262 level: l, 2263 start: []byte(keys[0]), 2264 end: []byte(keys[1]), 2265 fileNum: base.FileNum(fileNum), 2266 } 2267 queue.add(&rc, DefaultComparer.Compare) 2268 } else { 2269 return err.Error() 2270 } 2271 } 2272 return "" 2273 case "remove-compaction": 2274 rc := queue.remove() 2275 if rc == nil { 2276 return "(nil)" 2277 } 2278 return showRC(rc) 2279 case "print-size": 2280 // Print the size of the queue. 2281 return fmt.Sprintf("%d", queue.size) 2282 case "print-queue": 2283 // Print each element of the queue on a separate line. 2284 var sb strings.Builder 2285 if queue.size == 0 { 2286 sb.WriteString("(empty)") 2287 } 2288 2289 for i := 0; i < queue.size; i++ { 2290 rc := queue.at(i) 2291 sb.WriteString(showRC(rc)) 2292 } 2293 return sb.String() 2294 default: 2295 return fmt.Sprintf("unknown command: %s", td.Cmd) 2296 } 2297 }, 2298 ) 2299 } 2300 2301 func (qu *readCompactionQueue) at(i int) *readCompaction { 2302 if i >= qu.size { 2303 return nil 2304 } 2305 2306 return qu.queue[i] 2307 } 2308 2309 func TestCompactionReadTriggered(t *testing.T) { 2310 var d *DB 2311 defer func() { 2312 if d != nil { 2313 require.NoError(t, d.Close()) 2314 } 2315 }() 2316 2317 var compactInfo *CompactionInfo // protected by d.mu 2318 2319 compactionString := func() string { 2320 for d.mu.compact.compactingCount > 0 { 2321 d.mu.compact.cond.Wait() 2322 } 2323 2324 s := "(none)" 2325 if compactInfo != nil { 2326 // Fix the job ID and durations for determinism. 2327 compactInfo.JobID = 100 2328 compactInfo.Duration = time.Second 2329 compactInfo.TotalDuration = 2 * time.Second 2330 s = compactInfo.String() 2331 compactInfo = nil 2332 } 2333 return s 2334 } 2335 2336 datadriven.RunTest(t, "testdata/compaction_read_triggered", 2337 func(t *testing.T, td *datadriven.TestData) string { 2338 switch td.Cmd { 2339 case "define": 2340 if d != nil { 2341 compactInfo = nil 2342 if err := d.Close(); err != nil { 2343 return err.Error() 2344 } 2345 } 2346 opts := (&Options{ 2347 FS: vfs.NewMem(), 2348 DebugCheck: DebugCheckLevels, 2349 EventListener: &EventListener{ 2350 CompactionEnd: func(info CompactionInfo) { 2351 compactInfo = &info 2352 }, 2353 }, 2354 }).WithFSDefaults() 2355 var err error 2356 d, err = runDBDefineCmd(td, opts) 2357 if err != nil { 2358 return err.Error() 2359 } 2360 d.mu.Lock() 2361 s := d.mu.versions.currentVersion().String() 2362 d.mu.Unlock() 2363 return s 2364 2365 case "add-read-compaction": 2366 d.mu.Lock() 2367 td.MaybeScanArgs(t, "flushing", &d.mu.compact.flushing) 2368 for _, line := range strings.Split(td.Input, "\n") { 2369 if line == "" { 2370 continue 2371 } 2372 parts := strings.Split(line, " ") 2373 if len(parts) != 3 { 2374 return "error: malformed data for add-read-compaction. usage: <level>: <start>-<end> <filenum>" 2375 } 2376 if l, err := strconv.Atoi(parts[0][:1]); err == nil { 2377 keys := strings.Split(parts[1], "-") 2378 fileNum, _ := strconv.Atoi(parts[2]) 2379 rc := readCompaction{ 2380 level: l, 2381 start: []byte(keys[0]), 2382 end: []byte(keys[1]), 2383 fileNum: base.FileNum(fileNum), 2384 } 2385 d.mu.compact.readCompactions.add(&rc, DefaultComparer.Compare) 2386 } else { 2387 return err.Error() 2388 } 2389 } 2390 d.mu.Unlock() 2391 return "" 2392 2393 case "show-read-compactions": 2394 d.mu.Lock() 2395 var sb strings.Builder 2396 if d.mu.compact.readCompactions.size == 0 { 2397 sb.WriteString("(none)") 2398 } 2399 for i := 0; i < d.mu.compact.readCompactions.size; i++ { 2400 rc := d.mu.compact.readCompactions.at(i) 2401 sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end))) 2402 } 2403 d.mu.Unlock() 2404 return sb.String() 2405 2406 case "maybe-compact": 2407 d.mu.Lock() 2408 d.opts.DisableAutomaticCompactions = false 2409 d.maybeScheduleCompaction() 2410 s := compactionString() 2411 d.mu.Unlock() 2412 return s 2413 2414 case "version": 2415 d.mu.Lock() 2416 s := d.mu.versions.currentVersion().String() 2417 d.mu.Unlock() 2418 return s 2419 2420 default: 2421 return fmt.Sprintf("unknown command: %s", td.Cmd) 2422 } 2423 }) 2424 } 2425 2426 func TestCompactionInuseKeyRanges(t *testing.T) { 2427 cmp := DefaultComparer.Compare 2428 parseMeta := func(s string) *fileMetadata { 2429 parts := strings.Split(s, "-") 2430 if len(parts) != 2 { 2431 t.Fatalf("malformed table spec: %s", s) 2432 } 2433 m := (&fileMetadata{}).ExtendRangeKeyBounds( 2434 cmp, 2435 base.ParseInternalKey(strings.TrimSpace(parts[0])), 2436 base.ParseInternalKey(strings.TrimSpace(parts[1])), 2437 ) 2438 m.SmallestSeqNum = m.Smallest.SeqNum() 2439 m.LargestSeqNum = m.Largest.SeqNum() 2440 m.InitPhysicalBacking() 2441 return m 2442 } 2443 2444 opts := (*Options)(nil).EnsureDefaults() 2445 2446 var c *compaction 2447 datadriven.RunTest(t, "testdata/compaction_inuse_key_ranges", func(t *testing.T, td *datadriven.TestData) string { 2448 switch td.Cmd { 2449 case "define": 2450 c = &compaction{ 2451 cmp: DefaultComparer.Compare, 2452 equal: DefaultComparer.Equal, 2453 comparer: DefaultComparer, 2454 formatKey: DefaultComparer.FormatKey, 2455 inputs: []compactionLevel{{}, {}}, 2456 } 2457 c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] 2458 var files [numLevels][]*fileMetadata 2459 var currentLevel int 2460 fileNum := FileNum(1) 2461 2462 for _, data := range strings.Split(td.Input, "\n") { 2463 switch data { 2464 case "L0", "L1", "L2", "L3", "L4", "L5", "L6": 2465 level, err := strconv.Atoi(data[1:]) 2466 if err != nil { 2467 return err.Error() 2468 } 2469 currentLevel = level 2470 2471 default: 2472 meta := parseMeta(data) 2473 meta.FileNum = fileNum 2474 fileNum++ 2475 files[currentLevel] = append(files[currentLevel], meta) 2476 } 2477 } 2478 c.version = newVersion(opts, files) 2479 return c.version.String() 2480 2481 case "inuse-key-ranges": 2482 var buf bytes.Buffer 2483 for _, line := range strings.Split(td.Input, "\n") { 2484 parts := strings.Fields(line) 2485 if len(parts) != 3 { 2486 fmt.Fprintf(&buf, "expected <level> <smallest> <largest>: %q\n", line) 2487 continue 2488 } 2489 level, err := strconv.Atoi(parts[0]) 2490 if err != nil { 2491 fmt.Fprintf(&buf, "expected <level> <smallest> <largest>: %q: %v\n", line, err) 2492 continue 2493 } 2494 c.outputLevel.level = level 2495 c.smallest.UserKey = []byte(parts[1]) 2496 c.largest.UserKey = []byte(parts[2]) 2497 2498 c.inuseKeyRanges = nil 2499 c.setupInuseKeyRanges() 2500 if len(c.inuseKeyRanges) == 0 { 2501 fmt.Fprintf(&buf, ".\n") 2502 } else { 2503 for i, r := range c.inuseKeyRanges { 2504 if i > 0 { 2505 fmt.Fprintf(&buf, " ") 2506 } 2507 fmt.Fprintf(&buf, "%s-%s", r.Start, r.End) 2508 } 2509 fmt.Fprintf(&buf, "\n") 2510 } 2511 } 2512 return buf.String() 2513 2514 default: 2515 return fmt.Sprintf("unknown command: %s", td.Cmd) 2516 } 2517 }) 2518 } 2519 2520 func TestCompactionInuseKeyRangesRandomized(t *testing.T) { 2521 var ( 2522 fileNum = FileNum(0) 2523 opts = (*Options)(nil).EnsureDefaults() 2524 seed = int64(time.Now().UnixNano()) 2525 rng = rand.New(rand.NewSource(seed)) 2526 endKeyspace = 26 * 26 2527 ) 2528 t.Logf("Using rng seed %d.", seed) 2529 2530 for iter := 0; iter < 100; iter++ { 2531 makeUserKey := func(i int) []byte { 2532 if i >= endKeyspace { 2533 i = endKeyspace - 1 2534 } 2535 return []byte{byte(i/26 + 'a'), byte(i%26 + 'a')} 2536 } 2537 makeIK := func(level, i int) InternalKey { 2538 return base.MakeInternalKey( 2539 makeUserKey(i), 2540 uint64(numLevels-level), 2541 base.InternalKeyKindSet, 2542 ) 2543 } 2544 makeFile := func(level, start, end int) *fileMetadata { 2545 fileNum++ 2546 m := (&fileMetadata{ 2547 FileNum: fileNum, 2548 }).ExtendPointKeyBounds( 2549 opts.Comparer.Compare, 2550 makeIK(level, start), 2551 makeIK(level, end), 2552 ) 2553 m.SmallestSeqNum = m.Smallest.SeqNum() 2554 m.LargestSeqNum = m.Largest.SeqNum() 2555 m.InitPhysicalBacking() 2556 return m 2557 } 2558 overlaps := func(startA, endA, startB, endB []byte) bool { 2559 disjoint := opts.Comparer.Compare(endB, startA) < 0 || opts.Comparer.Compare(endA, startB) < 0 2560 return !disjoint 2561 } 2562 var files [numLevels][]*fileMetadata 2563 for l := 0; l < numLevels; l++ { 2564 for i := 0; i < rand.Intn(10); i++ { 2565 s := rng.Intn(endKeyspace) 2566 maxWidth := rng.Intn(endKeyspace-s) + 1 2567 e := rng.Intn(maxWidth) + s 2568 sKey, eKey := makeUserKey(s), makeUserKey(e) 2569 // Discard the key range if it overlaps any existing files 2570 // within this level. 2571 var o bool 2572 for _, f := range files[l] { 2573 o = o || overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey) 2574 } 2575 if o { 2576 continue 2577 } 2578 files[l] = append(files[l], makeFile(l, s, e)) 2579 } 2580 slices.SortFunc(files[l], func(a, b *fileMetadata) int { 2581 return opts.Comparer.Compare(a.Smallest.UserKey, b.Smallest.UserKey) 2582 }) 2583 } 2584 v := newVersion(opts, files) 2585 t.Log(v.DebugString(opts.Comparer.FormatKey)) 2586 for i := 0; i < 1000; i++ { 2587 l := rng.Intn(numLevels) 2588 s := rng.Intn(endKeyspace) 2589 maxWidth := rng.Intn(endKeyspace-s) + 1 2590 e := rng.Intn(maxWidth) + s 2591 sKey, eKey := makeUserKey(s), makeUserKey(e) 2592 keyRanges := calculateInuseKeyRanges(v, opts.Comparer.Compare, l, numLevels-1, sKey, eKey) 2593 2594 for level := l; level < numLevels; level++ { 2595 for _, f := range files[level] { 2596 if !overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey) { 2597 // This file doesn't overlap the queried range. Skip it. 2598 continue 2599 } 2600 // This file does overlap the queried range. The key range 2601 // [MAX(f.Smallest, sKey), MIN(f.Largest, eKey)] must be fully 2602 // contained by a key range in keyRanges. 2603 checkStart, checkEnd := f.Smallest.UserKey, f.Largest.UserKey 2604 if opts.Comparer.Compare(checkStart, sKey) < 0 { 2605 checkStart = sKey 2606 } 2607 if opts.Comparer.Compare(checkEnd, eKey) > 0 { 2608 checkEnd = eKey 2609 } 2610 var contained bool 2611 for _, kr := range keyRanges { 2612 contained = contained || 2613 (opts.Comparer.Compare(checkStart, kr.Start) >= 0 && 2614 opts.Comparer.Compare(checkEnd, kr.End) <= 0) 2615 } 2616 if !contained { 2617 t.Errorf("Seed %d, iter %d: File %s overlaps %q-%q, but is not fully contained in any of the key ranges.", 2618 seed, iter, f, sKey, eKey) 2619 } 2620 } 2621 } 2622 } 2623 } 2624 } 2625 2626 func TestCompactionAllowZeroSeqNum(t *testing.T) { 2627 var d *DB 2628 defer func() { 2629 if d != nil { 2630 require.NoError(t, closeAllSnapshots(d)) 2631 require.NoError(t, d.Close()) 2632 } 2633 }() 2634 2635 metaRE := regexp.MustCompile(`^L([0-9]+):([^-]+)-(.+)$`) 2636 var fileNum base.FileNum 2637 parseMeta := func(s string) (level int, meta *fileMetadata) { 2638 match := metaRE.FindStringSubmatch(s) 2639 if match == nil { 2640 t.Fatalf("malformed table spec: %s", s) 2641 } 2642 level, err := strconv.Atoi(match[1]) 2643 if err != nil { 2644 t.Fatalf("malformed table spec: %s: %s", s, err) 2645 } 2646 fileNum++ 2647 meta = (&fileMetadata{ 2648 FileNum: fileNum, 2649 }).ExtendPointKeyBounds( 2650 d.cmp, 2651 InternalKey{UserKey: []byte(match[2])}, 2652 InternalKey{UserKey: []byte(match[3])}, 2653 ) 2654 meta.InitPhysicalBacking() 2655 return level, meta 2656 } 2657 2658 datadriven.RunTest(t, "testdata/compaction_allow_zero_seqnum", 2659 func(t *testing.T, td *datadriven.TestData) string { 2660 switch td.Cmd { 2661 case "define": 2662 if d != nil { 2663 require.NoError(t, closeAllSnapshots(d)) 2664 if err := d.Close(); err != nil { 2665 return err.Error() 2666 } 2667 } 2668 2669 var err error 2670 if d, err = runDBDefineCmd(td, nil /* options */); err != nil { 2671 return err.Error() 2672 } 2673 2674 d.mu.Lock() 2675 s := d.mu.versions.currentVersion().String() 2676 d.mu.Unlock() 2677 return s 2678 2679 case "allow-zero-seqnum": 2680 d.mu.Lock() 2681 c := &compaction{ 2682 cmp: d.cmp, 2683 comparer: d.opts.Comparer, 2684 version: d.mu.versions.currentVersion(), 2685 inputs: []compactionLevel{{}, {}}, 2686 } 2687 c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] 2688 d.mu.Unlock() 2689 2690 var buf bytes.Buffer 2691 for _, line := range strings.Split(td.Input, "\n") { 2692 parts := strings.Fields(line) 2693 if len(parts) == 0 { 2694 continue 2695 } 2696 c.flushing = nil 2697 c.startLevel.level = -1 2698 2699 var startFiles, outputFiles []*fileMetadata 2700 2701 switch { 2702 case len(parts) == 1 && parts[0] == "flush": 2703 c.outputLevel.level = 0 2704 d.mu.Lock() 2705 c.flushing = d.mu.mem.queue 2706 d.mu.Unlock() 2707 2708 default: 2709 for _, p := range parts { 2710 level, meta := parseMeta(p) 2711 if c.startLevel.level == -1 { 2712 c.startLevel.level = level 2713 } 2714 2715 switch level { 2716 case c.startLevel.level: 2717 startFiles = append(startFiles, meta) 2718 case c.startLevel.level + 1: 2719 outputFiles = append(outputFiles, meta) 2720 default: 2721 return fmt.Sprintf("invalid level %d: expected %d or %d", 2722 level, c.startLevel.level, c.startLevel.level+1) 2723 } 2724 } 2725 c.outputLevel.level = c.startLevel.level + 1 2726 c.startLevel.files = manifest.NewLevelSliceSpecificOrder(startFiles) 2727 c.outputLevel.files = manifest.NewLevelSliceKeySorted(c.cmp, outputFiles) 2728 } 2729 2730 c.smallest, c.largest = manifest.KeyRange(c.cmp, 2731 c.startLevel.files.Iter(), 2732 c.outputLevel.files.Iter()) 2733 2734 c.inuseKeyRanges = nil 2735 c.setupInuseKeyRanges() 2736 fmt.Fprintf(&buf, "%t\n", c.allowZeroSeqNum()) 2737 } 2738 return buf.String() 2739 2740 default: 2741 return fmt.Sprintf("unknown command: %s", td.Cmd) 2742 } 2743 }) 2744 } 2745 2746 func TestCompactionErrorOnUserKeyOverlap(t *testing.T) { 2747 cmp := DefaultComparer.Compare 2748 parseMeta := func(s string) *fileMetadata { 2749 parts := strings.Split(s, "-") 2750 if len(parts) != 2 { 2751 t.Fatalf("malformed table spec: %s", s) 2752 } 2753 m := (&fileMetadata{}).ExtendPointKeyBounds( 2754 cmp, 2755 base.ParseInternalKey(strings.TrimSpace(parts[0])), 2756 base.ParseInternalKey(strings.TrimSpace(parts[1])), 2757 ) 2758 m.SmallestSeqNum = m.Smallest.SeqNum() 2759 m.LargestSeqNum = m.Largest.SeqNum() 2760 m.InitPhysicalBacking() 2761 return m 2762 } 2763 2764 datadriven.RunTest(t, "testdata/compaction_error_on_user_key_overlap", 2765 func(t *testing.T, d *datadriven.TestData) string { 2766 switch d.Cmd { 2767 case "error-on-user-key-overlap": 2768 c := &compaction{ 2769 cmp: DefaultComparer.Compare, 2770 comparer: DefaultComparer, 2771 formatKey: DefaultComparer.FormatKey, 2772 } 2773 var files []manifest.NewFileEntry 2774 fileNum := FileNum(1) 2775 2776 for _, data := range strings.Split(d.Input, "\n") { 2777 meta := parseMeta(data) 2778 meta.FileNum = fileNum 2779 fileNum++ 2780 files = append(files, manifest.NewFileEntry{Level: 1, Meta: meta}) 2781 } 2782 2783 result := "OK" 2784 ve := &versionEdit{ 2785 NewFiles: files, 2786 } 2787 if err := c.errorOnUserKeyOverlap(ve); err != nil { 2788 result = fmt.Sprint(err) 2789 } 2790 return result 2791 2792 default: 2793 return fmt.Sprintf("unknown command: %s", d.Cmd) 2794 } 2795 }) 2796 } 2797 2798 // TestCompactionErrorCleanup tests an error encountered during a compaction 2799 // after some output tables have been created. It ensures that the pending 2800 // output tables are removed from the filesystem. 2801 func TestCompactionErrorCleanup(t *testing.T) { 2802 // protected by d.mu 2803 var ( 2804 initialSetupDone bool 2805 tablesCreated []FileNum 2806 ) 2807 2808 mem := vfs.NewMem() 2809 ii := errorfs.OnIndex(math.MaxInt32) // start disabled 2810 opts := (&Options{ 2811 FS: errorfs.Wrap(mem, errorfs.ErrInjected.If(ii)), 2812 Levels: make([]LevelOptions, numLevels), 2813 EventListener: &EventListener{ 2814 TableCreated: func(info TableCreateInfo) { 2815 t.Log(info) 2816 2817 // If the initial setup is over, record tables created and 2818 // inject an error immediately after the second table is 2819 // created. 2820 if initialSetupDone { 2821 tablesCreated = append(tablesCreated, info.FileNum) 2822 if len(tablesCreated) >= 2 { 2823 ii.Store(0) 2824 } 2825 } 2826 }, 2827 }, 2828 }).WithFSDefaults() 2829 for i := range opts.Levels { 2830 opts.Levels[i].TargetFileSize = 1 2831 } 2832 opts.testingRandomized(t) 2833 d, err := Open("", opts) 2834 require.NoError(t, err) 2835 2836 ingest := func(keys ...string) { 2837 t.Helper() 2838 f, err := mem.Create("ext") 2839 require.NoError(t, err) 2840 2841 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 2842 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 2843 }) 2844 for _, k := range keys { 2845 require.NoError(t, w.Set([]byte(k), nil)) 2846 } 2847 require.NoError(t, w.Close()) 2848 require.NoError(t, d.Ingest([]string{"ext"})) 2849 } 2850 ingest("a", "c") 2851 ingest("b") 2852 2853 // Trigger a manual compaction, which will encounter an injected error 2854 // after the second table is created. 2855 d.mu.Lock() 2856 initialSetupDone = true 2857 d.mu.Unlock() 2858 err = d.Compact([]byte("a"), []byte("d"), false) 2859 require.Error(t, err, "injected error") 2860 2861 d.mu.Lock() 2862 if len(tablesCreated) < 2 { 2863 t.Fatalf("expected 2 output tables created by compaction: found %d", len(tablesCreated)) 2864 } 2865 d.mu.Unlock() 2866 2867 require.NoError(t, d.Close()) 2868 for _, fileNum := range tablesCreated { 2869 filename := fmt.Sprintf("%s.sst", fileNum) 2870 if _, err = mem.Stat(filename); err == nil || !oserror.IsNotExist(err) { 2871 t.Errorf("expected %q to not exist: %s", filename, err) 2872 } 2873 } 2874 } 2875 2876 func TestCompactionCheckOrdering(t *testing.T) { 2877 cmp := DefaultComparer.Compare 2878 parseMeta := func(s string) *fileMetadata { 2879 parts := strings.Split(s, "-") 2880 if len(parts) != 2 { 2881 t.Fatalf("malformed table spec: %s", s) 2882 } 2883 m := (&fileMetadata{}).ExtendPointKeyBounds( 2884 cmp, 2885 base.ParseInternalKey(strings.TrimSpace(parts[0])), 2886 base.ParseInternalKey(strings.TrimSpace(parts[1])), 2887 ) 2888 m.SmallestSeqNum = m.Smallest.SeqNum() 2889 m.LargestSeqNum = m.Largest.SeqNum() 2890 m.InitPhysicalBacking() 2891 return m 2892 } 2893 2894 datadriven.RunTest(t, "testdata/compaction_check_ordering", 2895 func(t *testing.T, d *datadriven.TestData) string { 2896 switch d.Cmd { 2897 case "check-ordering": 2898 c := &compaction{ 2899 cmp: DefaultComparer.Compare, 2900 comparer: DefaultComparer, 2901 formatKey: DefaultComparer.FormatKey, 2902 logger: panicLogger{}, 2903 inputs: []compactionLevel{{level: -1}, {level: -1}}, 2904 } 2905 c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] 2906 var startFiles, outputFiles []*fileMetadata 2907 var sublevels []manifest.LevelSlice 2908 var files *[]*fileMetadata 2909 var sublevel []*fileMetadata 2910 var sublevelNum int 2911 var parsingSublevel bool 2912 fileNum := FileNum(1) 2913 2914 switchSublevel := func() { 2915 if sublevel != nil { 2916 sublevels = append( 2917 sublevels, manifest.NewLevelSliceSpecificOrder(sublevel), 2918 ) 2919 sublevel = nil 2920 } 2921 parsingSublevel = false 2922 } 2923 2924 for _, data := range strings.Split(d.Input, "\n") { 2925 if data[0] == 'L' && len(data) == 4 { 2926 // Format L0.{sublevel}. 2927 switchSublevel() 2928 level, err := strconv.Atoi(data[1:2]) 2929 if err != nil { 2930 return err.Error() 2931 } 2932 sublevelNum, err = strconv.Atoi(data[3:]) 2933 if err != nil { 2934 return err.Error() 2935 } 2936 if c.startLevel.level == -1 { 2937 c.startLevel.level = level 2938 files = &startFiles 2939 } 2940 parsingSublevel = true 2941 } else if data[0] == 'L' { 2942 switchSublevel() 2943 level, err := strconv.Atoi(data[1:]) 2944 if err != nil { 2945 return err.Error() 2946 } 2947 if c.startLevel.level == -1 { 2948 c.startLevel.level = level 2949 files = &startFiles 2950 } else if c.outputLevel.level == -1 { 2951 if c.startLevel.level >= level { 2952 return fmt.Sprintf("startLevel=%d >= outputLevel=%d\n", c.startLevel.level, level) 2953 } 2954 c.outputLevel.level = level 2955 files = &outputFiles 2956 } else { 2957 return "outputLevel already set\n" 2958 } 2959 } else { 2960 meta := parseMeta(data) 2961 meta.FileNum = fileNum 2962 fileNum++ 2963 *files = append(*files, meta) 2964 if parsingSublevel { 2965 meta.SubLevel = sublevelNum 2966 sublevel = append(sublevel, meta) 2967 } 2968 } 2969 } 2970 2971 switchSublevel() 2972 c.startLevel.files = manifest.NewLevelSliceSpecificOrder(startFiles) 2973 c.outputLevel.files = manifest.NewLevelSliceSpecificOrder(outputFiles) 2974 if c.outputLevel.level == -1 { 2975 c.outputLevel.level = 0 2976 } 2977 if c.startLevel.level == 0 { 2978 // We don't change the input files for the compaction beyond this point. 2979 c.startLevel.l0SublevelInfo = generateSublevelInfo(c.cmp, c.startLevel.files) 2980 } 2981 2982 newIters := func( 2983 _ context.Context, _ *manifest.FileMetadata, _ *IterOptions, _ internalIterOpts, 2984 ) (internalIterator, keyspan.FragmentIterator, error) { 2985 return &errorIter{}, nil, nil 2986 } 2987 result := "OK" 2988 _, err := c.newInputIter(newIters, nil, nil) 2989 if err != nil { 2990 result = fmt.Sprint(err) 2991 } 2992 return result 2993 2994 default: 2995 return fmt.Sprintf("unknown command: %s", d.Cmd) 2996 } 2997 }) 2998 } 2999 3000 type mockSplitter struct { 3001 shouldSplitVal maybeSplit 3002 } 3003 3004 func (m *mockSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) maybeSplit { 3005 return m.shouldSplitVal 3006 } 3007 3008 func (m *mockSplitter) onNewOutput(key []byte) []byte { 3009 return nil 3010 } 3011 3012 func TestCompactionOutputSplitters(t *testing.T) { 3013 var main, child0, child1 compactionOutputSplitter 3014 var prevUserKey []byte 3015 pickSplitter := func(input string) *compactionOutputSplitter { 3016 switch input { 3017 case "main": 3018 return &main 3019 case "child0": 3020 return &child0 3021 case "child1": 3022 return &child1 3023 default: 3024 t.Fatalf("invalid splitter slot: %s", input) 3025 return nil 3026 } 3027 } 3028 3029 datadriven.RunTest(t, "testdata/compaction_output_splitters", 3030 func(t *testing.T, d *datadriven.TestData) string { 3031 switch d.Cmd { 3032 case "reset": 3033 main = nil 3034 child0 = nil 3035 child1 = nil 3036 case "init": 3037 if len(d.CmdArgs) < 2 { 3038 return "expected at least 2 args" 3039 } 3040 splitterToInit := pickSplitter(d.CmdArgs[0].Key) 3041 switch d.CmdArgs[1].Key { 3042 case "array": 3043 *splitterToInit = &splitterGroup{ 3044 cmp: base.DefaultComparer.Compare, 3045 splitters: []compactionOutputSplitter{child0, child1}, 3046 } 3047 case "mock": 3048 *splitterToInit = &mockSplitter{} 3049 case "userkey": 3050 *splitterToInit = &userKeyChangeSplitter{ 3051 cmp: base.DefaultComparer.Compare, 3052 unsafePrevUserKey: func() []byte { 3053 return prevUserKey 3054 }, 3055 splitter: child0, 3056 } 3057 } 3058 (*splitterToInit).onNewOutput(nil) 3059 case "set-should-split": 3060 if len(d.CmdArgs) < 2 { 3061 return "expected at least 2 args" 3062 } 3063 splitterToSet := (*pickSplitter(d.CmdArgs[0].Key)).(*mockSplitter) 3064 var val maybeSplit 3065 switch d.CmdArgs[1].Key { 3066 case "split-now": 3067 val = splitNow 3068 case "no-split": 3069 val = noSplit 3070 default: 3071 t.Fatalf("unexpected value for should-split: %s", d.CmdArgs[1].Key) 3072 } 3073 splitterToSet.shouldSplitVal = val 3074 case "should-split-before": 3075 if len(d.CmdArgs) < 1 { 3076 return "expected at least 1 arg" 3077 } 3078 key := base.ParseInternalKey(d.CmdArgs[0].Key) 3079 shouldSplit := main.shouldSplitBefore(&key, nil) 3080 if shouldSplit == splitNow { 3081 main.onNewOutput(key.UserKey) 3082 prevUserKey = nil 3083 } else { 3084 prevUserKey = key.UserKey 3085 } 3086 return shouldSplit.String() 3087 default: 3088 return fmt.Sprintf("unknown command: %s", d.Cmd) 3089 } 3090 return "ok" 3091 }) 3092 } 3093 3094 func TestCompactFlushQueuedMemTableAndFlushMetrics(t *testing.T) { 3095 t.Run("", func(t *testing.T) { 3096 // Verify that manual compaction forces a flush of a queued memtable. 3097 3098 mem := vfs.NewMem() 3099 d, err := Open("", testingRandomized(t, &Options{ 3100 FS: mem, 3101 }).WithFSDefaults()) 3102 require.NoError(t, err) 3103 3104 // Add the key "a" to the memtable, then fill up the memtable with the key 3105 // prefix "b". The compaction will only overlap with the queued memtable, 3106 // not the mutable memtable. 3107 // NB: The initial memtable size is 256KB, which is filled up with random 3108 // values which typically don't compress well. The test also appends the 3109 // random value to the "b" key to limit overwriting of the same key, which 3110 // would get collapsed at flush time since there are no open snapshots. 3111 value := make([]byte, 50) 3112 _, err = crand.Read(value) 3113 require.NoError(t, err) 3114 require.NoError(t, d.Set([]byte("a"), value, nil)) 3115 for { 3116 _, err = crand.Read(value) 3117 require.NoError(t, err) 3118 require.NoError(t, d.Set(append([]byte("b"), value...), value, nil)) 3119 d.mu.Lock() 3120 done := len(d.mu.mem.queue) == 2 3121 d.mu.Unlock() 3122 if done { 3123 break 3124 } 3125 } 3126 3127 require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false)) 3128 d.mu.Lock() 3129 require.Equal(t, 1, len(d.mu.mem.queue)) 3130 d.mu.Unlock() 3131 // Flush metrics are updated after and non-atomically with the memtable 3132 // being removed from the queue. 3133 for begin := time.Now(); ; { 3134 metrics := d.Metrics() 3135 require.NotNil(t, metrics) 3136 if metrics.Flush.WriteThroughput.Bytes >= 50*1024 { 3137 // The writes (during which the flush is idle) and the flush work 3138 // should not be so fast as to be unrealistic. If these turn out to be 3139 // flaky we could instead inject a clock. 3140 // 3141 // Windows timer precision is bad (on the order of 1 millisecond) and 3142 // can cause the duration to be 0. 3143 if runtime.GOOS != "windows" { 3144 tinyInterval := 50 * time.Microsecond 3145 require.Less(t, tinyInterval, metrics.Flush.WriteThroughput.WorkDuration) 3146 require.Less(t, tinyInterval, metrics.Flush.WriteThroughput.IdleDuration) 3147 } 3148 break 3149 } 3150 if time.Since(begin) > 2*time.Second { 3151 t.Fatal("flush did not happen") 3152 } 3153 time.Sleep(time.Millisecond) 3154 } 3155 require.NoError(t, d.Close()) 3156 }) 3157 } 3158 3159 func TestCompactFlushQueuedLargeBatch(t *testing.T) { 3160 // Verify that compaction forces a flush of a queued large batch. 3161 3162 mem := vfs.NewMem() 3163 d, err := Open("", testingRandomized(t, &Options{ 3164 FS: mem, 3165 }).WithFSDefaults()) 3166 require.NoError(t, err) 3167 3168 // The default large batch threshold is slightly less than 1/2 of the 3169 // memtable size which makes triggering a problem with flushing queued large 3170 // batches irritating. Manually adjust the threshold to 1/8 of the memtable 3171 // size in order to more easily create a situation where a large batch is 3172 // queued but not automatically flushed. 3173 d.mu.Lock() 3174 d.largeBatchThreshold = d.opts.MemTableSize / 8 3175 require.Equal(t, 1, len(d.mu.mem.queue)) 3176 d.mu.Unlock() 3177 3178 // Set a record with a large value. This will be transformed into a large 3179 // batch and placed in the flushable queue. 3180 require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("v"), int(d.largeBatchThreshold)), nil)) 3181 d.mu.Lock() 3182 require.Greater(t, len(d.mu.mem.queue), 1) 3183 d.mu.Unlock() 3184 3185 require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false)) 3186 d.mu.Lock() 3187 require.Equal(t, 1, len(d.mu.mem.queue)) 3188 d.mu.Unlock() 3189 3190 require.NoError(t, d.Close()) 3191 } 3192 3193 func TestFlushError(t *testing.T) { 3194 // Error the first five times we try to write a sstable. 3195 var errorOps atomic.Int32 3196 errorOps.Store(3) 3197 fs := errorfs.Wrap(vfs.NewMem(), errorfs.InjectorFunc(func(op errorfs.Op) error { 3198 if op.Kind == errorfs.OpCreate && filepath.Ext(op.Path) == ".sst" && errorOps.Add(-1) >= 0 { 3199 return errorfs.ErrInjected 3200 } 3201 return nil 3202 })) 3203 d, err := Open("", testingRandomized(t, &Options{ 3204 FS: fs, 3205 EventListener: &EventListener{ 3206 BackgroundError: func(err error) { 3207 t.Log(err) 3208 }, 3209 }, 3210 }).WithFSDefaults()) 3211 require.NoError(t, err) 3212 require.NoError(t, d.Set([]byte("a"), []byte("foo"), NoSync)) 3213 require.NoError(t, d.Flush()) 3214 require.NoError(t, d.Close()) 3215 } 3216 3217 func TestAdjustGrandparentOverlapBytesForFlush(t *testing.T) { 3218 // 500MB in Lbase 3219 var lbaseFiles []*manifest.FileMetadata 3220 const lbaseSize = 5 << 20 3221 for i := 0; i < 100; i++ { 3222 m := &manifest.FileMetadata{Size: lbaseSize, FileNum: FileNum(i)} 3223 m.InitPhysicalBacking() 3224 lbaseFiles = 3225 append(lbaseFiles, m) 3226 } 3227 const maxOutputFileSize = 2 << 20 3228 // 20MB max overlap, so flush split into 25 files. 3229 const maxOverlapBytes = 20 << 20 3230 ls := manifest.NewLevelSliceSpecificOrder(lbaseFiles) 3231 testCases := []struct { 3232 flushingBytes uint64 3233 adjustedOverlapBytes uint64 3234 }{ 3235 // Flushes large enough that 25 files is acceptable. 3236 {flushingBytes: 128 << 20, adjustedOverlapBytes: 20971520}, 3237 {flushingBytes: 64 << 20, adjustedOverlapBytes: 20971520}, 3238 // Small increase in adjustedOverlapBytes. 3239 {flushingBytes: 32 << 20, adjustedOverlapBytes: 32768000}, 3240 // Large increase in adjusterOverlapBytes, to limit to 4 files. 3241 {flushingBytes: 1 << 20, adjustedOverlapBytes: 131072000}, 3242 } 3243 for _, tc := range testCases { 3244 t.Run("", func(t *testing.T) { 3245 c := compaction{ 3246 grandparents: ls, 3247 maxOverlapBytes: maxOverlapBytes, 3248 maxOutputFileSize: maxOutputFileSize, 3249 } 3250 adjustGrandparentOverlapBytesForFlush(&c, tc.flushingBytes) 3251 require.Equal(t, tc.adjustedOverlapBytes, c.maxOverlapBytes) 3252 }) 3253 } 3254 } 3255 3256 func TestCompactionInvalidBounds(t *testing.T) { 3257 db, err := Open("", testingRandomized(t, &Options{ 3258 FS: vfs.NewMem(), 3259 }).WithFSDefaults()) 3260 require.NoError(t, err) 3261 defer db.Close() 3262 require.NoError(t, db.Compact([]byte("a"), []byte("b"), false)) 3263 require.Error(t, db.Compact([]byte("a"), []byte("a"), false)) 3264 require.Error(t, db.Compact([]byte("b"), []byte("a"), false)) 3265 } 3266 3267 func Test_calculateInuseKeyRanges(t *testing.T) { 3268 opts := (*Options)(nil).EnsureDefaults() 3269 cmp := base.DefaultComparer.Compare 3270 newFileMeta := func(fileNum FileNum, size uint64, smallest, largest base.InternalKey) *fileMetadata { 3271 m := (&fileMetadata{ 3272 FileNum: fileNum, 3273 Size: size, 3274 }).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest) 3275 m.InitPhysicalBacking() 3276 return m 3277 } 3278 tests := []struct { 3279 name string 3280 v *version 3281 level int 3282 depth int 3283 smallest []byte 3284 largest []byte 3285 want []manifest.UserKeyRange 3286 }{ 3287 { 3288 name: "No files in next level", 3289 v: newVersion(opts, [numLevels][]*fileMetadata{ 3290 1: { 3291 newFileMeta( 3292 1, 3293 1, 3294 base.ParseInternalKey("a.SET.2"), 3295 base.ParseInternalKey("c.SET.2"), 3296 ), 3297 newFileMeta( 3298 2, 3299 1, 3300 base.ParseInternalKey("d.SET.2"), 3301 base.ParseInternalKey("e.SET.2"), 3302 ), 3303 }, 3304 }), 3305 level: 1, 3306 depth: 2, 3307 smallest: []byte("a"), 3308 largest: []byte("e"), 3309 want: []manifest.UserKeyRange{ 3310 { 3311 Start: []byte("a"), 3312 End: []byte("c"), 3313 }, 3314 { 3315 Start: []byte("d"), 3316 End: []byte("e"), 3317 }, 3318 }, 3319 }, 3320 { 3321 name: "No overlapping key ranges", 3322 v: newVersion(opts, [numLevels][]*fileMetadata{ 3323 1: { 3324 newFileMeta( 3325 1, 3326 1, 3327 base.ParseInternalKey("a.SET.1"), 3328 base.ParseInternalKey("c.SET.1"), 3329 ), 3330 newFileMeta( 3331 2, 3332 1, 3333 base.ParseInternalKey("l.SET.1"), 3334 base.ParseInternalKey("p.SET.1"), 3335 ), 3336 }, 3337 2: { 3338 newFileMeta( 3339 3, 3340 1, 3341 base.ParseInternalKey("d.SET.1"), 3342 base.ParseInternalKey("i.SET.1"), 3343 ), 3344 newFileMeta( 3345 4, 3346 1, 3347 base.ParseInternalKey("s.SET.1"), 3348 base.ParseInternalKey("w.SET.1"), 3349 ), 3350 }, 3351 }), 3352 level: 1, 3353 depth: 2, 3354 smallest: []byte("a"), 3355 largest: []byte("z"), 3356 want: []manifest.UserKeyRange{ 3357 { 3358 Start: []byte("a"), 3359 End: []byte("c"), 3360 }, 3361 { 3362 Start: []byte("d"), 3363 End: []byte("i"), 3364 }, 3365 { 3366 Start: []byte("l"), 3367 End: []byte("p"), 3368 }, 3369 { 3370 Start: []byte("s"), 3371 End: []byte("w"), 3372 }, 3373 }, 3374 }, 3375 { 3376 name: "First few non-overlapping, followed by overlapping", 3377 v: newVersion(opts, [numLevels][]*fileMetadata{ 3378 1: { 3379 newFileMeta( 3380 1, 3381 1, 3382 base.ParseInternalKey("a.SET.1"), 3383 base.ParseInternalKey("c.SET.1"), 3384 ), 3385 newFileMeta( 3386 2, 3387 1, 3388 base.ParseInternalKey("d.SET.1"), 3389 base.ParseInternalKey("e.SET.1"), 3390 ), 3391 newFileMeta( 3392 3, 3393 1, 3394 base.ParseInternalKey("n.SET.1"), 3395 base.ParseInternalKey("o.SET.1"), 3396 ), 3397 newFileMeta( 3398 4, 3399 1, 3400 base.ParseInternalKey("p.SET.1"), 3401 base.ParseInternalKey("q.SET.1"), 3402 ), 3403 }, 3404 2: { 3405 newFileMeta( 3406 5, 3407 1, 3408 base.ParseInternalKey("m.SET.1"), 3409 base.ParseInternalKey("q.SET.1"), 3410 ), 3411 newFileMeta( 3412 6, 3413 1, 3414 base.ParseInternalKey("s.SET.1"), 3415 base.ParseInternalKey("w.SET.1"), 3416 ), 3417 }, 3418 }), 3419 level: 1, 3420 depth: 2, 3421 smallest: []byte("a"), 3422 largest: []byte("z"), 3423 want: []manifest.UserKeyRange{ 3424 { 3425 Start: []byte("a"), 3426 End: []byte("c"), 3427 }, 3428 { 3429 Start: []byte("d"), 3430 End: []byte("e"), 3431 }, 3432 { 3433 Start: []byte("m"), 3434 End: []byte("q"), 3435 }, 3436 { 3437 Start: []byte("s"), 3438 End: []byte("w"), 3439 }, 3440 }, 3441 }, 3442 { 3443 name: "All overlapping", 3444 v: newVersion(opts, [numLevels][]*fileMetadata{ 3445 1: { 3446 newFileMeta( 3447 1, 3448 1, 3449 base.ParseInternalKey("d.SET.1"), 3450 base.ParseInternalKey("e.SET.1"), 3451 ), 3452 newFileMeta( 3453 2, 3454 1, 3455 base.ParseInternalKey("n.SET.1"), 3456 base.ParseInternalKey("o.SET.1"), 3457 ), 3458 newFileMeta( 3459 3, 3460 1, 3461 base.ParseInternalKey("p.SET.1"), 3462 base.ParseInternalKey("q.SET.1"), 3463 ), 3464 }, 3465 2: { 3466 newFileMeta( 3467 4, 3468 1, 3469 base.ParseInternalKey("a.SET.1"), 3470 base.ParseInternalKey("c.SET.1"), 3471 ), 3472 newFileMeta( 3473 5, 3474 1, 3475 base.ParseInternalKey("d.SET.1"), 3476 base.ParseInternalKey("w.SET.1"), 3477 ), 3478 }, 3479 }), 3480 level: 1, 3481 depth: 2, 3482 smallest: []byte("a"), 3483 largest: []byte("z"), 3484 want: []manifest.UserKeyRange{ 3485 { 3486 Start: []byte("a"), 3487 End: []byte("c"), 3488 }, 3489 { 3490 Start: []byte("d"), 3491 End: []byte("w"), 3492 }, 3493 }, 3494 }, 3495 } 3496 for _, tt := range tests { 3497 t.Run(tt.name, func(t *testing.T) { 3498 if got := calculateInuseKeyRanges(tt.v, cmp, tt.level, tt.depth, tt.smallest, tt.largest); !reflect.DeepEqual(got, tt.want) { 3499 t.Errorf("calculateInuseKeyRanges() = %v, want %v", got, tt.want) 3500 } 3501 }) 3502 } 3503 } 3504 3505 func TestMarkedForCompaction(t *testing.T) { 3506 var mem vfs.FS = vfs.NewMem() 3507 var d *DB 3508 defer func() { 3509 if d != nil { 3510 require.NoError(t, d.Close()) 3511 } 3512 }() 3513 3514 var buf bytes.Buffer 3515 opts := (&Options{ 3516 FS: mem, 3517 DebugCheck: DebugCheckLevels, 3518 DisableAutomaticCompactions: true, 3519 FormatMajorVersion: internalFormatNewest, 3520 EventListener: &EventListener{ 3521 CompactionEnd: func(info CompactionInfo) { 3522 // Fix the job ID and durations for determinism. 3523 info.JobID = 100 3524 info.Duration = time.Second 3525 info.TotalDuration = 2 * time.Second 3526 fmt.Fprintln(&buf, info) 3527 }, 3528 }, 3529 }).WithFSDefaults() 3530 3531 reset := func() { 3532 if d != nil { 3533 require.NoError(t, d.Close()) 3534 } 3535 mem = vfs.NewMem() 3536 require.NoError(t, mem.MkdirAll("ext", 0755)) 3537 3538 var err error 3539 d, err = Open("", opts) 3540 require.NoError(t, err) 3541 } 3542 datadriven.RunTest(t, "testdata/marked_for_compaction", func(t *testing.T, td *datadriven.TestData) string { 3543 switch td.Cmd { 3544 case "reset": 3545 reset() 3546 return "" 3547 3548 case "define": 3549 if d != nil { 3550 if err := d.Close(); err != nil { 3551 return err.Error() 3552 } 3553 } 3554 var err error 3555 if d, err = runDBDefineCmd(td, opts); err != nil { 3556 return err.Error() 3557 } 3558 d.mu.Lock() 3559 defer d.mu.Unlock() 3560 t := time.Now() 3561 d.timeNow = func() time.Time { 3562 t = t.Add(time.Second) 3563 return t 3564 } 3565 s := d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) 3566 return s 3567 3568 case "mark-for-compaction": 3569 d.mu.Lock() 3570 defer d.mu.Unlock() 3571 vers := d.mu.versions.currentVersion() 3572 var fileNum uint64 3573 td.ScanArgs(t, "file", &fileNum) 3574 for l, lm := range vers.Levels { 3575 iter := lm.Iter() 3576 for f := iter.First(); f != nil; f = iter.Next() { 3577 if f.FileNum != base.FileNum(fileNum) { 3578 continue 3579 } 3580 f.MarkedForCompaction = true 3581 vers.Stats.MarkedForCompaction++ 3582 vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{}) 3583 return fmt.Sprintf("marked L%d.%s", l, f.FileNum) 3584 } 3585 } 3586 return "not-found" 3587 3588 case "maybe-compact": 3589 d.mu.Lock() 3590 defer d.mu.Unlock() 3591 d.opts.DisableAutomaticCompactions = false 3592 d.maybeScheduleCompaction() 3593 for d.mu.compact.compactingCount > 0 { 3594 d.mu.compact.cond.Wait() 3595 } 3596 3597 fmt.Fprintln(&buf, d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)) 3598 s := strings.TrimSpace(buf.String()) 3599 buf.Reset() 3600 opts.DisableAutomaticCompactions = true 3601 return s 3602 3603 default: 3604 return fmt.Sprintf("unknown command: %s", td.Cmd) 3605 } 3606 }) 3607 } 3608 3609 // createManifestErrorInjector injects errors (when enabled) into vfs.FS calls 3610 // to create MANIFEST files. 3611 type createManifestErrorInjector struct { 3612 enabled atomic.Bool 3613 } 3614 3615 // TODO(jackson): Replace the createManifestErrorInjector with the composition 3616 // of primitives defined in errorfs. This may require additional primitives. 3617 3618 func (i *createManifestErrorInjector) String() string { return "MANIFEST-Creates" } 3619 3620 // enable enables error injection for the vfs.FS. 3621 func (i *createManifestErrorInjector) enable() { 3622 i.enabled.Store(true) 3623 } 3624 3625 // MaybeError implements errorfs.Injector. 3626 func (i *createManifestErrorInjector) MaybeError(op errorfs.Op) error { 3627 if !i.enabled.Load() { 3628 return nil 3629 } 3630 // This necessitates having a MaxManifestSize of 1, to reliably induce 3631 // logAndApply errors. 3632 if strings.Contains(op.Path, "MANIFEST") && op.Kind == errorfs.OpCreate { 3633 return errorfs.ErrInjected 3634 } 3635 return nil 3636 } 3637 3638 var _ errorfs.Injector = &createManifestErrorInjector{} 3639 3640 // TestCompaction_LogAndApplyFails exercises a flush or ingest encountering an 3641 // unrecoverable error during logAndApply. 3642 // 3643 // Regression test for #1669. 3644 func TestCompaction_LogAndApplyFails(t *testing.T) { 3645 // flushKeys writes the given keys to the DB, flushing the resulting memtable. 3646 var key = []byte("foo") 3647 flushErrC := make(chan error) 3648 flushKeys := func(db *DB) error { 3649 b := db.NewBatch() 3650 err := b.Set(key, nil, nil) 3651 require.NoError(t, err) 3652 err = b.Commit(nil) 3653 require.NoError(t, err) 3654 // An error from a failing flush is returned asynchronously. 3655 go func() { _ = db.Flush() }() 3656 return <-flushErrC 3657 } 3658 3659 // ingestKeys adds the given keys to the DB via an ingestion. 3660 ingestKeys := func(db *DB) error { 3661 // Create an SST for ingestion. 3662 const fName = "ext" 3663 f, err := db.opts.FS.Create(fName) 3664 require.NoError(t, err) 3665 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 3666 require.NoError(t, w.Set(key, nil)) 3667 require.NoError(t, w.Close()) 3668 // Ingest the SST. 3669 return db.Ingest([]string{fName}) 3670 } 3671 3672 testCases := []struct { 3673 name string 3674 addFn func(db *DB) error 3675 backgroundErrorFn func(*DB, error) 3676 }{ 3677 { 3678 name: "flush", 3679 addFn: flushKeys, 3680 backgroundErrorFn: func(db *DB, err error) { 3681 require.True(t, errors.Is(err, errorfs.ErrInjected)) 3682 flushErrC <- err 3683 // A flush will attempt to retry in the background. For the purposes of 3684 // testing this particular scenario, where we would have crashed anyway, 3685 // drop the memtable on the floor to short circuit the retry loop. 3686 // NB: we hold db.mu here. 3687 var cur *flushableEntry 3688 cur, db.mu.mem.queue = db.mu.mem.queue[0], db.mu.mem.queue[1:] 3689 cur.readerUnrefLocked(true) 3690 }, 3691 }, 3692 { 3693 name: "ingest", 3694 addFn: ingestKeys, 3695 }, 3696 } 3697 3698 runTest := func(t *testing.T, addFn func(db *DB) error, bgFn func(*DB, error)) { 3699 var db *DB 3700 inj := &createManifestErrorInjector{} 3701 logger := &fatalCapturingLogger{t: t} 3702 opts := (&Options{ 3703 FS: errorfs.Wrap(vfs.NewMem(), inj), 3704 // Rotate the manifest after each write. This is required to trigger a 3705 // file creation, into which errors can be injected. 3706 MaxManifestFileSize: 1, 3707 Logger: logger, 3708 EventListener: &EventListener{ 3709 BackgroundError: func(err error) { 3710 if bgFn != nil { 3711 bgFn(db, err) 3712 } 3713 }, 3714 }, 3715 DisableAutomaticCompactions: true, 3716 }).WithFSDefaults() 3717 3718 db, err := Open("", opts) 3719 require.NoError(t, err) 3720 defer func() { _ = db.Close() }() 3721 3722 inj.enable() 3723 err = addFn(db) 3724 require.True(t, errors.Is(err, errorfs.ErrInjected)) 3725 3726 // Under normal circumstances, such an error in logAndApply would panic and 3727 // cause the DB to terminate here. Assert that we captured the fatal error. 3728 require.True(t, errors.Is(logger.err, errorfs.ErrInjected)) 3729 } 3730 for _, tc := range testCases { 3731 t.Run(tc.name, func(t *testing.T) { 3732 runTest(t, tc.addFn, tc.backgroundErrorFn) 3733 }) 3734 } 3735 } 3736 3737 // TestSharedObjectDeletePacing tests that we don't throttle shared object 3738 // deletes (see the TargetBytesDeletionRate option). 3739 func TestSharedObjectDeletePacing(t *testing.T) { 3740 var opts Options 3741 opts.FS = vfs.NewMem() 3742 opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 3743 "": remote.NewInMem(), 3744 }) 3745 opts.Experimental.CreateOnShared = remote.CreateOnSharedAll 3746 opts.TargetByteDeletionRate = 1 3747 3748 d, err := Open("", &opts) 3749 require.NoError(t, err) 3750 require.NoError(t, d.SetCreatorID(1)) 3751 3752 randVal := func() []byte { 3753 res := make([]byte, 1024) 3754 _, err := crand.Read(res) 3755 require.NoError(t, err) 3756 return res 3757 } 3758 3759 // We must set up things so that we will have more live bytes than obsolete 3760 // bytes, otherwise delete pacing will be disabled anyway. 3761 key := func(i int) string { 3762 return fmt.Sprintf("k%02d", i) 3763 } 3764 const numKeys = 20 3765 for i := 1; i <= numKeys; i++ { 3766 require.NoError(t, d.Set([]byte(key(i)), randVal(), nil)) 3767 require.NoError(t, d.Compact([]byte(key(i)), []byte(key(i)+"1"), false)) 3768 } 3769 3770 done := make(chan struct{}) 3771 go func() { 3772 err = d.DeleteRange([]byte(key(5)), []byte(key(9)), nil) 3773 if err == nil { 3774 err = d.Compact([]byte(key(5)), []byte(key(9)), false) 3775 } 3776 // Wait for objects to be deleted. 3777 for { 3778 time.Sleep(10 * time.Millisecond) 3779 if len(d.objProvider.List()) < numKeys-2 { 3780 break 3781 } 3782 } 3783 close(done) 3784 }() 3785 3786 select { 3787 case <-time.After(60 * time.Second): 3788 // Don't close the DB in this case (the goroutine above might panic). 3789 t.Fatalf("compaction timed out, possibly due to incorrect deletion pacing") 3790 case <-done: 3791 } 3792 require.NoError(t, err) 3793 d.Close() 3794 } 3795 3796 type WriteErrorInjector struct { 3797 enabled atomic.Bool 3798 } 3799 3800 // TODO(jackson): Replace WriteErrorInjector with use of primitives in errorfs, 3801 // adding new primitives as necessary. 3802 3803 func (i *WriteErrorInjector) String() string { return "FileWrites(ErrInjected)" } 3804 3805 // enable enables error injection for the vfs.FS. 3806 func (i *WriteErrorInjector) enable() { 3807 i.enabled.Store(true) 3808 } 3809 3810 // disable disabled error injection for the vfs.FS. 3811 func (i *WriteErrorInjector) disable() { 3812 i.enabled.Store(false) 3813 } 3814 3815 // MaybeError implements errorfs.Injector. 3816 func (i *WriteErrorInjector) MaybeError(op errorfs.Op) error { 3817 if !i.enabled.Load() { 3818 return nil 3819 } 3820 // Fail any future write. 3821 if op.Kind == errorfs.OpFileWrite { 3822 return errorfs.ErrInjected 3823 } 3824 return nil 3825 } 3826 3827 var _ errorfs.Injector = &WriteErrorInjector{} 3828 3829 // Cumulative compaction stats shouldn't be updated on compaction error. 3830 func TestCompactionErrorStats(t *testing.T) { 3831 // protected by d.mu 3832 var ( 3833 useInjector bool 3834 tablesCreated []FileNum 3835 ) 3836 3837 mem := vfs.NewMem() 3838 injector := &WriteErrorInjector{} 3839 opts := (&Options{ 3840 FS: errorfs.Wrap(mem, injector), 3841 Levels: make([]LevelOptions, numLevels), 3842 EventListener: &EventListener{ 3843 TableCreated: func(info TableCreateInfo) { 3844 t.Log(info) 3845 3846 if useInjector { 3847 // We'll write 3 tables during compaction, and we only need 3848 // the writes to error on the third file write, so only enable 3849 // the injector after the first two files have been written to. 3850 tablesCreated = append(tablesCreated, info.FileNum) 3851 if len(tablesCreated) >= 2 { 3852 injector.enable() 3853 } 3854 } 3855 }, 3856 }, 3857 }).WithFSDefaults() 3858 for i := range opts.Levels { 3859 opts.Levels[i].TargetFileSize = 1 3860 } 3861 opts.testingRandomized(t) 3862 d, err := Open("", opts) 3863 require.NoError(t, err) 3864 3865 ingest := func(keys ...string) { 3866 t.Helper() 3867 f, err := mem.Create("ext") 3868 require.NoError(t, err) 3869 3870 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 3871 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 3872 }) 3873 for _, k := range keys { 3874 require.NoError(t, w.Set([]byte(k), nil)) 3875 } 3876 require.NoError(t, w.Close()) 3877 require.NoError(t, d.Ingest([]string{"ext"})) 3878 } 3879 ingest("a", "c") 3880 // Snapshot will preserve the older "a" key during compaction. 3881 snap := d.NewSnapshot() 3882 ingest("a", "b") 3883 3884 // Trigger a manual compaction, which will encounter an injected error 3885 // after the second table is created. 3886 d.mu.Lock() 3887 useInjector = true 3888 d.mu.Unlock() 3889 3890 err = d.Compact([]byte("a"), []byte("d"), false) 3891 require.Error(t, err, "injected error") 3892 3893 // Due to the error, stats shouldn't have been updated. 3894 d.mu.Lock() 3895 require.Equal(t, 0, int(d.mu.snapshots.cumulativePinnedCount)) 3896 require.Equal(t, 0, int(d.mu.snapshots.cumulativePinnedSize)) 3897 useInjector = false 3898 d.mu.Unlock() 3899 3900 injector.disable() 3901 3902 // The following compaction won't error, but snapshot is open, so snapshot 3903 // pinned stats should update. 3904 require.NoError(t, d.Compact([]byte("a"), []byte("d"), false)) 3905 require.NoError(t, snap.Close()) 3906 3907 d.mu.Lock() 3908 require.Equal(t, 1, int(d.mu.snapshots.cumulativePinnedCount)) 3909 require.Equal(t, 9, int(d.mu.snapshots.cumulativePinnedSize)) 3910 d.mu.Unlock() 3911 require.NoError(t, d.Close()) 3912 }