github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/ingest_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "math" 13 "os" 14 "path/filepath" 15 "sort" 16 "strconv" 17 "strings" 18 "sync" 19 "sync/atomic" 20 "testing" 21 "time" 22 23 "github.com/cockroachdb/datadriven" 24 "github.com/cockroachdb/errors" 25 "github.com/cockroachdb/errors/oserror" 26 "github.com/cockroachdb/pebble/internal/base" 27 "github.com/cockroachdb/pebble/internal/keyspan" 28 "github.com/cockroachdb/pebble/internal/manifest" 29 "github.com/cockroachdb/pebble/internal/rangekey" 30 "github.com/cockroachdb/pebble/internal/testkeys" 31 "github.com/cockroachdb/pebble/objstorage" 32 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 33 "github.com/cockroachdb/pebble/objstorage/remote" 34 "github.com/cockroachdb/pebble/record" 35 "github.com/cockroachdb/pebble/sstable" 36 "github.com/cockroachdb/pebble/vfs" 37 "github.com/cockroachdb/pebble/vfs/errorfs" 38 "github.com/kr/pretty" 39 "github.com/stretchr/testify/require" 40 "golang.org/x/exp/rand" 41 ) 42 43 func TestSSTableKeyCompare(t *testing.T) { 44 var buf bytes.Buffer 45 datadriven.RunTest(t, "testdata/sstable_key_compare", func(t *testing.T, td *datadriven.TestData) string { 46 switch td.Cmd { 47 case "cmp": 48 buf.Reset() 49 for _, line := range strings.Split(td.Input, "\n") { 50 fields := strings.Fields(line) 51 a := base.ParseInternalKey(fields[0]) 52 b := base.ParseInternalKey(fields[1]) 53 got := sstableKeyCompare(testkeys.Comparer.Compare, a, b) 54 fmt.Fprintf(&buf, "%38s", fmt.Sprint(a.Pretty(base.DefaultFormatter))) 55 switch got { 56 case -1: 57 fmt.Fprint(&buf, " < ") 58 case +1: 59 fmt.Fprint(&buf, " > ") 60 case 0: 61 fmt.Fprint(&buf, " = ") 62 } 63 fmt.Fprintf(&buf, "%s\n", 
fmt.Sprint(b.Pretty(base.DefaultFormatter))) 64 } 65 return buf.String() 66 default: 67 return fmt.Sprintf("unrecognized command %q", td.Cmd) 68 } 69 }) 70 } 71 72 func TestIngestLoad(t *testing.T) { 73 mem := vfs.NewMem() 74 75 datadriven.RunTest(t, "testdata/ingest_load", func(t *testing.T, td *datadriven.TestData) string { 76 switch td.Cmd { 77 case "load": 78 writerOpts := sstable.WriterOptions{} 79 var dbVersion FormatMajorVersion 80 for _, cmdArgs := range td.CmdArgs { 81 v, err := strconv.Atoi(cmdArgs.Vals[0]) 82 if err != nil { 83 return err.Error() 84 } 85 switch k := cmdArgs.Key; k { 86 case "writer-version": 87 fmv := FormatMajorVersion(v) 88 writerOpts.TableFormat = fmv.MaxTableFormat() 89 case "db-version": 90 dbVersion = FormatMajorVersion(v) 91 default: 92 return fmt.Sprintf("unknown cmd %s\n", k) 93 } 94 } 95 f, err := mem.Create("ext") 96 if err != nil { 97 return err.Error() 98 } 99 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writerOpts) 100 for _, data := range strings.Split(td.Input, "\n") { 101 if strings.HasPrefix(data, "rangekey: ") { 102 data = strings.TrimPrefix(data, "rangekey: ") 103 s := keyspan.ParseSpan(data) 104 err := rangekey.Encode(&s, w.AddRangeKey) 105 if err != nil { 106 return err.Error() 107 } 108 continue 109 } 110 111 j := strings.Index(data, ":") 112 if j < 0 { 113 return fmt.Sprintf("malformed input: %s\n", data) 114 } 115 key := base.ParseInternalKey(data[:j]) 116 value := []byte(data[j+1:]) 117 if err := w.Add(key, value); err != nil { 118 return err.Error() 119 } 120 } 121 if err := w.Close(); err != nil { 122 return err.Error() 123 } 124 125 opts := (&Options{ 126 Comparer: DefaultComparer, 127 FS: mem, 128 }).WithFSDefaults() 129 lr, err := ingestLoad(opts, dbVersion, []string{"ext"}, nil, nil, 0, []base.DiskFileNum{base.FileNum(1).DiskFileNum()}, nil, 0) 130 if err != nil { 131 return err.Error() 132 } 133 var buf bytes.Buffer 134 for _, m := range lr.localMeta { 135 fmt.Fprintf(&buf, "%d: %s-%s\n", 
m.FileNum, m.Smallest, m.Largest) 136 fmt.Fprintf(&buf, " points: %s-%s\n", m.SmallestPointKey, m.LargestPointKey) 137 fmt.Fprintf(&buf, " ranges: %s-%s\n", m.SmallestRangeKey, m.LargestRangeKey) 138 } 139 return buf.String() 140 141 default: 142 return fmt.Sprintf("unknown command: %s", td.Cmd) 143 } 144 }) 145 } 146 147 func TestIngestLoadRand(t *testing.T) { 148 mem := vfs.NewMem() 149 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 150 cmp := DefaultComparer.Compare 151 version := internalFormatNewest 152 153 randBytes := func(size int) []byte { 154 data := make([]byte, size) 155 for i := range data { 156 data[i] = byte(rng.Int() & 0xff) 157 } 158 return data 159 } 160 161 paths := make([]string, 1+rng.Intn(10)) 162 pending := make([]base.DiskFileNum, len(paths)) 163 expected := make([]*fileMetadata, len(paths)) 164 for i := range paths { 165 paths[i] = fmt.Sprint(i) 166 pending[i] = base.FileNum(rng.Uint64()).DiskFileNum() 167 expected[i] = &fileMetadata{ 168 FileNum: pending[i].FileNum(), 169 } 170 expected[i].StatsMarkValid() 171 172 func() { 173 f, err := mem.Create(paths[i]) 174 require.NoError(t, err) 175 176 keys := make([]InternalKey, 1+rng.Intn(100)) 177 for i := range keys { 178 keys[i] = base.MakeInternalKey( 179 randBytes(1+rng.Intn(10)), 180 0, 181 InternalKeyKindSet) 182 } 183 sort.Slice(keys, func(i, j int) bool { 184 return base.InternalCompare(cmp, keys[i], keys[j]) < 0 185 }) 186 187 expected[i].ExtendPointKeyBounds(cmp, keys[0], keys[len(keys)-1]) 188 189 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 190 TableFormat: version.MaxTableFormat(), 191 }) 192 var count uint64 193 for i := range keys { 194 if i > 0 && base.InternalCompare(cmp, keys[i-1], keys[i]) == 0 { 195 // Duplicate key, ignore. 
196 continue 197 } 198 w.Add(keys[i], nil) 199 count++ 200 } 201 expected[i].Stats.NumEntries = count 202 require.NoError(t, w.Close()) 203 204 meta, err := w.Metadata() 205 require.NoError(t, err) 206 207 expected[i].Size = meta.Size 208 expected[i].InitPhysicalBacking() 209 }() 210 } 211 212 opts := (&Options{ 213 Comparer: DefaultComparer, 214 FS: mem, 215 }).WithFSDefaults() 216 lr, err := ingestLoad(opts, version, paths, nil, nil, 0, pending, nil, 0) 217 require.NoError(t, err) 218 219 for _, m := range lr.localMeta { 220 m.CreationTime = 0 221 } 222 t.Log(strings.Join(pretty.Diff(expected, lr.localMeta), "\n")) 223 require.Equal(t, expected, lr.localMeta) 224 } 225 226 func TestIngestLoadInvalid(t *testing.T) { 227 mem := vfs.NewMem() 228 f, err := mem.Create("invalid") 229 require.NoError(t, err) 230 require.NoError(t, f.Close()) 231 232 opts := (&Options{ 233 Comparer: DefaultComparer, 234 FS: mem, 235 }).WithFSDefaults() 236 if _, err := ingestLoad(opts, internalFormatNewest, []string{"invalid"}, nil, nil, 0, []base.DiskFileNum{base.FileNum(1).DiskFileNum()}, nil, 0); err == nil { 237 t.Fatalf("expected error, but found success") 238 } 239 } 240 241 func TestIngestSortAndVerify(t *testing.T) { 242 comparers := map[string]Compare{ 243 "default": DefaultComparer.Compare, 244 "reverse": func(a, b []byte) int { 245 return DefaultComparer.Compare(b, a) 246 }, 247 } 248 249 t.Run("", func(t *testing.T) { 250 datadriven.RunTest(t, "testdata/ingest_sort_and_verify", func(t *testing.T, d *datadriven.TestData) string { 251 switch d.Cmd { 252 case "ingest": 253 var buf bytes.Buffer 254 var meta []*fileMetadata 255 var paths []string 256 var cmpName string 257 d.ScanArgs(t, "cmp", &cmpName) 258 cmp := comparers[cmpName] 259 if cmp == nil { 260 return fmt.Sprintf("%s unknown comparer: %s", d.Cmd, cmpName) 261 } 262 for i, data := range strings.Split(d.Input, "\n") { 263 parts := strings.Split(data, "-") 264 if len(parts) != 2 { 265 return fmt.Sprintf("malformed test 
case: %s", d.Input) 266 } 267 smallest := base.ParseInternalKey(parts[0]) 268 largest := base.ParseInternalKey(parts[1]) 269 if cmp(smallest.UserKey, largest.UserKey) > 0 { 270 return fmt.Sprintf("range %v-%v is not valid", smallest, largest) 271 } 272 m := (&fileMetadata{}).ExtendPointKeyBounds(cmp, smallest, largest) 273 m.InitPhysicalBacking() 274 meta = append(meta, m) 275 paths = append(paths, strconv.Itoa(i)) 276 } 277 lr := ingestLoadResult{localPaths: paths, localMeta: meta} 278 err := ingestSortAndVerify(cmp, lr, KeyRange{}) 279 if err != nil { 280 return fmt.Sprintf("%v\n", err) 281 } 282 for i := range meta { 283 fmt.Fprintf(&buf, "%s: %v-%v\n", paths[i], meta[i].Smallest, meta[i].Largest) 284 } 285 return buf.String() 286 287 default: 288 return fmt.Sprintf("unknown command: %s", d.Cmd) 289 } 290 }) 291 }) 292 } 293 294 func TestIngestLink(t *testing.T) { 295 // Test linking of tables into the DB directory. Test cleanup when one of the 296 // tables cannot be linked. 297 298 const dir = "db" 299 const count = 10 300 for i := 0; i <= count; i++ { 301 t.Run("", func(t *testing.T) { 302 opts := &Options{FS: vfs.NewMem()} 303 opts.EnsureDefaults().WithFSDefaults() 304 require.NoError(t, opts.FS.MkdirAll(dir, 0755)) 305 objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(opts.FS, dir)) 306 require.NoError(t, err) 307 defer objProvider.Close() 308 309 paths := make([]string, 10) 310 meta := make([]*fileMetadata, len(paths)) 311 contents := make([][]byte, len(paths)) 312 for j := range paths { 313 paths[j] = fmt.Sprintf("external%d", j) 314 meta[j] = &fileMetadata{} 315 meta[j].FileNum = FileNum(j) 316 meta[j].InitPhysicalBacking() 317 f, err := opts.FS.Create(paths[j]) 318 require.NoError(t, err) 319 320 contents[j] = []byte(fmt.Sprintf("data%d", j)) 321 // memFile.Write will modify the supplied buffer when invariants are 322 // enabled, so provide a throw-away copy. 
323 _, err = f.Write(append([]byte(nil), contents[j]...)) 324 require.NoError(t, err) 325 require.NoError(t, f.Close()) 326 } 327 328 if i < count { 329 opts.FS.Remove(paths[i]) 330 } 331 332 lr := ingestLoadResult{localMeta: meta, localPaths: paths} 333 err = ingestLink(0 /* jobID */, opts, objProvider, lr, nil /* shared */) 334 if i < count { 335 if err == nil { 336 t.Fatalf("expected error, but found success") 337 } 338 } else { 339 require.NoError(t, err) 340 } 341 342 files, err := opts.FS.List(dir) 343 require.NoError(t, err) 344 345 sort.Strings(files) 346 347 if i < count { 348 if len(files) > 0 { 349 t.Fatalf("expected all of the files to be cleaned up, but found:\n%s", 350 strings.Join(files, "\n")) 351 } 352 } else { 353 if len(files) != count { 354 t.Fatalf("expected %d files, but found:\n%s", count, strings.Join(files, "\n")) 355 } 356 for j := range files { 357 ftype, fileNum, ok := base.ParseFilename(opts.FS, files[j]) 358 if !ok { 359 t.Fatalf("unable to parse filename: %s", files[j]) 360 } 361 if fileTypeTable != ftype { 362 t.Fatalf("expected table, but found %d", ftype) 363 } 364 if j != int(fileNum.FileNum()) { 365 t.Fatalf("expected table %d, but found %d", j, fileNum) 366 } 367 f, err := opts.FS.Open(opts.FS.PathJoin(dir, files[j])) 368 require.NoError(t, err) 369 370 data, err := io.ReadAll(f) 371 require.NoError(t, err) 372 require.NoError(t, f.Close()) 373 if !bytes.Equal(contents[j], data) { 374 t.Fatalf("expected %s, but found %s", contents[j], data) 375 } 376 } 377 } 378 }) 379 } 380 } 381 382 func TestIngestLinkFallback(t *testing.T) { 383 // Verify that ingestLink succeeds if linking fails by falling back to 384 // copying. 
385 mem := vfs.NewMem() 386 src, err := mem.Create("source") 387 require.NoError(t, err) 388 389 opts := &Options{FS: errorfs.Wrap(mem, errorfs.OnIndex(1))} 390 opts.EnsureDefaults().WithFSDefaults() 391 objSettings := objstorageprovider.DefaultSettings(opts.FS, "") 392 // Prevent the provider from listing the dir (where we may get an injected error). 393 objSettings.FSDirInitialListing = []string{} 394 objProvider, err := objstorageprovider.Open(objSettings) 395 require.NoError(t, err) 396 defer objProvider.Close() 397 398 meta := []*fileMetadata{{FileNum: 1}} 399 meta[0].InitPhysicalBacking() 400 lr := ingestLoadResult{localMeta: meta, localPaths: []string{"source"}} 401 err = ingestLink(0, opts, objProvider, lr, nil /* shared */) 402 require.NoError(t, err) 403 404 dest, err := mem.Open("000001.sst") 405 require.NoError(t, err) 406 407 // We should be able to write bytes to src, and not have them show up in 408 // dest. 409 _, _ = src.Write([]byte("test")) 410 data, err := io.ReadAll(dest) 411 require.NoError(t, err) 412 if len(data) != 0 { 413 t.Fatalf("expected copy, but files appear to be hard linked: [%s] unexpectedly found", data) 414 } 415 } 416 417 func TestOverlappingIngestedSSTs(t *testing.T) { 418 dir := "" 419 var ( 420 mem vfs.FS 421 d *DB 422 opts *Options 423 closed = false 424 blockFlush = false 425 ) 426 defer func() { 427 if !closed { 428 require.NoError(t, d.Close()) 429 } 430 }() 431 432 reset := func(strictMem bool) { 433 if d != nil && !closed { 434 require.NoError(t, d.Close()) 435 } 436 blockFlush = false 437 438 if strictMem { 439 mem = vfs.NewStrictMem() 440 } else { 441 mem = vfs.NewMem() 442 } 443 444 require.NoError(t, mem.MkdirAll("ext", 0755)) 445 opts = (&Options{ 446 FS: mem, 447 MemTableStopWritesThreshold: 4, 448 L0CompactionThreshold: 100, 449 L0StopWritesThreshold: 100, 450 DebugCheck: DebugCheckLevels, 451 FormatMajorVersion: internalFormatNewest, 452 }).WithFSDefaults() 453 // Disable automatic compactions because otherwise 
we'll race with 454 // delete-only compactions triggered by ingesting range tombstones. 455 opts.DisableAutomaticCompactions = true 456 457 var err error 458 d, err = Open(dir, opts) 459 require.NoError(t, err) 460 d.TestOnlyWaitForCleaning() 461 } 462 waitForFlush := func() { 463 if d == nil { 464 return 465 } 466 d.mu.Lock() 467 for d.mu.compact.flushing { 468 d.mu.compact.cond.Wait() 469 } 470 d.mu.Unlock() 471 } 472 reset(false) 473 474 datadriven.RunTest(t, "testdata/flushable_ingest", func(t *testing.T, td *datadriven.TestData) string { 475 switch td.Cmd { 476 case "reset": 477 reset(td.HasArg("strictMem")) 478 return "" 479 480 case "ignoreSyncs": 481 var ignoreSyncs bool 482 if len(td.CmdArgs) == 1 && td.CmdArgs[0].String() == "true" { 483 ignoreSyncs = true 484 } 485 mem.(*vfs.MemFS).SetIgnoreSyncs(ignoreSyncs) 486 return "" 487 488 case "resetToSynced": 489 mem.(*vfs.MemFS).ResetToSyncedState() 490 files, err := mem.List(dir) 491 sort.Strings(files) 492 require.NoError(t, err) 493 return strings.Join(files, "\n") 494 495 case "batch": 496 b := d.NewIndexedBatch() 497 if err := runBatchDefineCmd(td, b); err != nil { 498 return err.Error() 499 } 500 if err := b.Commit(nil); err != nil { 501 return err.Error() 502 } 503 return "" 504 505 case "build": 506 if err := runBuildCmd(td, d, mem); err != nil { 507 return err.Error() 508 } 509 return "" 510 511 case "ingest": 512 if err := runIngestCmd(td, d, mem); err != nil { 513 return err.Error() 514 } 515 if !blockFlush { 516 waitForFlush() 517 } 518 return "" 519 520 case "iter": 521 iter, _ := d.NewIter(nil) 522 return runIterCmd(td, iter, true) 523 524 case "lsm": 525 return runLSMCmd(td, d) 526 527 case "close": 528 if closed { 529 return "already closed" 530 } 531 require.NoError(t, d.Close()) 532 closed = true 533 return "" 534 535 case "ls": 536 files, err := mem.List(dir) 537 sort.Strings(files) 538 require.NoError(t, err) 539 return strings.Join(files, "\n") 540 541 case "open": 542 opts.ReadOnly = 
td.HasArg("readOnly") 543 var err error 544 d, err = Open(dir, opts) 545 closed = false 546 require.NoError(t, err) 547 waitForFlush() 548 d.TestOnlyWaitForCleaning() 549 return "" 550 551 case "blockFlush": 552 blockFlush = true 553 d.mu.Lock() 554 d.mu.compact.flushing = true 555 d.mu.Unlock() 556 return "" 557 558 case "allowFlush": 559 blockFlush = false 560 d.mu.Lock() 561 d.mu.compact.flushing = false 562 d.mu.Unlock() 563 return "" 564 565 case "flush": 566 d.maybeScheduleFlush() 567 waitForFlush() 568 d.TestOnlyWaitForCleaning() 569 return "" 570 571 case "get": 572 return runGetCmd(t, td, d) 573 574 default: 575 return fmt.Sprintf("unknown command: %s", td.Cmd) 576 } 577 }) 578 } 579 580 func TestExcise(t *testing.T) { 581 var mem vfs.FS 582 var d *DB 583 var flushed bool 584 defer func() { 585 require.NoError(t, d.Close()) 586 }() 587 588 var opts *Options 589 reset := func() { 590 if d != nil { 591 require.NoError(t, d.Close()) 592 } 593 594 mem = vfs.NewMem() 595 require.NoError(t, mem.MkdirAll("ext", 0755)) 596 opts = &Options{ 597 FS: mem, 598 L0CompactionThreshold: 100, 599 L0StopWritesThreshold: 100, 600 DebugCheck: DebugCheckLevels, 601 EventListener: &EventListener{FlushEnd: func(info FlushInfo) { 602 flushed = true 603 }}, 604 FormatMajorVersion: FormatVirtualSSTables, 605 Comparer: testkeys.Comparer, 606 } 607 // Disable automatic compactions because otherwise we'll race with 608 // delete-only compactions triggered by ingesting range tombstones. 609 opts.DisableAutomaticCompactions = true 610 // Set this to true to add some testing for the virtual sstable validation 611 // code paths. 
612 opts.Experimental.ValidateOnIngest = true 613 614 var err error 615 d, err = Open("", opts) 616 require.NoError(t, err) 617 } 618 reset() 619 620 datadriven.RunTest(t, "testdata/excise", func(t *testing.T, td *datadriven.TestData) string { 621 switch td.Cmd { 622 case "reset": 623 reset() 624 return "" 625 case "reopen": 626 require.NoError(t, d.Close()) 627 var err error 628 d, err = Open("", opts) 629 require.NoError(t, err) 630 631 return "" 632 case "batch": 633 b := d.NewIndexedBatch() 634 if err := runBatchDefineCmd(td, b); err != nil { 635 return err.Error() 636 } 637 if err := b.Commit(nil); err != nil { 638 return err.Error() 639 } 640 return "" 641 case "build": 642 if err := runBuildCmd(td, d, mem); err != nil { 643 return err.Error() 644 } 645 return "" 646 647 case "flush": 648 if err := d.Flush(); err != nil { 649 return err.Error() 650 } 651 return "" 652 653 case "ingest": 654 flushed = false 655 if err := runIngestCmd(td, d, mem); err != nil { 656 return err.Error() 657 } 658 // Wait for a possible flush. 659 d.mu.Lock() 660 for d.mu.compact.flushing { 661 d.mu.compact.cond.Wait() 662 } 663 d.mu.Unlock() 664 if flushed { 665 return "memtable flushed" 666 } 667 return "" 668 669 case "ingest-and-excise": 670 flushed = false 671 if err := runIngestAndExciseCmd(td, d, mem); err != nil { 672 return err.Error() 673 } 674 // Wait for a possible flush. 675 d.mu.Lock() 676 for d.mu.compact.flushing { 677 d.mu.compact.cond.Wait() 678 } 679 d.mu.Unlock() 680 if flushed { 681 return "memtable flushed" 682 } 683 return "" 684 685 case "get": 686 return runGetCmd(t, td, d) 687 688 case "iter": 689 iter, _ := d.NewIter(&IterOptions{ 690 KeyTypes: IterKeyTypePointsAndRanges, 691 }) 692 return runIterCmd(td, iter, true) 693 694 case "lsm": 695 return runLSMCmd(td, d) 696 697 case "metrics": 698 // The asynchronous loading of table stats can change metrics, so 699 // wait for all the tables' stats to be loaded. 
700 d.mu.Lock() 701 d.waitTableStats() 702 d.mu.Unlock() 703 704 return d.Metrics().StringForTests() 705 706 case "wait-pending-table-stats": 707 return runTableStatsCmd(td, d) 708 709 case "excise": 710 ve := &versionEdit{ 711 DeletedFiles: map[deletedFileEntry]*fileMetadata{}, 712 } 713 var exciseSpan KeyRange 714 if len(td.CmdArgs) != 2 { 715 panic("insufficient args for compact command") 716 } 717 exciseSpan.Start = []byte(td.CmdArgs[0].Key) 718 exciseSpan.End = []byte(td.CmdArgs[1].Key) 719 720 d.mu.Lock() 721 d.mu.versions.logLock() 722 d.mu.Unlock() 723 current := d.mu.versions.currentVersion() 724 for level := range current.Levels { 725 iter := current.Levels[level].Iter() 726 for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { 727 _, err := d.excise(exciseSpan, m, ve, level) 728 if err != nil { 729 d.mu.Lock() 730 d.mu.versions.logUnlock() 731 d.mu.Unlock() 732 return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) 733 } 734 } 735 } 736 d.mu.Lock() 737 d.mu.versions.logUnlock() 738 d.mu.Unlock() 739 return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.DebugString(base.DefaultFormatter)) 740 741 case "confirm-backing": 742 // Confirms that the files have the same FileBacking. 
743 fileNums := make(map[base.FileNum]struct{}) 744 for i := range td.CmdArgs { 745 fNum, err := strconv.Atoi(td.CmdArgs[i].Key) 746 if err != nil { 747 panic("invalid file number") 748 } 749 fileNums[base.FileNum(fNum)] = struct{}{} 750 } 751 d.mu.Lock() 752 currVersion := d.mu.versions.currentVersion() 753 var ptr *manifest.FileBacking 754 for _, level := range currVersion.Levels { 755 lIter := level.Iter() 756 for f := lIter.First(); f != nil; f = lIter.Next() { 757 if _, ok := fileNums[f.FileNum]; ok { 758 if ptr == nil { 759 ptr = f.FileBacking 760 continue 761 } 762 if f.FileBacking != ptr { 763 d.mu.Unlock() 764 return "file backings are not the same" 765 } 766 } 767 } 768 } 769 d.mu.Unlock() 770 return "file backings are the same" 771 case "compact": 772 if len(td.CmdArgs) != 2 { 773 panic("insufficient args for compact command") 774 } 775 l := td.CmdArgs[0].Key 776 r := td.CmdArgs[1].Key 777 err := d.Compact([]byte(l), []byte(r), false) 778 if err != nil { 779 return err.Error() 780 } 781 return "" 782 default: 783 return fmt.Sprintf("unknown command: %s", td.Cmd) 784 } 785 }) 786 } 787 788 func testIngestSharedImpl( 789 t *testing.T, createOnShared remote.CreateOnSharedStrategy, fileName string, 790 ) { 791 var d, d1, d2 *DB 792 var efos map[string]*EventuallyFileOnlySnapshot 793 defer func() { 794 for _, e := range efos { 795 require.NoError(t, e.Close()) 796 } 797 if d1 != nil { 798 require.NoError(t, d1.Close()) 799 } 800 if d2 != nil { 801 require.NoError(t, d2.Close()) 802 } 803 }() 804 creatorIDCounter := uint64(1) 805 replicateCounter := 1 806 807 reset := func() { 808 for _, e := range efos { 809 require.NoError(t, e.Close()) 810 } 811 if d1 != nil { 812 require.NoError(t, d1.Close()) 813 } 814 if d2 != nil { 815 require.NoError(t, d2.Close()) 816 } 817 efos = make(map[string]*EventuallyFileOnlySnapshot) 818 819 sstorage := remote.NewInMem() 820 mem1 := vfs.NewMem() 821 mem2 := vfs.NewMem() 822 require.NoError(t, mem1.MkdirAll("ext", 0755)) 823 
require.NoError(t, mem2.MkdirAll("ext", 0755)) 824 opts1 := &Options{ 825 Comparer: testkeys.Comparer, 826 FS: mem1, 827 LBaseMaxBytes: 1, 828 L0CompactionThreshold: 100, 829 L0StopWritesThreshold: 100, 830 DebugCheck: DebugCheckLevels, 831 FormatMajorVersion: FormatVirtualSSTables, 832 } 833 // lel. 834 lel := MakeLoggingEventListener(DefaultLogger) 835 opts1.EventListener = &lel 836 opts1.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 837 "": sstorage, 838 }) 839 opts1.Experimental.CreateOnShared = createOnShared 840 opts1.Experimental.CreateOnSharedLocator = "" 841 // Disable automatic compactions because otherwise we'll race with 842 // delete-only compactions triggered by ingesting range tombstones. 843 opts1.DisableAutomaticCompactions = true 844 845 opts2 := &Options{} 846 *opts2 = *opts1 847 opts2.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 848 "": sstorage, 849 }) 850 opts2.Experimental.CreateOnShared = createOnShared 851 opts2.Experimental.CreateOnSharedLocator = "" 852 opts2.FS = mem2 853 854 var err error 855 d1, err = Open("", opts1) 856 require.NoError(t, err) 857 require.NoError(t, d1.SetCreatorID(creatorIDCounter)) 858 creatorIDCounter++ 859 d2, err = Open("", opts2) 860 require.NoError(t, err) 861 require.NoError(t, d2.SetCreatorID(creatorIDCounter)) 862 creatorIDCounter++ 863 d = d1 864 } 865 reset() 866 867 datadriven.RunTest(t, fmt.Sprintf("testdata/%s", fileName), func(t *testing.T, td *datadriven.TestData) string { 868 switch td.Cmd { 869 case "reset": 870 reset() 871 return "" 872 case "switch": 873 if len(td.CmdArgs) != 1 { 874 return "usage: switch <1 or 2>" 875 } 876 switch td.CmdArgs[0].Key { 877 case "1": 878 d = d1 879 case "2": 880 d = d2 881 default: 882 return "usage: switch <1 or 2>" 883 } 884 return "ok" 885 case "batch": 886 b := d.NewIndexedBatch() 887 if err := runBatchDefineCmd(td, b); err != nil { 888 return err.Error() 889 } 890 if err := 
b.Commit(nil); err != nil { 891 return err.Error() 892 } 893 return "" 894 case "build": 895 if err := runBuildCmd(td, d, d.opts.FS); err != nil { 896 return err.Error() 897 } 898 return "" 899 900 case "flush": 901 if err := d.Flush(); err != nil { 902 return err.Error() 903 } 904 return "" 905 906 case "ingest": 907 if err := runIngestCmd(td, d, d.opts.FS); err != nil { 908 return err.Error() 909 } 910 // Wait for a possible flush. 911 d.mu.Lock() 912 for d.mu.compact.flushing { 913 d.mu.compact.cond.Wait() 914 } 915 d.mu.Unlock() 916 return "" 917 918 case "ingest-and-excise": 919 if err := runIngestAndExciseCmd(td, d, d.opts.FS); err != nil { 920 return err.Error() 921 } 922 // Wait for a possible flush. 923 d.mu.Lock() 924 for d.mu.compact.flushing { 925 d.mu.compact.cond.Wait() 926 } 927 d.mu.Unlock() 928 return "" 929 930 case "replicate": 931 if len(td.CmdArgs) != 4 { 932 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 933 } 934 var from, to *DB 935 switch td.CmdArgs[0].Key { 936 case "1": 937 from = d1 938 case "2": 939 from = d2 940 default: 941 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 942 } 943 switch td.CmdArgs[1].Key { 944 case "1": 945 to = d1 946 case "2": 947 to = d2 948 default: 949 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 950 } 951 startKey := []byte(td.CmdArgs[2].Key) 952 endKey := []byte(td.CmdArgs[3].Key) 953 954 writeOpts := d.opts.MakeWriterOptions(0 /* level */, to.opts.FormatMajorVersion.MaxTableFormat()) 955 sstPath := fmt.Sprintf("ext/replicate%d.sst", replicateCounter) 956 f, err := to.opts.FS.Create(sstPath) 957 require.NoError(t, err) 958 replicateCounter++ 959 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writeOpts) 960 961 var sharedSSTs []SharedSSTMeta 962 err = from.ScanInternal(context.TODO(), startKey, endKey, 963 func(key *InternalKey, value LazyValue, _ IteratorLevel) error { 964 val, _, err := value.Value(nil) 965 
require.NoError(t, err) 966 require.NoError(t, w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val)) 967 return nil 968 }, 969 func(start, end []byte, seqNum uint64) error { 970 require.NoError(t, w.DeleteRange(start, end)) 971 return nil 972 }, 973 func(start, end []byte, keys []keyspan.Key) error { 974 s := keyspan.Span{ 975 Start: start, 976 End: end, 977 Keys: keys, 978 KeysOrder: 0, 979 } 980 require.NoError(t, rangekey.Encode(&s, func(k base.InternalKey, v []byte) error { 981 return w.AddRangeKey(base.MakeInternalKey(k.UserKey, 0, k.Kind()), v) 982 })) 983 return nil 984 }, 985 func(sst *SharedSSTMeta) error { 986 sharedSSTs = append(sharedSSTs, *sst) 987 return nil 988 }, 989 ) 990 require.NoError(t, err) 991 require.NoError(t, w.Close()) 992 993 _, err = to.IngestAndExcise([]string{sstPath}, sharedSSTs, KeyRange{Start: startKey, End: endKey}) 994 require.NoError(t, err) 995 return fmt.Sprintf("replicated %d shared SSTs", len(sharedSSTs)) 996 997 case "get": 998 return runGetCmd(t, td, d) 999 1000 case "iter": 1001 o := &IterOptions{KeyTypes: IterKeyTypePointsAndRanges} 1002 var reader Reader 1003 reader = d 1004 for _, arg := range td.CmdArgs { 1005 switch arg.Key { 1006 case "mask-suffix": 1007 o.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) 1008 case "mask-filter": 1009 o.RangeKeyMasking.Filter = func() BlockPropertyFilterMask { 1010 return sstable.NewTestKeysMaskingFilter() 1011 } 1012 case "snapshot": 1013 reader = efos[arg.Vals[0]] 1014 } 1015 } 1016 iter, err := reader.NewIter(o) 1017 if err != nil { 1018 return err.Error() 1019 } 1020 return runIterCmd(td, iter, true) 1021 1022 case "lsm": 1023 return runLSMCmd(td, d) 1024 1025 case "metrics": 1026 // The asynchronous loading of table stats can change metrics, so 1027 // wait for all the tables' stats to be loaded. 
1028 d.mu.Lock() 1029 d.waitTableStats() 1030 d.mu.Unlock() 1031 1032 return d.Metrics().StringForTests() 1033 1034 case "wait-pending-table-stats": 1035 return runTableStatsCmd(td, d) 1036 1037 case "excise": 1038 ve := &versionEdit{ 1039 DeletedFiles: map[deletedFileEntry]*fileMetadata{}, 1040 } 1041 var exciseSpan KeyRange 1042 if len(td.CmdArgs) != 2 { 1043 panic("insufficient args for excise command") 1044 } 1045 exciseSpan.Start = []byte(td.CmdArgs[0].Key) 1046 exciseSpan.End = []byte(td.CmdArgs[1].Key) 1047 1048 d.mu.Lock() 1049 d.mu.versions.logLock() 1050 d.mu.Unlock() 1051 current := d.mu.versions.currentVersion() 1052 for level := range current.Levels { 1053 iter := current.Levels[level].Iter() 1054 for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { 1055 _, err := d.excise(exciseSpan, m, ve, level) 1056 if err != nil { 1057 d.mu.Lock() 1058 d.mu.versions.logUnlock() 1059 d.mu.Unlock() 1060 return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) 1061 } 1062 } 1063 } 1064 d.mu.Lock() 1065 d.mu.versions.logUnlock() 1066 d.mu.Unlock() 1067 return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.String()) 1068 1069 case "file-only-snapshot": 1070 if len(td.CmdArgs) != 1 { 1071 panic("insufficient args for file-only-snapshot command") 1072 } 1073 name := td.CmdArgs[0].Key 1074 var keyRanges []KeyRange 1075 for _, line := range strings.Split(td.Input, "\n") { 1076 fields := strings.Fields(line) 1077 if len(fields) != 2 { 1078 return "expected two fields for file-only snapshot KeyRanges" 1079 } 1080 kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])} 1081 keyRanges = append(keyRanges, kr) 1082 } 1083 1084 s := d.NewEventuallyFileOnlySnapshot(keyRanges) 1085 efos[name] = s 1086 return "ok" 1087 1088 case "wait-for-file-only-snapshot": 1089 if len(td.CmdArgs) != 1 { 1090 panic("insufficient args for 
file-only-snapshot command") 1091 } 1092 name := td.CmdArgs[0].Key 1093 err := efos[name].WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond) 1094 if err != nil { 1095 return err.Error() 1096 } 1097 return "ok" 1098 1099 case "compact": 1100 err := runCompactCmd(td, d) 1101 if err != nil { 1102 return err.Error() 1103 } 1104 return "ok" 1105 default: 1106 return fmt.Sprintf("unknown command: %s", td.Cmd) 1107 } 1108 }) 1109 } 1110 1111 func TestIngestShared(t *testing.T) { 1112 for _, strategy := range []remote.CreateOnSharedStrategy{remote.CreateOnSharedAll, remote.CreateOnSharedLower} { 1113 strategyStr := "all" 1114 if strategy == remote.CreateOnSharedLower { 1115 strategyStr = "lower" 1116 } 1117 t.Run(fmt.Sprintf("createOnShared=%s", strategyStr), func(t *testing.T) { 1118 fileName := "ingest_shared" 1119 if strategy == remote.CreateOnSharedLower { 1120 fileName = "ingest_shared_lower" 1121 } 1122 testIngestSharedImpl(t, strategy, fileName) 1123 }) 1124 } 1125 } 1126 1127 func TestSimpleIngestShared(t *testing.T) { 1128 mem := vfs.NewMem() 1129 var d *DB 1130 var provider2 objstorage.Provider 1131 opts2 := Options{FS: vfs.NewMem(), FormatMajorVersion: FormatVirtualSSTables} 1132 opts2.EnsureDefaults() 1133 1134 // Create an objProvider where we will fake-create some sstables that can 1135 // then be shared back to the db instance. 
1136 providerSettings := objstorageprovider.Settings{ 1137 Logger: opts2.Logger, 1138 FS: opts2.FS, 1139 FSDirName: "", 1140 FSDirInitialListing: nil, 1141 FSCleaner: opts2.Cleaner, 1142 NoSyncOnClose: opts2.NoSyncOnClose, 1143 BytesPerSync: opts2.BytesPerSync, 1144 } 1145 providerSettings.Remote.StorageFactory = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 1146 "": remote.NewInMem(), 1147 }) 1148 providerSettings.Remote.CreateOnShared = remote.CreateOnSharedAll 1149 providerSettings.Remote.CreateOnSharedLocator = "" 1150 1151 provider2, err := objstorageprovider.Open(providerSettings) 1152 require.NoError(t, err) 1153 creatorIDCounter := uint64(1) 1154 provider2.SetCreatorID(objstorage.CreatorID(creatorIDCounter)) 1155 creatorIDCounter++ 1156 1157 defer func() { 1158 require.NoError(t, d.Close()) 1159 }() 1160 1161 reset := func() { 1162 if d != nil { 1163 require.NoError(t, d.Close()) 1164 } 1165 1166 mem = vfs.NewMem() 1167 require.NoError(t, mem.MkdirAll("ext", 0755)) 1168 opts := &Options{ 1169 FormatMajorVersion: FormatVirtualSSTables, 1170 FS: mem, 1171 L0CompactionThreshold: 100, 1172 L0StopWritesThreshold: 100, 1173 } 1174 opts.Experimental.RemoteStorage = providerSettings.Remote.StorageFactory 1175 opts.Experimental.CreateOnShared = providerSettings.Remote.CreateOnShared 1176 opts.Experimental.CreateOnSharedLocator = providerSettings.Remote.CreateOnSharedLocator 1177 1178 var err error 1179 d, err = Open("", opts) 1180 require.NoError(t, err) 1181 require.NoError(t, d.SetCreatorID(creatorIDCounter)) 1182 creatorIDCounter++ 1183 } 1184 reset() 1185 1186 metaMap := map[base.DiskFileNum]objstorage.ObjectMetadata{} 1187 1188 require.NoError(t, d.Set([]byte("d"), []byte("unexpected"), nil)) 1189 require.NoError(t, d.Set([]byte("e"), []byte("unexpected"), nil)) 1190 require.NoError(t, d.Set([]byte("a"), []byte("unexpected"), nil)) 1191 require.NoError(t, d.Set([]byte("f"), []byte("unexpected"), nil)) 1192 d.Flush() 1193 1194 { 1195 // Create a 
shared file. 1196 fn := base.FileNum(2) 1197 f, meta, err := provider2.Create(context.TODO(), fileTypeTable, fn.DiskFileNum(), objstorage.CreateOptions{PreferSharedStorage: true}) 1198 require.NoError(t, err) 1199 w := sstable.NewWriter(f, d.opts.MakeWriterOptions(0, d.opts.FormatMajorVersion.MaxTableFormat())) 1200 w.Set([]byte("d"), []byte("shared")) 1201 w.Set([]byte("e"), []byte("shared")) 1202 w.Close() 1203 metaMap[fn.DiskFileNum()] = meta 1204 } 1205 1206 m := metaMap[base.FileNum(2).DiskFileNum()] 1207 handle, err := provider2.RemoteObjectBacking(&m) 1208 require.NoError(t, err) 1209 size, err := provider2.Size(m) 1210 require.NoError(t, err) 1211 1212 sharedSSTMeta := SharedSSTMeta{ 1213 Backing: handle, 1214 Smallest: base.MakeInternalKey([]byte("d"), 0, InternalKeyKindSet), 1215 Largest: base.MakeInternalKey([]byte("e"), 0, InternalKeyKindSet), 1216 SmallestPointKey: base.MakeInternalKey([]byte("d"), 0, InternalKeyKindSet), 1217 LargestPointKey: base.MakeInternalKey([]byte("e"), 0, InternalKeyKindSet), 1218 Level: 6, 1219 Size: uint64(size + 5), 1220 } 1221 _, err = d.IngestAndExcise([]string{}, []SharedSSTMeta{sharedSSTMeta}, KeyRange{Start: []byte("d"), End: []byte("ee")}) 1222 require.NoError(t, err) 1223 1224 // TODO(bilal): Once reading of shared sstables is in, verify that the values 1225 // of d and e have been updated. 
// blockedCompaction is the pair of channels used to pause a compaction inside
// the TableCreated event hook: the hook sends on startBlock once it has
// reached the blocking point, then waits to receive from unblock.
type blockedCompaction struct {
	startBlock, unblock chan struct{}
}

// TestConcurrentExcise is a datadriven test (testdata/concurrent_excise) that
// drives two DBs sharing one in-memory remote storage. Commands can replicate
// a key range from one DB to the other with IngestAndExcise, and can start a
// compaction that is held inside an event-listener hook until an explicit
// "unblock" command releases it.
func TestConcurrentExcise(t *testing.T) {
	// d is the DB that commands currently operate on; "switch" points it at
	// d1 or d2.
	var d, d1, d2 *DB
	var efos map[string]*EventuallyFileOnlySnapshot
	backgroundErrs := make(chan error, 5)
	var compactions map[string]*blockedCompaction
	defer func() {
		for _, e := range efos {
			require.NoError(t, e.Close())
		}
		if d1 != nil {
			require.NoError(t, d1.Close())
		}
		if d2 != nil {
			require.NoError(t, d2.Close())
		}
	}()
	creatorIDCounter := uint64(1)
	replicateCounter := 1

	// wg tracks the goroutines spawned by "compact block=...". The deferred
	// Wait is registered after the cleanup above, so it runs before the DBs
	// are closed.
	var wg sync.WaitGroup
	defer wg.Wait()
	var blockNextCompaction bool
	var blockedJobID int
	var blockedCompactionName string
	var blockedCompactionsMu sync.Mutex // protects the above three variables.

	// reset waits for outstanding async compactions, closes all snapshots and
	// both DBs, then reopens two fresh DBs with the blocking event listener
	// installed.
	reset := func() {
		wg.Wait()
		for _, e := range efos {
			require.NoError(t, e.Close())
		}
		if d1 != nil {
			require.NoError(t, d1.Close())
		}
		if d2 != nil {
			require.NoError(t, d2.Close())
		}
		efos = make(map[string]*EventuallyFileOnlySnapshot)
		compactions = make(map[string]*blockedCompaction)
		backgroundErrs = make(chan error, 5)

		var el EventListener
		el.EnsureDefaults(testLogger{t: t})
		el.FlushBegin = func(info FlushInfo) {
			// Don't block flushes
		}
		el.BackgroundError = func(err error) {
			backgroundErrs <- err
		}
		// CompactionBegin records the job ID of the next non-move compaction
		// when a block has been requested.
		el.CompactionBegin = func(info CompactionInfo) {
			if info.Reason == "move" {
				return
			}
			blockedCompactionsMu.Lock()
			defer blockedCompactionsMu.Unlock()
			if blockNextCompaction {
				blockNextCompaction = false
				blockedJobID = info.JobID
			}
		}
		// TableCreated pauses the recorded job: it signals startBlock and
		// then waits on unblock. The mutex is released before blocking so
		// other hooks and commands are not held up.
		el.TableCreated = func(info TableCreateInfo) {
			blockedCompactionsMu.Lock()
			if info.JobID != blockedJobID {
				blockedCompactionsMu.Unlock()
				return
			}
			blockedJobID = 0
			c := compactions[blockedCompactionName]
			blockedCompactionName = ""
			blockedCompactionsMu.Unlock()
			c.startBlock <- struct{}{}
			<-c.unblock
		}

		sstorage := remote.NewInMem()
		mem1 := vfs.NewMem()
		mem2 := vfs.NewMem()
		require.NoError(t, mem1.MkdirAll("ext", 0755))
		require.NoError(t, mem2.MkdirAll("ext", 0755))
		opts1 := &Options{
			Comparer:              testkeys.Comparer,
			LBaseMaxBytes:         1,
			FS:                    mem1,
			L0CompactionThreshold: 100,
			L0StopWritesThreshold: 100,
			DebugCheck:            DebugCheckLevels,
			FormatMajorVersion:    FormatVirtualSSTables,
		}
		// Tee events to both a logging listener (lel) and the blocking
		// listener (el) configured above.
		lel := MakeLoggingEventListener(DefaultLogger)
		tel := TeeEventListener(lel, el)
		opts1.EventListener = &tel
		opts1.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
			"": sstorage,
		})
		opts1.Experimental.CreateOnShared = remote.CreateOnSharedAll
		opts1.Experimental.CreateOnSharedLocator = ""
		// Disable automatic compactions because otherwise we'll race with
		// delete-only compactions triggered by ingesting range tombstones.
		opts1.DisableAutomaticCompactions = true

		// opts2 starts as a copy of opts1 (so it shares the same event
		// listener), with its own filesystem and a factory over the same
		// remote storage.
		opts2 := &Options{}
		*opts2 = *opts1
		opts2.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{
			"": sstorage,
		})
		opts2.Experimental.CreateOnShared = remote.CreateOnSharedAll
		opts2.Experimental.CreateOnSharedLocator = ""
		opts2.FS = mem2

		var err error
		d1, err = Open("", opts1)
		require.NoError(t, err)
		require.NoError(t, d1.SetCreatorID(creatorIDCounter))
		creatorIDCounter++
		d2, err = Open("", opts2)
		require.NoError(t, err)
		require.NoError(t, d2.SetCreatorID(creatorIDCounter))
		creatorIDCounter++
		d = d1
	}
	reset()

	datadriven.RunTest(t, "testdata/concurrent_excise", func(t *testing.T, td *datadriven.TestData) string {
		switch td.Cmd {
		case "reset":
			reset()
			return ""
		case "switch":
			if len(td.CmdArgs) != 1 {
				return "usage: switch <1 or 2>"
			}
			switch td.CmdArgs[0].Key {
			case "1":
				d = d1
			case "2":
				d = d2
			default:
				return "usage: switch <1 or 2>"
			}
			return "ok"
		case "batch":
			b := d.NewIndexedBatch()
			if err := runBatchDefineCmd(td, b); err != nil {
				return err.Error()
			}
			if err := b.Commit(nil); err != nil {
				return err.Error()
			}
			return ""
		case "build":
			if err := runBuildCmd(td, d, d.opts.FS); err != nil {
				return err.Error()
			}
			return ""

		case "flush":
			if err := d.Flush(); err != nil {
				return err.Error()
			}
			return ""

		case "ingest":
			if err := runIngestCmd(td, d, d.opts.FS); err != nil {
				return err.Error()
			}
			// Wait for a possible flush.
			d.mu.Lock()
			for d.mu.compact.flushing {
				d.mu.compact.cond.Wait()
			}
			d.mu.Unlock()
			return ""

		case "ingest-and-excise":
			if err := runIngestAndExciseCmd(td, d, d.opts.FS); err != nil {
				return err.Error()
			}
			// Wait for a possible flush.
			d.mu.Lock()
			for d.mu.compact.flushing {
				d.mu.compact.cond.Wait()
			}
			d.mu.Unlock()
			return ""

		case "replicate":
			// Copies [start-key, end-key) from one DB to the other: keys
			// returned by ScanInternal are rewritten at sequence number 0
			// into a new local sstable, shared sstables are collected, and
			// both are applied to the target with IngestAndExcise.
			if len(td.CmdArgs) != 4 {
				return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>"
			}
			var from, to *DB
			switch td.CmdArgs[0].Key {
			case "1":
				from = d1
			case "2":
				from = d2
			default:
				return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>"
			}
			switch td.CmdArgs[1].Key {
			case "1":
				to = d1
			case "2":
				to = d2
			default:
				return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>"
			}
			startKey := []byte(td.CmdArgs[2].Key)
			endKey := []byte(td.CmdArgs[3].Key)

			// NOTE(review): the writer options come from the currently
			// selected DB (d) while the table format comes from `to`; both
			// DBs are opened from copies of the same options in reset.
			writeOpts := d.opts.MakeWriterOptions(0 /* level */, to.opts.FormatMajorVersion.MaxTableFormat())
			sstPath := fmt.Sprintf("ext/replicate%d.sst", replicateCounter)
			f, err := to.opts.FS.Create(sstPath)
			require.NoError(t, err)
			replicateCounter++
			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writeOpts)

			var sharedSSTs []SharedSSTMeta
			err = from.ScanInternal(context.TODO(), startKey, endKey,
				// Point keys.
				func(key *InternalKey, value LazyValue, _ IteratorLevel) error {
					val, _, err := value.Value(nil)
					require.NoError(t, err)
					require.NoError(t, w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val))
					return nil
				},
				// Range deletions.
				func(start, end []byte, seqNum uint64) error {
					require.NoError(t, w.DeleteRange(start, end))
					return nil
				},
				// Range keys.
				func(start, end []byte, keys []keyspan.Key) error {
					s := keyspan.Span{
						Start:     start,
						End:       end,
						Keys:      keys,
						KeysOrder: 0,
					}
					require.NoError(t, rangekey.Encode(&s, func(k base.InternalKey, v []byte) error {
						return w.AddRangeKey(base.MakeInternalKey(k.UserKey, 0, k.Kind()), v)
					}))
					return nil
				},
				// Shared sstables.
				func(sst *SharedSSTMeta) error {
					sharedSSTs = append(sharedSSTs, *sst)
					return nil
				},
			)
			require.NoError(t, err)
			require.NoError(t, w.Close())

			_, err = to.IngestAndExcise([]string{sstPath}, sharedSSTs, KeyRange{Start: startKey, End: endKey})
			require.NoError(t, err)
			return fmt.Sprintf("replicated %d shared SSTs", len(sharedSSTs))

		case "get":
			return runGetCmd(t, td, d)

		case "iter":
			o := &IterOptions{KeyTypes: IterKeyTypePointsAndRanges}
			var reader Reader
			reader = d
			for _, arg := range td.CmdArgs {
				switch arg.Key {
				case "mask-suffix":
					o.RangeKeyMasking.Suffix = []byte(arg.Vals[0])
				case "mask-filter":
					o.RangeKeyMasking.Filter = func() BlockPropertyFilterMask {
						return sstable.NewTestKeysMaskingFilter()
					}
				case "snapshot":
					// Read through a previously created file-only snapshot
					// instead of the DB.
					reader = efos[arg.Vals[0]]
				}
			}
			iter, err := reader.NewIter(o)
			if err != nil {
				return err.Error()
			}
			return runIterCmd(td, iter, true)

		case "lsm":
			return runLSMCmd(td, d)

		case "metrics":
			// The asynchronous loading of table stats can change metrics, so
			// wait for all the tables' stats to be loaded.
			d.mu.Lock()
			d.waitTableStats()
			d.mu.Unlock()

			return d.Metrics().StringForTests()

		case "wait-pending-table-stats":
			return runTableStatsCmd(td, d)

		case "excise":
			// Dry run: accumulates into a scratch version edit what excising
			// the span would do and prints it. The edit is never applied.
			ve := &versionEdit{
				DeletedFiles: map[deletedFileEntry]*fileMetadata{},
			}
			var exciseSpan KeyRange
			if len(td.CmdArgs) != 2 {
				panic("insufficient args for excise command")
			}
			exciseSpan.Start = []byte(td.CmdArgs[0].Key)
			exciseSpan.End = []byte(td.CmdArgs[1].Key)

			// The version log lock is taken under d.mu and held (with d.mu
			// released) while files are examined; every return path below
			// releases it again under d.mu.
			d.mu.Lock()
			d.mu.versions.logLock()
			d.mu.Unlock()
			current := d.mu.versions.currentVersion()
			for level := range current.Levels {
				iter := current.Levels[level].Iter()
				for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() {
					_, err := d.excise(exciseSpan, m, ve, level)
					if err != nil {
						d.mu.Lock()
						d.mu.versions.logUnlock()
						d.mu.Unlock()
						return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error())
					}
				}
			}
			d.mu.Lock()
			d.mu.versions.logUnlock()
			d.mu.Unlock()
			return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.String())

		case "file-only-snapshot":
			// Each input line is "<start> <end>" and becomes one KeyRange of
			// the snapshot, which is stored under the name given as the
			// first argument.
			if len(td.CmdArgs) != 1 {
				panic("insufficient args for file-only-snapshot command")
			}
			name := td.CmdArgs[0].Key
			var keyRanges []KeyRange
			for _, line := range strings.Split(td.Input, "\n") {
				fields := strings.Fields(line)
				if len(fields) != 2 {
					return "expected two fields for file-only snapshot KeyRanges"
				}
				kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])}
				keyRanges = append(keyRanges, kr)
			}

			s := d.NewEventuallyFileOnlySnapshot(keyRanges)
			efos[name] = s
			return "ok"

		case "wait-for-file-only-snapshot":
			if len(td.CmdArgs) != 1 {
				panic("insufficient args for file-only-snapshot command")
			}
			name := td.CmdArgs[0].Key
			err := efos[name].WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond)
			if err != nil {
				return err.Error()
			}
			return "ok"

		case "unblock":
			// Releases the compaction paused under the given name.
			// NOTE(review): an unknown name yields a nil entry and a nil
			// dereference below.
			name := td.CmdArgs[0].Key
			blockedCompactionsMu.Lock()
			c := compactions[name]
			delete(compactions, name)
			blockedCompactionsMu.Unlock()
			c.unblock <- struct{}{}
			return "ok"

		case "compact":
			// With a block=<name> argument the compaction runs in a separate
			// goroutine and is paused in the TableCreated hook; the command
			// returns once the hook signals startBlock. Without it the
			// compaction runs synchronously.
			async := false
			var otherArgs []datadriven.CmdArg
			var bc *blockedCompaction
			for i := range td.CmdArgs {
				switch td.CmdArgs[i].Key {
				case "block":
					name := td.CmdArgs[i].Vals[0]
					bc = &blockedCompaction{startBlock: make(chan struct{}), unblock: make(chan struct{})}
					blockedCompactionsMu.Lock()
					compactions[name] = bc
					blockNextCompaction = true
					blockedCompactionName = name
					blockedCompactionsMu.Unlock()
					async = true
				default:
					otherArgs = append(otherArgs, td.CmdArgs[i])
				}
			}
			// tdClone carries the arguments with "block" stripped.
			var tdClone datadriven.TestData
			tdClone = *td
			tdClone.CmdArgs = otherArgs
			if !async {
				// The synchronous path passes td rather than tdClone; with
				// no "block" argument the two hold the same arguments.
				err := runCompactCmd(td, d)
				if err != nil {
					return err.Error()
				}
			} else {
				wg.Add(1)
				go func() {
					defer wg.Done()
					_ = runCompactCmd(&tdClone, d)
				}()
				<-bc.startBlock
				return "spun off in separate goroutine"
			}
			return "ok"
		case "wait-for-background-error":
			// Blocks until the BackgroundError hook reports an error.
			err := <-backgroundErrs
			return err.Error()
		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}
file-only-snapshot command") 1579 } 1580 name := td.CmdArgs[0].Key 1581 err := efos[name].WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond) 1582 if err != nil { 1583 return err.Error() 1584 } 1585 return "ok" 1586 1587 case "unblock": 1588 name := td.CmdArgs[0].Key 1589 blockedCompactionsMu.Lock() 1590 c := compactions[name] 1591 delete(compactions, name) 1592 blockedCompactionsMu.Unlock() 1593 c.unblock <- struct{}{} 1594 return "ok" 1595 1596 case "compact": 1597 async := false 1598 var otherArgs []datadriven.CmdArg 1599 var bc *blockedCompaction 1600 for i := range td.CmdArgs { 1601 switch td.CmdArgs[i].Key { 1602 case "block": 1603 name := td.CmdArgs[i].Vals[0] 1604 bc = &blockedCompaction{startBlock: make(chan struct{}), unblock: make(chan struct{})} 1605 blockedCompactionsMu.Lock() 1606 compactions[name] = bc 1607 blockNextCompaction = true 1608 blockedCompactionName = name 1609 blockedCompactionsMu.Unlock() 1610 async = true 1611 default: 1612 otherArgs = append(otherArgs, td.CmdArgs[i]) 1613 } 1614 } 1615 var tdClone datadriven.TestData 1616 tdClone = *td 1617 tdClone.CmdArgs = otherArgs 1618 if !async { 1619 err := runCompactCmd(td, d) 1620 if err != nil { 1621 return err.Error() 1622 } 1623 } else { 1624 wg.Add(1) 1625 go func() { 1626 defer wg.Done() 1627 _ = runCompactCmd(&tdClone, d) 1628 }() 1629 <-bc.startBlock 1630 return "spun off in separate goroutine" 1631 } 1632 return "ok" 1633 case "wait-for-background-error": 1634 err := <-backgroundErrs 1635 return err.Error() 1636 default: 1637 return fmt.Sprintf("unknown command: %s", td.Cmd) 1638 } 1639 }) 1640 } 1641 1642 func TestIngestExternal(t *testing.T) { 1643 var mem vfs.FS 1644 var d *DB 1645 var flushed bool 1646 defer func() { 1647 require.NoError(t, d.Close()) 1648 }() 1649 1650 var remoteStorage remote.Storage 1651 1652 reset := func() { 1653 if d != nil { 1654 require.NoError(t, d.Close()) 1655 } 1656 1657 mem = vfs.NewMem() 1658 require.NoError(t, mem.MkdirAll("ext", 0755)) 1659 
remoteStorage = remote.NewInMem() 1660 opts := &Options{ 1661 FS: mem, 1662 L0CompactionThreshold: 100, 1663 L0StopWritesThreshold: 100, 1664 DebugCheck: DebugCheckLevels, 1665 EventListener: &EventListener{FlushEnd: func(info FlushInfo) { 1666 flushed = true 1667 }}, 1668 FormatMajorVersion: FormatVirtualSSTables, 1669 } 1670 opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 1671 "external-locator": remoteStorage, 1672 }) 1673 opts.Experimental.CreateOnShared = remote.CreateOnSharedNone 1674 // Disable automatic compactions because otherwise we'll race with 1675 // delete-only compactions triggered by ingesting range tombstones. 1676 opts.DisableAutomaticCompactions = true 1677 1678 var err error 1679 d, err = Open("", opts) 1680 require.NoError(t, err) 1681 require.NoError(t, d.SetCreatorID(1)) 1682 } 1683 reset() 1684 1685 datadriven.RunTest(t, "testdata/ingest_external", func(t *testing.T, td *datadriven.TestData) string { 1686 switch td.Cmd { 1687 case "reset": 1688 reset() 1689 return "" 1690 case "batch": 1691 b := d.NewIndexedBatch() 1692 if err := runBatchDefineCmd(td, b); err != nil { 1693 return err.Error() 1694 } 1695 if err := b.Commit(nil); err != nil { 1696 return err.Error() 1697 } 1698 return "" 1699 case "build-remote": 1700 if err := runBuildRemoteCmd(td, d, remoteStorage); err != nil { 1701 return err.Error() 1702 } 1703 return "" 1704 1705 case "flush": 1706 if err := d.Flush(); err != nil { 1707 return err.Error() 1708 } 1709 return "" 1710 1711 case "ingest-external": 1712 flushed = false 1713 if err := runIngestExternalCmd(td, d, "external-locator"); err != nil { 1714 return err.Error() 1715 } 1716 // Wait for a possible flush. 
1717 d.mu.Lock() 1718 for d.mu.compact.flushing { 1719 d.mu.compact.cond.Wait() 1720 } 1721 d.mu.Unlock() 1722 if flushed { 1723 return "memtable flushed" 1724 } 1725 return "" 1726 1727 case "get": 1728 return runGetCmd(t, td, d) 1729 1730 case "iter": 1731 iter, _ := d.NewIter(&IterOptions{ 1732 KeyTypes: IterKeyTypePointsAndRanges, 1733 }) 1734 return runIterCmd(td, iter, true) 1735 1736 case "lsm": 1737 return runLSMCmd(td, d) 1738 1739 case "metrics": 1740 // The asynchronous loading of table stats can change metrics, so 1741 // wait for all the tables' stats to be loaded. 1742 d.mu.Lock() 1743 d.waitTableStats() 1744 d.mu.Unlock() 1745 1746 return d.Metrics().StringForTests() 1747 1748 case "wait-pending-table-stats": 1749 return runTableStatsCmd(td, d) 1750 1751 case "compact": 1752 if len(td.CmdArgs) != 2 { 1753 panic("insufficient args for compact command") 1754 } 1755 l := td.CmdArgs[0].Key 1756 r := td.CmdArgs[1].Key 1757 err := d.Compact([]byte(l), []byte(r), false) 1758 if err != nil { 1759 return err.Error() 1760 } 1761 return "" 1762 default: 1763 return fmt.Sprintf("unknown command: %s", td.Cmd) 1764 } 1765 }) 1766 } 1767 1768 func TestIngestMemtableOverlaps(t *testing.T) { 1769 comparers := []Comparer{ 1770 {Name: "default", Compare: DefaultComparer.Compare, FormatKey: DefaultComparer.FormatKey}, 1771 { 1772 Name: "reverse", 1773 Compare: func(a, b []byte) int { return DefaultComparer.Compare(b, a) }, 1774 FormatKey: DefaultComparer.FormatKey, 1775 }, 1776 } 1777 m := make(map[string]*Comparer) 1778 for i := range comparers { 1779 c := &comparers[i] 1780 m[c.Name] = c 1781 } 1782 1783 for _, comparer := range comparers { 1784 t.Run(comparer.Name, func(t *testing.T) { 1785 var mem *memTable 1786 1787 parseMeta := func(s string) *fileMetadata { 1788 parts := strings.Split(s, "-") 1789 meta := &fileMetadata{} 1790 if len(parts) != 2 { 1791 t.Fatalf("malformed table spec: %s", s) 1792 } 1793 var smallest, largest base.InternalKey 1794 if 
strings.Contains(parts[0], ".") { 1795 if !strings.Contains(parts[1], ".") { 1796 t.Fatalf("malformed table spec: %s", s) 1797 } 1798 smallest = base.ParseInternalKey(parts[0]) 1799 largest = base.ParseInternalKey(parts[1]) 1800 } else { 1801 smallest = InternalKey{UserKey: []byte(parts[0])} 1802 largest = InternalKey{UserKey: []byte(parts[1])} 1803 } 1804 // If we're using a reverse comparer, flip the file bounds. 1805 if mem.cmp(smallest.UserKey, largest.UserKey) > 0 { 1806 smallest, largest = largest, smallest 1807 } 1808 meta.ExtendPointKeyBounds(comparer.Compare, smallest, largest) 1809 meta.InitPhysicalBacking() 1810 return meta 1811 } 1812 1813 datadriven.RunTest(t, "testdata/ingest_memtable_overlaps", func(t *testing.T, d *datadriven.TestData) string { 1814 switch d.Cmd { 1815 case "define": 1816 b := newBatch(nil) 1817 if err := runBatchDefineCmd(d, b); err != nil { 1818 return err.Error() 1819 } 1820 1821 opts := &Options{ 1822 Comparer: &comparer, 1823 } 1824 opts.EnsureDefaults().WithFSDefaults() 1825 if len(d.CmdArgs) > 1 { 1826 return fmt.Sprintf("%s expects at most 1 argument", d.Cmd) 1827 } 1828 if len(d.CmdArgs) == 1 { 1829 opts.Comparer = m[d.CmdArgs[0].String()] 1830 if opts.Comparer == nil { 1831 return fmt.Sprintf("%s unknown comparer: %s", d.Cmd, d.CmdArgs[0].String()) 1832 } 1833 } 1834 1835 mem = newMemTable(memTableOptions{Options: opts}) 1836 if err := mem.apply(b, 0); err != nil { 1837 return err.Error() 1838 } 1839 return "" 1840 1841 case "overlaps": 1842 var buf bytes.Buffer 1843 for _, data := range strings.Split(d.Input, "\n") { 1844 var keyRanges []internalKeyRange 1845 for _, part := range strings.Fields(data) { 1846 meta := parseMeta(part) 1847 keyRanges = append(keyRanges, internalKeyRange{smallest: meta.Smallest, largest: meta.Largest}) 1848 } 1849 fmt.Fprintf(&buf, "%t\n", ingestMemtableOverlaps(mem.cmp, mem, keyRanges)) 1850 } 1851 return buf.String() 1852 1853 default: 1854 return fmt.Sprintf("unknown command: %s", d.Cmd) 
1855 } 1856 }) 1857 }) 1858 } 1859 } 1860 1861 func TestKeyRangeBasic(t *testing.T) { 1862 cmp := base.DefaultComparer.Compare 1863 k1 := KeyRange{Start: []byte("b"), End: []byte("c")} 1864 1865 // Tests for Contains() 1866 require.True(t, k1.Contains(cmp, base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet))) 1867 require.False(t, k1.Contains(cmp, base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet))) 1868 require.True(t, k1.Contains(cmp, base.MakeInternalKey([]byte("bb"), 1, InternalKeyKindSet))) 1869 require.True(t, k1.Contains(cmp, base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, []byte("c")))) 1870 1871 m1 := &fileMetadata{ 1872 Smallest: base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet), 1873 Largest: base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet), 1874 } 1875 require.True(t, k1.Overlaps(cmp, m1)) 1876 m2 := &fileMetadata{ 1877 Smallest: base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet), 1878 Largest: base.MakeInternalKey([]byte("d"), 1, InternalKeyKindSet), 1879 } 1880 require.False(t, k1.Overlaps(cmp, m2)) 1881 m3 := &fileMetadata{ 1882 Smallest: base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet), 1883 Largest: base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, []byte("b")), 1884 } 1885 require.False(t, k1.Overlaps(cmp, m3)) 1886 m4 := &fileMetadata{ 1887 Smallest: base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet), 1888 Largest: base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet), 1889 } 1890 require.True(t, k1.Overlaps(cmp, m4)) 1891 } 1892 1893 func BenchmarkIngestOverlappingMemtable(b *testing.B) { 1894 assertNoError := func(err error) { 1895 b.Helper() 1896 if err != nil { 1897 b.Fatal(err) 1898 } 1899 } 1900 1901 for count := 1; count < 6; count++ { 1902 b.Run(fmt.Sprintf("memtables=%d", count), func(b *testing.B) { 1903 for i := 0; i < b.N; i++ { 1904 b.StopTimer() 1905 mem := vfs.NewMem() 1906 d, err := Open("", &Options{ 1907 FS: mem, 1908 }) 1909 assertNoError(err) 1910 1911 // 
Create memtables. 1912 for { 1913 assertNoError(d.Set([]byte("a"), nil, nil)) 1914 d.mu.Lock() 1915 done := len(d.mu.mem.queue) == count 1916 d.mu.Unlock() 1917 if done { 1918 break 1919 } 1920 } 1921 1922 // Create the overlapping sstable that will force a flush when ingested. 1923 f, err := mem.Create("ext") 1924 assertNoError(err) 1925 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 1926 assertNoError(w.Set([]byte("a"), nil)) 1927 assertNoError(w.Close()) 1928 1929 b.StartTimer() 1930 assertNoError(d.Ingest([]string{"ext"})) 1931 } 1932 }) 1933 } 1934 } 1935 1936 func TestIngestTargetLevel(t *testing.T) { 1937 var d *DB 1938 defer func() { 1939 if d != nil { 1940 // Ignore errors because this test defines fake in-progress transactions 1941 // that prohibit clean shutdown. 1942 _ = d.Close() 1943 } 1944 }() 1945 1946 parseMeta := func(s string) *fileMetadata { 1947 var rkey bool 1948 if len(s) >= 4 && s[0:4] == "rkey" { 1949 rkey = true 1950 s = s[5:] 1951 } 1952 parts := strings.Split(s, "-") 1953 if len(parts) != 2 { 1954 t.Fatalf("malformed table spec: %s", s) 1955 } 1956 var m *fileMetadata 1957 if rkey { 1958 m = (&fileMetadata{}).ExtendRangeKeyBounds( 1959 d.cmp, 1960 InternalKey{UserKey: []byte(parts[0])}, 1961 InternalKey{UserKey: []byte(parts[1])}, 1962 ) 1963 } else { 1964 m = (&fileMetadata{}).ExtendPointKeyBounds( 1965 d.cmp, 1966 InternalKey{UserKey: []byte(parts[0])}, 1967 InternalKey{UserKey: []byte(parts[1])}, 1968 ) 1969 } 1970 m.InitPhysicalBacking() 1971 return m 1972 } 1973 1974 datadriven.RunTest(t, "testdata/ingest_target_level", func(t *testing.T, td *datadriven.TestData) string { 1975 switch td.Cmd { 1976 case "define": 1977 if d != nil { 1978 // Ignore errors because this test defines fake in-progress 1979 // transactions that prohibit clean shutdown. 
1980 _ = d.Close() 1981 } 1982 1983 var err error 1984 opts := Options{ 1985 FormatMajorVersion: internalFormatNewest, 1986 } 1987 opts.WithFSDefaults() 1988 if d, err = runDBDefineCmd(td, &opts); err != nil { 1989 return err.Error() 1990 } 1991 1992 readState := d.loadReadState() 1993 c := &checkConfig{ 1994 logger: d.opts.Logger, 1995 comparer: d.opts.Comparer, 1996 readState: readState, 1997 newIters: d.newIters, 1998 // TODO: runDBDefineCmd doesn't properly update the visible 1999 // sequence number. So we have to explicitly configure level checker with a very large 2000 // sequence number, otherwise the DB appears empty. 2001 seqNum: InternalKeySeqNumMax, 2002 } 2003 if err := checkLevelsInternal(c); err != nil { 2004 return err.Error() 2005 } 2006 readState.unref() 2007 2008 d.mu.Lock() 2009 s := d.mu.versions.currentVersion().String() 2010 d.mu.Unlock() 2011 return s 2012 2013 case "target": 2014 var buf bytes.Buffer 2015 suggestSplit := false 2016 for _, cmd := range td.CmdArgs { 2017 switch cmd.Key { 2018 case "suggest-split": 2019 suggestSplit = true 2020 } 2021 } 2022 for _, target := range strings.Split(td.Input, "\n") { 2023 meta := parseMeta(target) 2024 level, overlapFile, err := ingestTargetLevel( 2025 d.newIters, d.tableNewRangeKeyIter, IterOptions{logger: d.opts.Logger}, 2026 d.opts.Comparer, d.mu.versions.currentVersion(), 1, d.mu.compact.inProgress, meta, 2027 suggestSplit) 2028 if err != nil { 2029 return err.Error() 2030 } 2031 if overlapFile != nil { 2032 fmt.Fprintf(&buf, "%d (split file: %s)\n", level, overlapFile.FileNum) 2033 } else { 2034 fmt.Fprintf(&buf, "%d\n", level) 2035 } 2036 } 2037 return buf.String() 2038 2039 default: 2040 return fmt.Sprintf("unknown command: %s", td.Cmd) 2041 } 2042 }) 2043 } 2044 2045 func TestIngest(t *testing.T) { 2046 var mem vfs.FS 2047 var d *DB 2048 var flushed bool 2049 defer func() { 2050 require.NoError(t, d.Close()) 2051 }() 2052 2053 reset := func(split bool) { 2054 if d != nil { 2055 
require.NoError(t, d.Close()) 2056 } 2057 2058 mem = vfs.NewMem() 2059 require.NoError(t, mem.MkdirAll("ext", 0755)) 2060 opts := &Options{ 2061 FS: mem, 2062 L0CompactionThreshold: 100, 2063 L0StopWritesThreshold: 100, 2064 DebugCheck: DebugCheckLevels, 2065 EventListener: &EventListener{FlushEnd: func(info FlushInfo) { 2066 flushed = true 2067 }}, 2068 FormatMajorVersion: internalFormatNewest, 2069 } 2070 opts.Experimental.IngestSplit = func() bool { 2071 return split 2072 } 2073 // Disable automatic compactions because otherwise we'll race with 2074 // delete-only compactions triggered by ingesting range tombstones. 2075 opts.DisableAutomaticCompactions = true 2076 2077 var err error 2078 d, err = Open("", opts) 2079 require.NoError(t, err) 2080 } 2081 reset(false /* split */) 2082 2083 datadriven.RunTest(t, "testdata/ingest", func(t *testing.T, td *datadriven.TestData) string { 2084 switch td.Cmd { 2085 case "reset": 2086 split := false 2087 for _, cmd := range td.CmdArgs { 2088 switch cmd.Key { 2089 case "enable-split": 2090 split = true 2091 default: 2092 return fmt.Sprintf("unexpected key: %s", cmd.Key) 2093 } 2094 } 2095 reset(split) 2096 return "" 2097 case "batch": 2098 b := d.NewIndexedBatch() 2099 if err := runBatchDefineCmd(td, b); err != nil { 2100 return err.Error() 2101 } 2102 if err := b.Commit(nil); err != nil { 2103 return err.Error() 2104 } 2105 return "" 2106 2107 case "build": 2108 if err := runBuildCmd(td, d, mem); err != nil { 2109 return err.Error() 2110 } 2111 return "" 2112 2113 case "ingest": 2114 flushed = false 2115 if err := runIngestCmd(td, d, mem); err != nil { 2116 return err.Error() 2117 } 2118 // Wait for a possible flush. 
2119 d.mu.Lock() 2120 for d.mu.compact.flushing { 2121 d.mu.compact.cond.Wait() 2122 } 2123 d.mu.Unlock() 2124 if flushed { 2125 return "memtable flushed" 2126 } 2127 return "" 2128 2129 case "get": 2130 return runGetCmd(t, td, d) 2131 2132 case "iter": 2133 iter, _ := d.NewIter(&IterOptions{ 2134 KeyTypes: IterKeyTypePointsAndRanges, 2135 }) 2136 return runIterCmd(td, iter, true) 2137 2138 case "lsm": 2139 return runLSMCmd(td, d) 2140 2141 case "metrics": 2142 // The asynchronous loading of table stats can change metrics, so 2143 // wait for all the tables' stats to be loaded. 2144 d.mu.Lock() 2145 d.waitTableStats() 2146 d.mu.Unlock() 2147 2148 return d.Metrics().StringForTests() 2149 2150 case "wait-pending-table-stats": 2151 return runTableStatsCmd(td, d) 2152 2153 case "compact": 2154 if len(td.CmdArgs) != 2 { 2155 panic("insufficient args for compact command") 2156 } 2157 l := td.CmdArgs[0].Key 2158 r := td.CmdArgs[1].Key 2159 err := d.Compact([]byte(l), []byte(r), false) 2160 if err != nil { 2161 return err.Error() 2162 } 2163 return "" 2164 default: 2165 return fmt.Sprintf("unknown command: %s", td.Cmd) 2166 } 2167 }) 2168 } 2169 2170 func TestIngestError(t *testing.T) { 2171 for i := int32(0); ; i++ { 2172 mem := vfs.NewMem() 2173 2174 f0, err := mem.Create("ext0") 2175 require.NoError(t, err) 2176 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f0), sstable.WriterOptions{}) 2177 require.NoError(t, w.Set([]byte("d"), nil)) 2178 require.NoError(t, w.Close()) 2179 f1, err := mem.Create("ext1") 2180 require.NoError(t, err) 2181 w = sstable.NewWriter(objstorageprovider.NewFileWritable(f1), sstable.WriterOptions{}) 2182 require.NoError(t, w.Set([]byte("d"), nil)) 2183 require.NoError(t, w.Close()) 2184 2185 inj := errorfs.OnIndex(-1) 2186 d, err := Open("", &Options{ 2187 FS: errorfs.Wrap(mem, inj), 2188 Logger: panicLogger{}, 2189 L0CompactionThreshold: 8, 2190 }) 2191 require.NoError(t, err) 2192 // Force the creation of an L0 sstable that overlaps 
with the tables 2193 // we'll attempt to ingest. This ensures that we exercise filesystem 2194 // codepaths when determining the ingest target level. 2195 require.NoError(t, d.Set([]byte("a"), nil, nil)) 2196 require.NoError(t, d.Set([]byte("d"), nil, nil)) 2197 require.NoError(t, d.Flush()) 2198 2199 t.Run(fmt.Sprintf("index-%d", i), func(t *testing.T) { 2200 defer func() { 2201 if r := recover(); r != nil { 2202 if e, ok := r.(error); ok && errors.Is(e, errorfs.ErrInjected) { 2203 return 2204 } 2205 // d.opts.Logger.Fatalf won't propagate ErrInjected 2206 // itself, but should contain the error message. 2207 if strings.HasSuffix(fmt.Sprint(r), errorfs.ErrInjected.Error()) { 2208 return 2209 } 2210 t.Fatal(r) 2211 } 2212 }() 2213 2214 inj.SetIndex(i) 2215 err1 := d.Ingest([]string{"ext0"}) 2216 err2 := d.Ingest([]string{"ext1"}) 2217 err := firstError(err1, err2) 2218 if err != nil && !errors.Is(err, errorfs.ErrInjected) { 2219 t.Fatal(err) 2220 } 2221 }) 2222 2223 // d.Close may error if we failed to flush the manifest. 2224 _ = d.Close() 2225 2226 // If the injector's index is non-negative, the i-th filesystem 2227 // operation was never executed. 2228 if inj.Index() >= 0 { 2229 break 2230 } 2231 } 2232 } 2233 2234 func TestIngestIdempotence(t *testing.T) { 2235 // Use an on-disk filesystem, because Ingest with a MemFS will copy, not 2236 // link the ingested file. 
2237 dir, err := os.MkdirTemp("", "ingest-idempotence") 2238 require.NoError(t, err) 2239 defer os.RemoveAll(dir) 2240 fs := vfs.Default 2241 2242 path := fs.PathJoin(dir, "ext") 2243 f, err := fs.Create(fs.PathJoin(dir, "ext")) 2244 require.NoError(t, err) 2245 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2246 require.NoError(t, w.Set([]byte("d"), nil)) 2247 require.NoError(t, w.Close()) 2248 2249 d, err := Open(dir, &Options{ 2250 FS: fs, 2251 }) 2252 require.NoError(t, err) 2253 const count = 4 2254 for i := 0; i < count; i++ { 2255 ingestPath := fs.PathJoin(dir, fmt.Sprintf("ext%d", i)) 2256 require.NoError(t, fs.Link(path, ingestPath)) 2257 require.NoError(t, d.Ingest([]string{ingestPath})) 2258 } 2259 require.NoError(t, d.Close()) 2260 } 2261 2262 func TestIngestCompact(t *testing.T) { 2263 mem := vfs.NewMem() 2264 lel := MakeLoggingEventListener(&base.InMemLogger{}) 2265 d, err := Open("", &Options{ 2266 EventListener: &lel, 2267 FS: mem, 2268 L0CompactionThreshold: 1, 2269 L0StopWritesThreshold: 1, 2270 }) 2271 require.NoError(t, err) 2272 2273 src := func(i int) string { 2274 return fmt.Sprintf("ext%d", i) 2275 } 2276 f, err := mem.Create(src(0)) 2277 require.NoError(t, err) 2278 2279 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2280 key := []byte("a") 2281 require.NoError(t, w.Add(base.MakeInternalKey(key, 0, InternalKeyKindSet), nil)) 2282 require.NoError(t, w.Close()) 2283 2284 // Make N copies of the sstable. 2285 const count = 20 2286 for i := 1; i < count; i++ { 2287 require.NoError(t, vfs.Copy(d.opts.FS, src(0), src(i))) 2288 } 2289 2290 // Ingest the same sstable multiple times. Compaction should take place as 2291 // ingestion happens, preventing an indefinite write stall from occurring. 
2292 for i := 0; i < count; i++ { 2293 if i == 10 { 2294 // Half-way through the ingestions, set a key in the memtable to force 2295 // overlap with the memtable which will require the memtable to be 2296 // flushed. 2297 require.NoError(t, d.Set(key, nil, nil)) 2298 } 2299 require.NoError(t, d.Ingest([]string{src(i)})) 2300 } 2301 2302 require.NoError(t, d.Close()) 2303 } 2304 2305 func TestConcurrentIngest(t *testing.T) { 2306 mem := vfs.NewMem() 2307 d, err := Open("", &Options{ 2308 FS: mem, 2309 }) 2310 require.NoError(t, err) 2311 2312 // Create an sstable with 2 keys. This is necessary to trigger the overlap 2313 // bug because an sstable with a single key will not have overlap in internal 2314 // key space and the sequence number assignment had already guaranteed 2315 // correct ordering. 2316 src := func(i int) string { 2317 return fmt.Sprintf("ext%d", i) 2318 } 2319 f, err := mem.Create(src(0)) 2320 require.NoError(t, err) 2321 2322 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2323 require.NoError(t, w.Set([]byte("a"), nil)) 2324 require.NoError(t, w.Set([]byte("b"), nil)) 2325 require.NoError(t, w.Close()) 2326 2327 // Make N copies of the sstable. 2328 errCh := make(chan error, 5) 2329 for i := 1; i < cap(errCh); i++ { 2330 require.NoError(t, vfs.Copy(d.opts.FS, src(0), src(i))) 2331 } 2332 2333 // Perform N ingestions concurrently. 
2334 for i := 0; i < cap(errCh); i++ { 2335 go func(i int) { 2336 err := d.Ingest([]string{src(i)}) 2337 if err == nil { 2338 if _, err = d.opts.FS.Stat(src(i)); oserror.IsNotExist(err) { 2339 err = nil 2340 } 2341 } 2342 errCh <- err 2343 }(i) 2344 } 2345 for i := 0; i < cap(errCh); i++ { 2346 require.NoError(t, <-errCh) 2347 } 2348 2349 require.NoError(t, d.Close()) 2350 } 2351 2352 func TestConcurrentIngestCompact(t *testing.T) { 2353 for i := 0; i < 2; i++ { 2354 t.Run("", func(t *testing.T) { 2355 mem := vfs.NewMem() 2356 compactionReady := make(chan struct{}) 2357 compactionBegin := make(chan struct{}) 2358 d, err := Open("", &Options{ 2359 FS: mem, 2360 EventListener: &EventListener{ 2361 TableCreated: func(info TableCreateInfo) { 2362 if info.Reason == "compacting" { 2363 close(compactionReady) 2364 <-compactionBegin 2365 } 2366 }, 2367 }, 2368 }) 2369 require.NoError(t, err) 2370 2371 ingest := func(keys ...string) { 2372 t.Helper() 2373 f, err := mem.Create("ext") 2374 require.NoError(t, err) 2375 2376 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2377 for _, k := range keys { 2378 require.NoError(t, w.Set([]byte(k), nil)) 2379 } 2380 require.NoError(t, w.Close()) 2381 require.NoError(t, d.Ingest([]string{"ext"})) 2382 } 2383 2384 compact := func(start, end string) { 2385 t.Helper() 2386 require.NoError(t, d.Compact([]byte(start), []byte(end), false)) 2387 } 2388 2389 lsm := func() string { 2390 d.mu.Lock() 2391 s := d.mu.versions.currentVersion().String() 2392 d.mu.Unlock() 2393 return s 2394 } 2395 2396 expectLSM := func(expected string) { 2397 t.Helper() 2398 expected = strings.TrimSpace(expected) 2399 actual := strings.TrimSpace(lsm()) 2400 if expected != actual { 2401 t.Fatalf("expected\n%s\nbut found\n%s", expected, actual) 2402 } 2403 } 2404 2405 ingest("a") 2406 ingest("a") 2407 ingest("c") 2408 ingest("c") 2409 2410 expectLSM(` 2411 0.0: 2412 000005:[a#11,SET-a#11,SET] 2413 000007:[c#13,SET-c#13,SET] 2414 
6: 2415 000004:[a#10,SET-a#10,SET] 2416 000006:[c#12,SET-c#12,SET] 2417 `) 2418 2419 // At this point ingestion of an sstable containing only key "b" will be 2420 // targeted at L6. Yet a concurrent compaction of sstables 5 and 7 will 2421 // create a new sstable in L6 spanning ["a"-"c"]. So the ingestion must 2422 // actually target L5. 2423 2424 switch i { 2425 case 0: 2426 // Compact, then ingest. 2427 go func() { 2428 <-compactionReady 2429 2430 ingest("b") 2431 2432 close(compactionBegin) 2433 }() 2434 2435 compact("a", "z") 2436 2437 expectLSM(` 2438 0.0: 2439 000009:[b#14,SET-b#14,SET] 2440 6: 2441 000008:[a#0,SET-c#0,SET] 2442 `) 2443 2444 case 1: 2445 // Ingest, then compact 2446 var wg sync.WaitGroup 2447 wg.Add(1) 2448 go func() { 2449 defer wg.Done() 2450 close(compactionBegin) 2451 compact("a", "z") 2452 }() 2453 2454 ingest("b") 2455 wg.Wait() 2456 2457 // Because we're performing the ingestion and compaction concurrently, 2458 // we can't guarantee any particular LSM structure at this point. The 2459 // test will fail with an assertion error due to overlapping sstables 2460 // if there is insufficient synchronization between ingestion and 2461 // compaction. 2462 } 2463 2464 require.NoError(t, d.Close()) 2465 }) 2466 } 2467 } 2468 2469 func TestIngestFlushQueuedMemTable(t *testing.T) { 2470 // Verify that ingestion forces a flush of a queued memtable. 2471 2472 // Test with a format major version prior to FormatFlushableIngest and one 2473 // after. Both should result in the same statistic calculations. 2474 for _, fmv := range []FormatMajorVersion{FormatFlushableIngest - 1, internalFormatNewest} { 2475 func(fmv FormatMajorVersion) { 2476 mem := vfs.NewMem() 2477 d, err := Open("", &Options{ 2478 FS: mem, 2479 FormatMajorVersion: fmv, 2480 }) 2481 require.NoError(t, err) 2482 2483 // Add the key "a" to the memtable, then fill up the memtable with the key 2484 // "b". The ingested sstable will only overlap with the queued memtable. 
2485 require.NoError(t, d.Set([]byte("a"), nil, nil)) 2486 for { 2487 require.NoError(t, d.Set([]byte("b"), nil, nil)) 2488 d.mu.Lock() 2489 done := len(d.mu.mem.queue) == 2 2490 d.mu.Unlock() 2491 if done { 2492 break 2493 } 2494 } 2495 2496 ingest := func(keys ...string) { 2497 t.Helper() 2498 f, err := mem.Create("ext") 2499 require.NoError(t, err) 2500 2501 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 2502 TableFormat: fmv.MinTableFormat(), 2503 }) 2504 for _, k := range keys { 2505 require.NoError(t, w.Set([]byte(k), nil)) 2506 } 2507 require.NoError(t, w.Close()) 2508 stats, err := d.IngestWithStats([]string{"ext"}) 2509 require.NoError(t, err) 2510 require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) 2511 require.Equal(t, stats.MemtableOverlappingFiles, 1) 2512 require.Less(t, uint64(0), stats.Bytes) 2513 } 2514 2515 ingest("a") 2516 2517 require.NoError(t, d.Close()) 2518 }(fmv) 2519 } 2520 } 2521 2522 func TestIngestStats(t *testing.T) { 2523 mem := vfs.NewMem() 2524 d, err := Open("", &Options{ 2525 FS: mem, 2526 }) 2527 require.NoError(t, err) 2528 2529 ingest := func(expectedLevel int, keys ...string) { 2530 t.Helper() 2531 f, err := mem.Create("ext") 2532 require.NoError(t, err) 2533 2534 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2535 for _, k := range keys { 2536 require.NoError(t, w.Set([]byte(k), nil)) 2537 } 2538 require.NoError(t, w.Close()) 2539 stats, err := d.IngestWithStats([]string{"ext"}) 2540 require.NoError(t, err) 2541 if expectedLevel == 0 { 2542 require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) 2543 } else { 2544 require.EqualValues(t, 0, stats.ApproxIngestedIntoL0Bytes) 2545 } 2546 require.Less(t, uint64(0), stats.Bytes) 2547 } 2548 ingest(6, "a") 2549 ingest(0, "a") 2550 ingest(6, "b", "g") 2551 ingest(0, "c") 2552 require.NoError(t, d.Close()) 2553 } 2554 2555 func TestIngestFlushQueuedLargeBatch(t *testing.T) { 2556 // Verify 
that ingestion forces a flush of a queued large batch. 2557 2558 mem := vfs.NewMem() 2559 d, err := Open("", &Options{ 2560 FS: mem, 2561 }) 2562 require.NoError(t, err) 2563 2564 // The default large batch threshold is slightly less than 1/2 of the 2565 // memtable size which makes triggering a problem with flushing queued large 2566 // batches irritating. Manually adjust the threshold to 1/8 of the memtable 2567 // size in order to more easily create a situation where a large batch is 2568 // queued but not automatically flushed. 2569 d.mu.Lock() 2570 d.largeBatchThreshold = d.opts.MemTableSize / 8 2571 d.mu.Unlock() 2572 2573 // Set a record with a large value. This will be transformed into a large 2574 // batch and placed in the flushable queue. 2575 require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("v"), int(d.largeBatchThreshold)), nil)) 2576 2577 ingest := func(keys ...string) { 2578 t.Helper() 2579 f, err := mem.Create("ext") 2580 require.NoError(t, err) 2581 2582 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2583 for _, k := range keys { 2584 require.NoError(t, w.Set([]byte(k), nil)) 2585 } 2586 require.NoError(t, w.Close()) 2587 require.NoError(t, d.Ingest([]string{"ext"})) 2588 } 2589 2590 ingest("a") 2591 2592 require.NoError(t, d.Close()) 2593 } 2594 2595 func TestIngestMemtablePendingOverlap(t *testing.T) { 2596 mem := vfs.NewMem() 2597 d, err := Open("", &Options{ 2598 FS: mem, 2599 }) 2600 require.NoError(t, err) 2601 2602 d.mu.Lock() 2603 // Use a custom commit pipeline apply function to give us control over 2604 // timing of events. 
2605 assignedBatch := make(chan struct{}) 2606 applyBatch := make(chan struct{}) 2607 originalApply := d.commit.env.apply 2608 d.commit.env.apply = func(b *Batch, mem *memTable) error { 2609 assignedBatch <- struct{}{} 2610 applyBatch <- struct{}{} 2611 return originalApply(b, mem) 2612 } 2613 d.mu.Unlock() 2614 2615 ingest := func(keys ...string) { 2616 t.Helper() 2617 f, err := mem.Create("ext") 2618 require.NoError(t, err) 2619 2620 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2621 for _, k := range keys { 2622 require.NoError(t, w.Set([]byte(k), nil)) 2623 } 2624 require.NoError(t, w.Close()) 2625 require.NoError(t, d.Ingest([]string{"ext"})) 2626 } 2627 2628 var wg sync.WaitGroup 2629 wg.Add(2) 2630 2631 // First, Set('c') begins. This call will: 2632 // 2633 // * enqueue the batch to the pending queue. 2634 // * allocate a sequence number `x`. 2635 // * write the batch to the WAL. 2636 // 2637 // and then block until we read from the `applyBatch` channel down below. 2638 go func() { 2639 err := d.Set([]byte("c"), nil, nil) 2640 if err != nil { 2641 t.Error(err) 2642 } 2643 wg.Done() 2644 }() 2645 2646 // When the above Set('c') is ready to apply, it sends on the 2647 // `assignedBatch` channel. Once that happens, we start Ingest('a', 'c'). 2648 // The Ingest('a', 'c') allocates sequence number `x + 1`. 2649 go func() { 2650 // Wait until the Set has grabbed a sequence number before ingesting. 2651 <-assignedBatch 2652 ingest("a", "c") 2653 wg.Done() 2654 }() 2655 2656 // The Set('c')#1 and Ingest('a', 'c')#2 are both pending. To maintain 2657 // sequence number invariants, the Set needs to be applied and flushed 2658 // before the Ingest determines its target level. 2659 // 2660 // Sleep a bit to ensure that the Ingest has time to call into 2661 // AllocateSeqNum. 
Once it allocates its sequence number, it should see 2662 // that there are unpublished sequence numbers below it and spin until the 2663 // Set's sequence number is published. After sleeping, read from 2664 // `applyBatch` to actually allow the Set to apply and publish its 2665 // sequence number. 2666 time.Sleep(100 * time.Millisecond) 2667 <-applyBatch 2668 2669 // Wait for both calls to complete. 2670 wg.Wait() 2671 require.NoError(t, d.Flush()) 2672 require.NoError(t, d.CheckLevels(nil)) 2673 require.NoError(t, d.Close()) 2674 } 2675 2676 type testLogger struct { 2677 t testing.TB 2678 } 2679 2680 func (l testLogger) Infof(format string, args ...interface{}) { 2681 l.t.Logf(format, args...) 2682 } 2683 2684 func (l testLogger) Fatalf(format string, args ...interface{}) { 2685 l.t.Fatalf(format, args...) 2686 } 2687 2688 // TestIngestMemtableOverlapRace is a regression test for the race described in 2689 // #2196. If an ingest that checks for overlap with the mutable memtable and 2690 // finds no overlap, it must not allow overlapping keys with later sequence 2691 // numbers to be applied to the memtable and the memtable to be flushed before 2692 // the ingest completes. 2693 // 2694 // This test operates by committing the same key concurrently: 2695 // - 1 goroutine repeatedly ingests the same sstable writing the key `foo` 2696 // - n goroutines repeatedly apply batches writing the key `foo` and trigger 2697 // flushes. 2698 // 2699 // After a while, the database is closed and the manifest is verified. Version 2700 // edits should contain new files with monotonically increasing sequence 2701 // numbers, since every flush and every ingest conflicts with one another. 2702 func TestIngestMemtableOverlapRace(t *testing.T) { 2703 mem := vfs.NewMem() 2704 el := MakeLoggingEventListener(testLogger{t: t}) 2705 d, err := Open("", &Options{ 2706 FS: mem, 2707 // Disable automatic compactions to keep the manifest clean; only 2708 // flushes and ingests. 
2709 DisableAutomaticCompactions: true, 2710 // Disable the WAL to speed up batch commits. 2711 DisableWAL: true, 2712 EventListener: &el, 2713 // We're endlessly appending to L0 without clearing it, so set a maximal 2714 // stop writes threshold. 2715 L0StopWritesThreshold: math.MaxInt, 2716 // Accumulating more than 1 immutable memtable doesn't help us exercise 2717 // the bug, since the committed keys need to be flushed promptly. 2718 MemTableStopWritesThreshold: 2, 2719 }) 2720 require.NoError(t, err) 2721 2722 // Prepare a sstable `ext` deleting foo. 2723 f, err := mem.Create("ext") 2724 require.NoError(t, err) 2725 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2726 require.NoError(t, w.Delete([]byte("foo"))) 2727 require.NoError(t, w.Close()) 2728 2729 var done atomic.Bool 2730 const numSetters = 2 2731 var wg sync.WaitGroup 2732 wg.Add(numSetters + 1) 2733 2734 untilDone := func(fn func()) { 2735 defer wg.Done() 2736 for !done.Load() { 2737 fn() 2738 } 2739 } 2740 2741 // Ingest in the background. 2742 totalIngests := 0 2743 go untilDone(func() { 2744 filename := fmt.Sprintf("ext%d", totalIngests) 2745 require.NoError(t, mem.Link("ext", filename)) 2746 require.NoError(t, d.Ingest([]string{filename})) 2747 totalIngests++ 2748 }) 2749 2750 // Apply batches and trigger flushes in the background. 2751 wo := &WriteOptions{Sync: false} 2752 var localCommits [numSetters]int 2753 for i := 0; i < numSetters; i++ { 2754 i := i 2755 v := []byte(fmt.Sprintf("v%d", i+1)) 2756 go untilDone(func() { 2757 // Commit a batch setting foo=vN. 
2758 b := d.NewBatch() 2759 require.NoError(t, b.Set([]byte("foo"), v, nil)) 2760 require.NoError(t, b.Commit(wo)) 2761 localCommits[i]++ 2762 d.AsyncFlush() 2763 }) 2764 } 2765 time.Sleep(100 * time.Millisecond) 2766 done.Store(true) 2767 wg.Wait() 2768 2769 var totalCommits int 2770 for i := 0; i < numSetters; i++ { 2771 totalCommits += localCommits[i] 2772 } 2773 m := d.Metrics() 2774 tot := m.Total() 2775 t.Logf("Committed %d batches.", totalCommits) 2776 t.Logf("Flushed %d times.", m.Flush.Count) 2777 t.Logf("Ingested %d sstables.", tot.TablesIngested) 2778 require.NoError(t, d.CheckLevels(nil)) 2779 require.NoError(t, d.Close()) 2780 2781 // Replay the manifest. Every flush and ingest is a separate version edit. 2782 // Since they all write the same key and compactions are disabled, sequence 2783 // numbers of new files should be monotonically increasing. 2784 // 2785 // This check is necessary because most of these sstables are ingested into 2786 // L0. The L0 sublevels construction will order them by LargestSeqNum, even 2787 // if they're added to L0 out-of-order. The CheckLevels call at the end of 2788 // the test may find that the sublevels are all appropriately ordered, but 2789 // the manifest may reveal they were added to the LSM out-of-order. 
2790 dbDesc, err := Peek("", mem) 2791 require.NoError(t, err) 2792 require.True(t, dbDesc.Exists) 2793 f, err = mem.Open(dbDesc.ManifestFilename) 2794 require.NoError(t, err) 2795 defer f.Close() 2796 rr := record.NewReader(f, 0 /* logNum */) 2797 var largest *fileMetadata 2798 for { 2799 r, err := rr.Next() 2800 if err == io.EOF || err == record.ErrInvalidChunk { 2801 break 2802 } 2803 require.NoError(t, err) 2804 var ve manifest.VersionEdit 2805 require.NoError(t, ve.Decode(r)) 2806 t.Log(ve.String()) 2807 for _, f := range ve.NewFiles { 2808 if largest != nil { 2809 require.Equal(t, 0, f.Level) 2810 if largest.LargestSeqNum > f.Meta.LargestSeqNum { 2811 t.Fatalf("previous largest file %s has sequence number > next file %s", largest, f.Meta) 2812 } 2813 } 2814 largest = f.Meta 2815 } 2816 } 2817 } 2818 2819 type ingestCrashFS struct { 2820 vfs.FS 2821 } 2822 2823 func (fs ingestCrashFS) Link(oldname, newname string) error { 2824 if err := fs.FS.Link(oldname, newname); err != nil { 2825 return err 2826 } 2827 panic(errorfs.ErrInjected) 2828 } 2829 2830 type noRemoveFS struct { 2831 vfs.FS 2832 } 2833 2834 func (fs noRemoveFS) Remove(string) error { 2835 return errorfs.ErrInjected 2836 } 2837 2838 func TestIngestFileNumReuseCrash(t *testing.T) { 2839 const count = 10 2840 // Use an on-disk filesystem, because Ingest with a MemFS will copy, not 2841 // link the ingested file. 2842 dir, err := os.MkdirTemp("", "ingest-filenum-reuse") 2843 require.NoError(t, err) 2844 defer os.RemoveAll(dir) 2845 fs := vfs.Default 2846 2847 readFile := func(s string) []byte { 2848 f, err := fs.Open(fs.PathJoin(dir, s)) 2849 require.NoError(t, err) 2850 b, err := io.ReadAll(f) 2851 require.NoError(t, err) 2852 require.NoError(t, f.Close()) 2853 return b 2854 } 2855 2856 // Create sstables to ingest. 
2857 var files []string 2858 var fileBytes [][]byte 2859 for i := 0; i < count; i++ { 2860 name := fmt.Sprintf("ext%d", i) 2861 f, err := fs.Create(fs.PathJoin(dir, name)) 2862 require.NoError(t, err) 2863 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2864 require.NoError(t, w.Set([]byte(fmt.Sprintf("foo%d", i)), nil)) 2865 require.NoError(t, w.Close()) 2866 files = append(files, name) 2867 fileBytes = append(fileBytes, readFile(name)) 2868 } 2869 2870 // Open a database with a filesystem that will successfully link the 2871 // ingested files but then panic. This is an approximation of what a crash 2872 // after linking but before updating the manifest would look like. 2873 d, err := Open(dir, &Options{ 2874 FS: ingestCrashFS{FS: fs}, 2875 }) 2876 // A flush here ensures the file num bumps from creating OPTIONS files, 2877 // etc get recorded in the manifest. We want the nextFileNum after the 2878 // restart to be the same as one of our ingested sstables. 2879 require.NoError(t, err) 2880 require.NoError(t, d.Set([]byte("boop"), nil, nil)) 2881 require.NoError(t, d.Flush()) 2882 for _, f := range files { 2883 func() { 2884 defer func() { err = recover().(error) }() 2885 err = d.Ingest([]string{fs.PathJoin(dir, f)}) 2886 }() 2887 if err == nil || !errors.Is(err, errorfs.ErrInjected) { 2888 t.Fatalf("expected injected error, got %v", err) 2889 } 2890 } 2891 // Leave something in the WAL so that Open will flush while replaying the 2892 // WAL. 2893 require.NoError(t, d.Set([]byte("wal"), nil, nil)) 2894 require.NoError(t, d.Close()) 2895 2896 // There are now two links to each external file: the original extX link 2897 // and a numbered sstable link. The sstable files are still not a part of 2898 // the manifest and so they may be overwritten. Open will detect the 2899 // obsolete number sstables and try to remove them. The FS here is wrapped 2900 // to induce errors on Remove calls. 
Even if we're unsuccessful in 2901 // removing the obsolete files, the external files should not be 2902 // overwritten. 2903 d, err = Open(dir, &Options{FS: noRemoveFS{FS: fs}}) 2904 require.NoError(t, err) 2905 require.NoError(t, d.Set([]byte("bar"), nil, nil)) 2906 require.NoError(t, d.Flush()) 2907 require.NoError(t, d.Close()) 2908 2909 // None of the external files should change despite modifying the linked 2910 // versions. 2911 for i, f := range files { 2912 afterBytes := readFile(f) 2913 require.Equal(t, fileBytes[i], afterBytes) 2914 } 2915 } 2916 2917 func TestIngest_UpdateSequenceNumber(t *testing.T) { 2918 mem := vfs.NewMem() 2919 cmp := base.DefaultComparer.Compare 2920 parse := func(input string) (*sstable.Writer, error) { 2921 f, err := mem.Create("ext") 2922 if err != nil { 2923 return nil, err 2924 } 2925 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 2926 TableFormat: sstable.TableFormatMax, 2927 }) 2928 for _, data := range strings.Split(input, "\n") { 2929 if strings.HasPrefix(data, "rangekey: ") { 2930 data = strings.TrimPrefix(data, "rangekey: ") 2931 s := keyspan.ParseSpan(data) 2932 err := rangekey.Encode(&s, w.AddRangeKey) 2933 if err != nil { 2934 return nil, err 2935 } 2936 continue 2937 } 2938 j := strings.Index(data, ":") 2939 if j < 0 { 2940 return nil, errors.Newf("malformed input: %s\n", data) 2941 } 2942 key := base.ParseInternalKey(data[:j]) 2943 value := []byte(data[j+1:]) 2944 if err := w.Add(key, value); err != nil { 2945 return nil, err 2946 } 2947 } 2948 return w, nil 2949 } 2950 2951 var ( 2952 seqnum uint64 2953 err error 2954 metas []*fileMetadata 2955 ) 2956 datadriven.RunTest(t, "testdata/ingest_update_seqnums", func(t *testing.T, td *datadriven.TestData) string { 2957 switch td.Cmd { 2958 case "starting-seqnum": 2959 seqnum, err = strconv.ParseUint(td.Input, 10, 64) 2960 if err != nil { 2961 return err.Error() 2962 } 2963 return "" 2964 2965 case "reset": 2966 metas = metas[:0] 2967 
return "" 2968 2969 case "load": 2970 w, err := parse(td.Input) 2971 if err != nil { 2972 return err.Error() 2973 } 2974 if err = w.Close(); err != nil { 2975 return err.Error() 2976 } 2977 defer w.Close() 2978 2979 // Format the bounds of the table. 2980 wm, err := w.Metadata() 2981 if err != nil { 2982 return err.Error() 2983 } 2984 2985 // Upper bounds for range dels and range keys are expected to be sentinel 2986 // keys. 2987 maybeUpdateUpperBound := func(key base.InternalKey) base.InternalKey { 2988 switch k := key.Kind(); { 2989 case k == base.InternalKeyKindRangeDelete: 2990 key.Trailer = base.InternalKeyRangeDeleteSentinel 2991 case rangekey.IsRangeKey(k): 2992 return base.MakeExclusiveSentinelKey(k, key.UserKey) 2993 } 2994 return key 2995 } 2996 2997 // Construct the file metadata from the writer metadata. 2998 m := &fileMetadata{ 2999 SmallestSeqNum: 0, // Simulate an ingestion. 3000 LargestSeqNum: 0, 3001 } 3002 if wm.HasPointKeys { 3003 m.ExtendPointKeyBounds(cmp, wm.SmallestPoint, wm.LargestPoint) 3004 } 3005 if wm.HasRangeDelKeys { 3006 m.ExtendPointKeyBounds( 3007 cmp, 3008 wm.SmallestRangeDel, 3009 maybeUpdateUpperBound(wm.LargestRangeDel), 3010 ) 3011 } 3012 if wm.HasRangeKeys { 3013 m.ExtendRangeKeyBounds( 3014 cmp, 3015 wm.SmallestRangeKey, 3016 maybeUpdateUpperBound(wm.LargestRangeKey), 3017 ) 3018 } 3019 m.InitPhysicalBacking() 3020 if err := m.Validate(cmp, base.DefaultFormatter); err != nil { 3021 return err.Error() 3022 } 3023 3024 // Collect this file. 3025 metas = append(metas, m) 3026 3027 // Return an index number for the file. 3028 return fmt.Sprintf("file %d\n", len(metas)-1) 3029 3030 case "update-files": 3031 // Update the bounds across all files. 
3032 if err = ingestUpdateSeqNum(cmp, base.DefaultFormatter, seqnum, ingestLoadResult{localMeta: metas}); err != nil { 3033 return err.Error() 3034 } 3035 3036 var buf bytes.Buffer 3037 for i, m := range metas { 3038 fmt.Fprintf(&buf, "file %d:\n", i) 3039 fmt.Fprintf(&buf, " combined: %s-%s\n", m.Smallest, m.Largest) 3040 fmt.Fprintf(&buf, " points: %s-%s\n", m.SmallestPointKey, m.LargestPointKey) 3041 fmt.Fprintf(&buf, " ranges: %s-%s\n", m.SmallestRangeKey, m.LargestRangeKey) 3042 } 3043 3044 return buf.String() 3045 3046 default: 3047 return fmt.Sprintf("unknown command %s\n", td.Cmd) 3048 } 3049 }) 3050 } 3051 3052 func TestIngestCleanup(t *testing.T) { 3053 fns := []base.FileNum{0, 1, 2} 3054 3055 testCases := []struct { 3056 closeFiles []base.FileNum 3057 cleanupFiles []base.FileNum 3058 wantErr string 3059 }{ 3060 // Close and remove all files. 3061 { 3062 closeFiles: fns, 3063 cleanupFiles: fns, 3064 }, 3065 // Remove a non-existent file. 3066 { 3067 closeFiles: fns, 3068 cleanupFiles: []base.FileNum{3}, 3069 wantErr: "unknown to the objstorage provider", 3070 }, 3071 // Remove a file that has not been closed. 3072 { 3073 closeFiles: []base.FileNum{0, 2}, 3074 cleanupFiles: fns, 3075 wantErr: oserror.ErrInvalid.Error(), 3076 }, 3077 // Remove all files, one of which is still open, plus a file that does not exist. 3078 { 3079 closeFiles: []base.FileNum{0, 2}, 3080 cleanupFiles: []base.FileNum{0, 1, 2, 3}, 3081 wantErr: oserror.ErrInvalid.Error(), // The first error encountered is due to the open file. 3082 }, 3083 } 3084 3085 for _, tc := range testCases { 3086 t.Run("", func(t *testing.T) { 3087 mem := vfs.NewMem() 3088 mem.UseWindowsSemantics(true) 3089 objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, "")) 3090 require.NoError(t, err) 3091 defer objProvider.Close() 3092 3093 // Create the files in the VFS. 
3094 metaMap := make(map[base.FileNum]objstorage.Writable) 3095 for _, fn := range fns { 3096 w, _, err := objProvider.Create(context.Background(), base.FileTypeTable, fn.DiskFileNum(), objstorage.CreateOptions{}) 3097 require.NoError(t, err) 3098 3099 metaMap[fn] = w 3100 } 3101 3102 // Close a select number of files. 3103 for _, m := range tc.closeFiles { 3104 w, ok := metaMap[m] 3105 if !ok { 3106 continue 3107 } 3108 require.NoError(t, w.Finish()) 3109 } 3110 3111 // Cleanup the set of files in the FS. 3112 var toRemove []*fileMetadata 3113 for _, fn := range tc.cleanupFiles { 3114 m := &fileMetadata{FileNum: fn} 3115 m.InitPhysicalBacking() 3116 toRemove = append(toRemove, m) 3117 } 3118 3119 err = ingestCleanup(objProvider, toRemove) 3120 if tc.wantErr != "" { 3121 require.Error(t, err, "got no error, expected %s", tc.wantErr) 3122 require.Contains(t, err.Error(), tc.wantErr) 3123 } else { 3124 require.NoError(t, err) 3125 } 3126 }) 3127 } 3128 } 3129 3130 // fatalCapturingLogger captures a fatal error instead of panicking. 3131 type fatalCapturingLogger struct { 3132 t testing.TB 3133 err error 3134 } 3135 3136 // Infof implements the Logger interface. 3137 func (l *fatalCapturingLogger) Infof(fmt string, args ...interface{}) { 3138 l.t.Logf(fmt, args...) 3139 } 3140 3141 // Fatalf implements the Logger interface. 
3142 func (l *fatalCapturingLogger) Fatalf(_ string, args ...interface{}) { 3143 l.err = args[0].(error) 3144 } 3145 3146 func TestIngestValidation(t *testing.T) { 3147 type keyVal struct { 3148 key, val []byte 3149 } 3150 type corruptionLocation int 3151 const ( 3152 corruptionLocationNone corruptionLocation = iota 3153 corruptionLocationStart 3154 corruptionLocationEnd 3155 corruptionLocationInternal 3156 ) 3157 type errLocation int 3158 const ( 3159 errLocationNone errLocation = iota 3160 errLocationIngest 3161 errLocationValidation 3162 ) 3163 const ( 3164 nKeys = 1_000 3165 keySize = 16 3166 valSize = 100 3167 blockSize = 100 3168 3169 ingestTableName = "ext" 3170 ) 3171 ingestPath := filepath.Join(t.TempDir(), ingestTableName) 3172 3173 seed := uint64(time.Now().UnixNano()) 3174 rng := rand.New(rand.NewSource(seed)) 3175 t.Logf("rng seed = %d", seed) 3176 3177 testCases := []struct { 3178 description string 3179 cLoc corruptionLocation 3180 wantErrType errLocation 3181 }{ 3182 { 3183 description: "no corruption", 3184 cLoc: corruptionLocationNone, 3185 wantErrType: errLocationNone, 3186 }, 3187 { 3188 description: "start block", 3189 cLoc: corruptionLocationStart, 3190 wantErrType: errLocationIngest, 3191 }, 3192 { 3193 description: "end block", 3194 cLoc: corruptionLocationEnd, 3195 wantErrType: errLocationIngest, 3196 }, 3197 { 3198 description: "non-end block", 3199 cLoc: corruptionLocationInternal, 3200 wantErrType: errLocationValidation, 3201 }, 3202 } 3203 3204 for _, tc := range testCases { 3205 t.Run(tc.description, func(t *testing.T) { 3206 var wg sync.WaitGroup 3207 wg.Add(1) 3208 3209 fs := vfs.NewMem() 3210 logger := &fatalCapturingLogger{t: t} 3211 opts := &Options{ 3212 FS: fs, 3213 Logger: logger, 3214 EventListener: &EventListener{ 3215 TableValidated: func(i TableValidatedInfo) { 3216 wg.Done() 3217 }, 3218 }, 3219 } 3220 opts.Experimental.ValidateOnIngest = true 3221 d, err := Open("", opts) 3222 require.NoError(t, err) 3223 defer func() { 
require.NoError(t, d.Close()) }() 3224 3225 corrupt := func(f vfs.File) { 3226 readable, err := sstable.NewSimpleReadable(f) 3227 require.NoError(t, err) 3228 // Compute the layout of the sstable in order to find the 3229 // appropriate block locations to corrupt. 3230 r, err := sstable.NewReader(readable, sstable.ReaderOptions{}) 3231 require.NoError(t, err) 3232 l, err := r.Layout() 3233 require.NoError(t, err) 3234 require.NoError(t, r.Close()) 3235 3236 // Select an appropriate data block to corrupt. 3237 var blockIdx int 3238 switch tc.cLoc { 3239 case corruptionLocationStart: 3240 blockIdx = 0 3241 case corruptionLocationEnd: 3242 blockIdx = len(l.Data) - 1 3243 case corruptionLocationInternal: 3244 blockIdx = 1 + rng.Intn(len(l.Data)-2) 3245 default: 3246 t.Fatalf("unknown corruptionLocation: %T", tc.cLoc) 3247 } 3248 bh := l.Data[blockIdx] 3249 3250 osF, err := os.OpenFile(ingestPath, os.O_RDWR, 0600) 3251 require.NoError(t, err) 3252 defer func() { require.NoError(t, osF.Close()) }() 3253 3254 // Corrupting a key will cause the ingestion to fail due to a 3255 // malformed key, rather than a block checksum mismatch. 3256 // Instead, we corrupt the last byte in the selected block, 3257 // before the trailer, which corresponds to a value. 3258 offset := bh.Offset + bh.Length - 1 3259 _, err = osF.WriteAt([]byte("\xff"), int64(offset)) 3260 require.NoError(t, err) 3261 } 3262 3263 type errT struct { 3264 errLoc errLocation 3265 err error 3266 } 3267 runIngest := func(keyVals []keyVal) (et errT) { 3268 // The vfs.File does not allow for random reads and writes. 3269 // Create a disk-backed file outside of the DB FS that we can 3270 // open as a regular os.File, if required. 
3271 tmpFS := vfs.Default 3272 f, err := tmpFS.Create(ingestPath) 3273 require.NoError(t, err) 3274 defer func() { _ = tmpFS.Remove(ingestPath) }() 3275 3276 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 3277 BlockSize: blockSize, // Create many smaller blocks. 3278 Compression: NoCompression, // For simpler debugging. 3279 }) 3280 for _, kv := range keyVals { 3281 require.NoError(t, w.Set(kv.key, kv.val)) 3282 } 3283 require.NoError(t, w.Close()) 3284 3285 // Possibly corrupt the file. 3286 if tc.cLoc != corruptionLocationNone { 3287 f, err = tmpFS.Open(ingestPath) 3288 require.NoError(t, err) 3289 corrupt(f) 3290 } 3291 3292 // Copy the file into the DB's FS. 3293 _, err = vfs.Clone(tmpFS, fs, ingestPath, ingestTableName) 3294 require.NoError(t, err) 3295 3296 // Ingest the external table. 3297 err = d.Ingest([]string{ingestTableName}) 3298 if err != nil { 3299 et.errLoc = errLocationIngest 3300 et.err = err 3301 return 3302 } 3303 3304 // Wait for the validation on the sstable to complete. 3305 wg.Wait() 3306 3307 // Return any error encountered during validation. 3308 if logger.err != nil { 3309 et.errLoc = errLocationValidation 3310 et.err = logger.err 3311 } 3312 3313 return 3314 } 3315 3316 // Construct a set of keys to ingest. 3317 var keyVals []keyVal 3318 for i := 0; i < nKeys; i++ { 3319 key := make([]byte, keySize) 3320 _, err = rng.Read(key) 3321 require.NoError(t, err) 3322 3323 val := make([]byte, valSize) 3324 _, err = rng.Read(val) 3325 require.NoError(t, err) 3326 3327 keyVals = append(keyVals, keyVal{key, val}) 3328 } 3329 3330 // Keys must be sorted. 3331 sort.Slice(keyVals, func(i, j int) bool { 3332 return d.cmp(keyVals[i].key, keyVals[j].key) <= 0 3333 }) 3334 3335 // Run the ingestion. 3336 et := runIngest(keyVals) 3337 3338 // Assert we saw the errors we expect. 
3339 switch tc.wantErrType { 3340 case errLocationNone: 3341 require.Equal(t, errLocationNone, et.errLoc) 3342 require.NoError(t, et.err) 3343 case errLocationIngest: 3344 require.Equal(t, errLocationIngest, et.errLoc) 3345 require.Error(t, et.err) 3346 require.True(t, errors.Is(et.err, base.ErrCorruption)) 3347 case errLocationValidation: 3348 require.Equal(t, errLocationValidation, et.errLoc) 3349 require.Error(t, et.err) 3350 require.True(t, errors.Is(et.err, base.ErrCorruption)) 3351 default: 3352 t.Fatalf("unknown wantErrType %T", tc.wantErrType) 3353 } 3354 }) 3355 } 3356 } 3357 3358 // BenchmarkManySSTables measures the cost of various operations with various 3359 // counts of SSTables within the database. 3360 func BenchmarkManySSTables(b *testing.B) { 3361 counts := []int{10, 1_000, 10_000, 100_000, 1_000_000} 3362 ops := []string{"ingest", "calculateInuseKeyRanges"} 3363 for _, op := range ops { 3364 b.Run(op, func(b *testing.B) { 3365 for _, count := range counts { 3366 b.Run(fmt.Sprintf("sstables=%d", count), func(b *testing.B) { 3367 mem := vfs.NewMem() 3368 d, err := Open("", &Options{ 3369 FS: mem, 3370 }) 3371 require.NoError(b, err) 3372 3373 var paths []string 3374 for i := 0; i < count; i++ { 3375 n := fmt.Sprintf("%07d", i) 3376 f, err := mem.Create(n) 3377 require.NoError(b, err) 3378 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 3379 require.NoError(b, w.Set([]byte(n), nil)) 3380 require.NoError(b, w.Close()) 3381 paths = append(paths, n) 3382 } 3383 require.NoError(b, d.Ingest(paths)) 3384 3385 { 3386 const broadIngest = "broad.sst" 3387 f, err := mem.Create(broadIngest) 3388 require.NoError(b, err) 3389 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 3390 require.NoError(b, w.Set([]byte("0"), nil)) 3391 require.NoError(b, w.Set([]byte("Z"), nil)) 3392 require.NoError(b, w.Close()) 3393 require.NoError(b, d.Ingest([]string{broadIngest})) 3394 } 3395 3396 switch op 
{ 3397 case "ingest": 3398 runBenchmarkManySSTablesIngest(b, d, mem, count) 3399 case "calculateInuseKeyRanges": 3400 runBenchmarkManySSTablesInUseKeyRanges(b, d, count) 3401 } 3402 require.NoError(b, d.Close()) 3403 }) 3404 } 3405 }) 3406 } 3407 } 3408 3409 func runBenchmarkManySSTablesIngest(b *testing.B, d *DB, fs vfs.FS, count int) { 3410 b.ResetTimer() 3411 for i := 0; i < b.N; i++ { 3412 n := fmt.Sprintf("%07d", count+i) 3413 f, err := fs.Create(n) 3414 require.NoError(b, err) 3415 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 3416 require.NoError(b, w.Set([]byte(n), nil)) 3417 require.NoError(b, w.Close()) 3418 require.NoError(b, d.Ingest([]string{n})) 3419 } 3420 } 3421 3422 func runBenchmarkManySSTablesInUseKeyRanges(b *testing.B, d *DB, count int) { 3423 // This benchmark is pretty contrived, but it's not easy to write a 3424 // microbenchmark for this in a more natural way. L6 has many files, and 3425 // L5 has 1 file spanning the entire breadth of L5. 3426 d.mu.Lock() 3427 defer d.mu.Unlock() 3428 v := d.mu.versions.currentVersion() 3429 b.ResetTimer() 3430 3431 smallest := []byte("0") 3432 largest := []byte("z") 3433 for i := 0; i < b.N; i++ { 3434 _ = calculateInuseKeyRanges(v, d.cmp, 0, numLevels-1, smallest, largest) 3435 } 3436 }