github.com/cockroachdb/pebble@v1.1.2/ingest_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "math" 13 "os" 14 "path/filepath" 15 "runtime" 16 "sort" 17 "strconv" 18 "strings" 19 "sync" 20 "sync/atomic" 21 "testing" 22 "time" 23 24 "github.com/cockroachdb/datadriven" 25 "github.com/cockroachdb/errors" 26 "github.com/cockroachdb/errors/oserror" 27 "github.com/cockroachdb/pebble/internal/base" 28 "github.com/cockroachdb/pebble/internal/keyspan" 29 "github.com/cockroachdb/pebble/internal/manifest" 30 "github.com/cockroachdb/pebble/internal/rangekey" 31 "github.com/cockroachdb/pebble/internal/testkeys" 32 "github.com/cockroachdb/pebble/objstorage" 33 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 34 "github.com/cockroachdb/pebble/objstorage/remote" 35 "github.com/cockroachdb/pebble/record" 36 "github.com/cockroachdb/pebble/sstable" 37 "github.com/cockroachdb/pebble/vfs" 38 "github.com/cockroachdb/pebble/vfs/errorfs" 39 "github.com/kr/pretty" 40 "github.com/stretchr/testify/require" 41 "golang.org/x/exp/rand" 42 ) 43 44 func TestSSTableKeyCompare(t *testing.T) { 45 var buf bytes.Buffer 46 datadriven.RunTest(t, "testdata/sstable_key_compare", func(t *testing.T, td *datadriven.TestData) string { 47 switch td.Cmd { 48 case "cmp": 49 buf.Reset() 50 for _, line := range strings.Split(td.Input, "\n") { 51 fields := strings.Fields(line) 52 a := base.ParseInternalKey(fields[0]) 53 b := base.ParseInternalKey(fields[1]) 54 got := sstableKeyCompare(testkeys.Comparer.Compare, a, b) 55 fmt.Fprintf(&buf, "%38s", fmt.Sprint(a.Pretty(base.DefaultFormatter))) 56 switch got { 57 case -1: 58 fmt.Fprint(&buf, " < ") 59 case +1: 60 fmt.Fprint(&buf, " > ") 61 case 0: 62 fmt.Fprint(&buf, " = ") 63 } 64 fmt.Fprintf(&buf, "%s\n", fmt.Sprint(b.Pretty(base.DefaultFormatter))) 65 } 66 return buf.String() 67 default: 68 return fmt.Sprintf("unrecognized command %q", td.Cmd) 69 } 70 }) 71 } 72 73 func TestIngestLoad(t *testing.T) { 74 mem := vfs.NewMem() 75 76 datadriven.RunTest(t, "testdata/ingest_load", func(t *testing.T, td *datadriven.TestData) string { 77 switch td.Cmd { 78 case "load": 79 writerOpts := sstable.WriterOptions{} 80 var dbVersion FormatMajorVersion 81 for _, cmdArgs := range td.CmdArgs { 82 v, err := strconv.Atoi(cmdArgs.Vals[0]) 83 if err != nil { 84 return err.Error() 85 } 86 switch k := cmdArgs.Key; k { 87 case "writer-version": 88 fmv := FormatMajorVersion(v) 89 writerOpts.TableFormat = fmv.MaxTableFormat() 90 case "db-version": 91 dbVersion = FormatMajorVersion(v) 92 default: 93 return fmt.Sprintf("unknown cmd %s\n", k) 94 } 95 } 96 f, err := mem.Create("ext") 97 if err != nil { 98 return err.Error() 99 } 100 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writerOpts) 101 for _, data := range strings.Split(td.Input, "\n") { 102 if strings.HasPrefix(data, "rangekey: ") { 103 data = strings.TrimPrefix(data, "rangekey: ") 104 s := keyspan.ParseSpan(data) 105 err := rangekey.Encode(&s, w.AddRangeKey) 106 if err != nil { 107 return err.Error() 108 } 109 continue 110 } 111 112 j := strings.Index(data, ":") 113 if j < 0 { 114 return fmt.Sprintf("malformed input: %s\n", data) 115 } 116 key := base.ParseInternalKey(data[:j]) 117 value := []byte(data[j+1:]) 118 if err := w.Add(key, value); err != nil { 119 return err.Error() 120 } 121 } 122 if err := w.Close(); err != nil { 123 return err.Error() 124 } 125 126 opts := (&Options{ 127 Comparer: DefaultComparer, 128 FS: mem, 129 }).WithFSDefaults() 130 lr, err := ingestLoad(opts, dbVersion, []string{"ext"}, nil, nil, 0, []base.DiskFileNum{base.FileNum(1).DiskFileNum()}, nil, 0) 131 if err != nil { 132 return err.Error() 133 } 134 var buf bytes.Buffer 135 for _, m := range lr.localMeta { 136 fmt.Fprintf(&buf, "%d: %s-%s\n", m.FileNum, m.Smallest, m.Largest) 137 fmt.Fprintf(&buf, " points: %s-%s\n", m.SmallestPointKey, m.LargestPointKey) 138 fmt.Fprintf(&buf, " ranges: %s-%s\n", m.SmallestRangeKey, m.LargestRangeKey) 139 } 140 return buf.String() 141 142 default: 143 return fmt.Sprintf("unknown command: %s", td.Cmd) 144 } 145 }) 146 } 147 148 func TestIngestLoadRand(t *testing.T) { 149 mem := vfs.NewMem() 150 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 151 cmp := DefaultComparer.Compare 152 version := internalFormatNewest 153 154 randBytes := func(size int) []byte { 155 data := make([]byte, size) 156 for i := range data { 157 data[i] = byte(rng.Int() & 0xff) 158 } 159 return data 160 } 161 162 paths := make([]string, 1+rng.Intn(10)) 163 pending := make([]base.DiskFileNum, len(paths)) 164 expected := make([]*fileMetadata, len(paths)) 165 for i := range paths { 166 paths[i] = fmt.Sprint(i) 167 pending[i] = base.FileNum(rng.Uint64()).DiskFileNum() 168 expected[i] = &fileMetadata{ 169 FileNum: pending[i].FileNum(), 170 } 171 expected[i].StatsMarkValid() 172 173 func() { 174 f, err := mem.Create(paths[i]) 175 require.NoError(t, err) 176 177 keys := make([]InternalKey, 1+rng.Intn(100)) 178 for i := range keys { 179 keys[i] = base.MakeInternalKey( 180 randBytes(1+rng.Intn(10)), 181 0, 182 InternalKeyKindSet) 183 } 184 sort.Slice(keys, func(i, j int) bool { 185 return base.InternalCompare(cmp, keys[i], keys[j]) < 0 186 }) 187 188 expected[i].ExtendPointKeyBounds(cmp, keys[0], keys[len(keys)-1]) 189 190 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 191 TableFormat: version.MaxTableFormat(), 192 }) 193 var count uint64 194 for i := range keys { 195 if i > 0 && base.InternalCompare(cmp, keys[i-1], keys[i]) == 0 { 196 // Duplicate key, ignore. 197 continue 198 } 199 w.Add(keys[i], nil) 200 count++ 201 } 202 expected[i].Stats.NumEntries = count 203 require.NoError(t, w.Close()) 204 205 meta, err := w.Metadata() 206 require.NoError(t, err) 207 208 expected[i].Size = meta.Size 209 expected[i].InitPhysicalBacking() 210 }() 211 } 212 213 opts := (&Options{ 214 Comparer: DefaultComparer, 215 FS: mem, 216 }).WithFSDefaults() 217 lr, err := ingestLoad(opts, version, paths, nil, nil, 0, pending, nil, 0) 218 require.NoError(t, err) 219 220 for _, m := range lr.localMeta { 221 m.CreationTime = 0 222 } 223 t.Log(strings.Join(pretty.Diff(expected, lr.localMeta), "\n")) 224 require.Equal(t, expected, lr.localMeta) 225 } 226 227 func TestIngestLoadInvalid(t *testing.T) { 228 mem := vfs.NewMem() 229 f, err := mem.Create("invalid") 230 require.NoError(t, err) 231 require.NoError(t, f.Close()) 232 233 opts := (&Options{ 234 Comparer: DefaultComparer, 235 FS: mem, 236 }).WithFSDefaults() 237 if _, err := ingestLoad(opts, internalFormatNewest, []string{"invalid"}, nil, nil, 0, []base.DiskFileNum{base.FileNum(1).DiskFileNum()}, nil, 0); err == nil { 238 t.Fatalf("expected error, but found success") 239 } 240 } 241 242 func TestIngestSortAndVerify(t *testing.T) { 243 comparers := map[string]Compare{ 244 "default": DefaultComparer.Compare, 245 "reverse": func(a, b []byte) int { 246 return DefaultComparer.Compare(b, a) 247 }, 248 } 249 250 t.Run("", func(t *testing.T) { 251 datadriven.RunTest(t, "testdata/ingest_sort_and_verify", func(t *testing.T, d *datadriven.TestData) string { 252 switch d.Cmd { 253 case "ingest": 254 var buf bytes.Buffer 255 var meta []*fileMetadata 256 var paths []string 257 var cmpName string 258 d.ScanArgs(t, "cmp", &cmpName) 259 cmp := comparers[cmpName] 260 if cmp == nil { 261 return fmt.Sprintf("%s unknown comparer: %s", d.Cmd, cmpName) 262 } 263 for i, data := range strings.Split(d.Input, "\n") { 264 parts := strings.Split(data, "-") 265 if len(parts) != 2 { 266 return fmt.Sprintf("malformed test case: %s", d.Input) 267 } 268 smallest := base.ParseInternalKey(parts[0]) 269 largest := base.ParseInternalKey(parts[1]) 270 if cmp(smallest.UserKey, largest.UserKey) > 0 { 271 return fmt.Sprintf("range %v-%v is not valid", smallest, largest) 272 } 273 m := (&fileMetadata{}).ExtendPointKeyBounds(cmp, smallest, largest) 274 m.InitPhysicalBacking() 275 meta = append(meta, m) 276 paths = append(paths, strconv.Itoa(i)) 277 } 278 lr := ingestLoadResult{localPaths: paths, localMeta: meta} 279 err := ingestSortAndVerify(cmp, lr, KeyRange{}) 280 if err != nil { 281 return fmt.Sprintf("%v\n", err) 282 } 283 for i := range meta { 284 fmt.Fprintf(&buf, "%s: %v-%v\n", paths[i], meta[i].Smallest, meta[i].Largest) 285 } 286 return buf.String() 287 288 default: 289 return fmt.Sprintf("unknown command: %s", d.Cmd) 290 } 291 }) 292 }) 293 } 294 295 func TestIngestLink(t *testing.T) { 296 // Test linking of tables into the DB directory. Test cleanup when one of the 297 // tables cannot be linked. 298 299 const dir = "db" 300 const count = 10 301 for i := 0; i <= count; i++ { 302 t.Run("", func(t *testing.T) { 303 opts := &Options{FS: vfs.NewMem()} 304 opts.EnsureDefaults().WithFSDefaults() 305 require.NoError(t, opts.FS.MkdirAll(dir, 0755)) 306 objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(opts.FS, dir)) 307 require.NoError(t, err) 308 defer objProvider.Close() 309 310 paths := make([]string, 10) 311 meta := make([]*fileMetadata, len(paths)) 312 contents := make([][]byte, len(paths)) 313 for j := range paths { 314 paths[j] = fmt.Sprintf("external%d", j) 315 meta[j] = &fileMetadata{} 316 meta[j].FileNum = FileNum(j) 317 meta[j].InitPhysicalBacking() 318 f, err := opts.FS.Create(paths[j]) 319 require.NoError(t, err) 320 321 contents[j] = []byte(fmt.Sprintf("data%d", j)) 322 // memFile.Write will modify the supplied buffer when invariants are 323 // enabled, so provide a throw-away copy. 324 _, err = f.Write(append([]byte(nil), contents[j]...)) 325 require.NoError(t, err) 326 require.NoError(t, f.Close()) 327 } 328 329 if i < count { 330 opts.FS.Remove(paths[i]) 331 } 332 333 lr := ingestLoadResult{localMeta: meta, localPaths: paths} 334 err = ingestLink(0 /* jobID */, opts, objProvider, lr, nil /* shared */) 335 if i < count { 336 if err == nil { 337 t.Fatalf("expected error, but found success") 338 } 339 } else { 340 require.NoError(t, err) 341 } 342 343 files, err := opts.FS.List(dir) 344 require.NoError(t, err) 345 346 sort.Strings(files) 347 348 if i < count { 349 if len(files) > 0 { 350 t.Fatalf("expected all of the files to be cleaned up, but found:\n%s", 351 strings.Join(files, "\n")) 352 } 353 } else { 354 if len(files) != count { 355 t.Fatalf("expected %d files, but found:\n%s", count, strings.Join(files, "\n")) 356 } 357 for j := range files { 358 ftype, fileNum, ok := base.ParseFilename(opts.FS, files[j]) 359 if !ok { 360 t.Fatalf("unable to parse filename: %s", files[j]) 361 } 362 if fileTypeTable != ftype { 363 t.Fatalf("expected table, but found %d", ftype) 364 } 365 if j != int(fileNum.FileNum()) { 366 t.Fatalf("expected table %d, but found %d", j, fileNum) 367 } 368 f, err := opts.FS.Open(opts.FS.PathJoin(dir, files[j])) 369 require.NoError(t, err) 370 371 data, err := io.ReadAll(f) 372 require.NoError(t, err) 373 require.NoError(t, f.Close()) 374 if !bytes.Equal(contents[j], data) { 375 t.Fatalf("expected %s, but found %s", contents[j], data) 376 } 377 } 378 } 379 }) 380 } 381 } 382 383 func TestIngestLinkFallback(t *testing.T) { 384 // Verify that ingestLink succeeds if linking fails by falling back to 385 // copying. 386 mem := vfs.NewMem() 387 src, err := mem.Create("source") 388 require.NoError(t, err) 389 390 opts := &Options{FS: errorfs.Wrap(mem, errorfs.OnIndex(1))} 391 opts.EnsureDefaults().WithFSDefaults() 392 objSettings := objstorageprovider.DefaultSettings(opts.FS, "") 393 // Prevent the provider from listing the dir (where we may get an injected error). 394 objSettings.FSDirInitialListing = []string{} 395 objProvider, err := objstorageprovider.Open(objSettings) 396 require.NoError(t, err) 397 defer objProvider.Close() 398 399 meta := []*fileMetadata{{FileNum: 1}} 400 meta[0].InitPhysicalBacking() 401 lr := ingestLoadResult{localMeta: meta, localPaths: []string{"source"}} 402 err = ingestLink(0, opts, objProvider, lr, nil /* shared */) 403 require.NoError(t, err) 404 405 dest, err := mem.Open("000001.sst") 406 require.NoError(t, err) 407 408 // We should be able to write bytes to src, and not have them show up in 409 // dest. 410 _, _ = src.Write([]byte("test")) 411 data, err := io.ReadAll(dest) 412 require.NoError(t, err) 413 if len(data) != 0 { 414 t.Fatalf("expected copy, but files appear to be hard linked: [%s] unexpectedly found", data) 415 } 416 } 417 418 func TestOverlappingIngestedSSTs(t *testing.T) { 419 dir := "" 420 var ( 421 mem vfs.FS 422 d *DB 423 opts *Options 424 closed = false 425 blockFlush = false 426 ) 427 defer func() { 428 if !closed { 429 require.NoError(t, d.Close()) 430 } 431 }() 432 433 reset := func(strictMem bool) { 434 if d != nil && !closed { 435 require.NoError(t, d.Close()) 436 } 437 blockFlush = false 438 439 if strictMem { 440 mem = vfs.NewStrictMem() 441 } else { 442 mem = vfs.NewMem() 443 } 444 445 require.NoError(t, mem.MkdirAll("ext", 0755)) 446 opts = (&Options{ 447 FS: mem, 448 MemTableStopWritesThreshold: 4, 449 L0CompactionThreshold: 100, 450 L0StopWritesThreshold: 100, 451 DebugCheck: DebugCheckLevels, 452 FormatMajorVersion: internalFormatNewest, 453 }).WithFSDefaults() 454 // Disable automatic compactions because otherwise we'll race with 455 // delete-only compactions triggered by ingesting range tombstones. 456 opts.DisableAutomaticCompactions = true 457 458 var err error 459 d, err = Open(dir, opts) 460 require.NoError(t, err) 461 d.TestOnlyWaitForCleaning() 462 } 463 waitForFlush := func() { 464 if d == nil { 465 return 466 } 467 d.mu.Lock() 468 for d.mu.compact.flushing { 469 d.mu.compact.cond.Wait() 470 } 471 d.mu.Unlock() 472 } 473 reset(false) 474 475 datadriven.RunTest(t, "testdata/flushable_ingest", func(t *testing.T, td *datadriven.TestData) string { 476 switch td.Cmd { 477 case "reset": 478 reset(td.HasArg("strictMem")) 479 return "" 480 481 case "ignoreSyncs": 482 var ignoreSyncs bool 483 if len(td.CmdArgs) == 1 && td.CmdArgs[0].String() == "true" { 484 ignoreSyncs = true 485 } 486 mem.(*vfs.MemFS).SetIgnoreSyncs(ignoreSyncs) 487 return "" 488 489 case "resetToSynced": 490 mem.(*vfs.MemFS).ResetToSyncedState() 491 files, err := mem.List(dir) 492 sort.Strings(files) 493 require.NoError(t, err) 494 return strings.Join(files, "\n") 495 496 case "batch": 497 b := d.NewIndexedBatch() 498 if err := runBatchDefineCmd(td, b); err != nil { 499 return err.Error() 500 } 501 if err := b.Commit(nil); err != nil { 502 return err.Error() 503 } 504 return "" 505 506 case "build": 507 if err := runBuildCmd(td, d, mem); err != nil { 508 return err.Error() 509 } 510 return "" 511 512 case "ingest": 513 if err := runIngestCmd(td, d, mem); err != nil { 514 return err.Error() 515 } 516 if !blockFlush { 517 waitForFlush() 518 } 519 return "" 520 521 case "iter": 522 iter, _ := d.NewIter(nil) 523 return runIterCmd(td, iter, true) 524 525 case "lsm": 526 return runLSMCmd(td, d) 527 528 case "close": 529 if closed { 530 return "already closed" 531 } 532 require.NoError(t, d.Close()) 533 closed = true 534 return "" 535 536 case "ls": 537 files, err := mem.List(dir) 538 sort.Strings(files) 539 require.NoError(t, err) 540 return strings.Join(files, "\n") 541 542 case "open": 543 opts.ReadOnly = td.HasArg("readOnly") 544 var err error 545 d, err = Open(dir, opts) 546 closed = false 547 require.NoError(t, err) 548 waitForFlush() 549 d.TestOnlyWaitForCleaning() 550 return "" 551 552 case "blockFlush": 553 blockFlush = true 554 d.mu.Lock() 555 d.mu.compact.flushing = true 556 d.mu.Unlock() 557 return "" 558 559 case "allowFlush": 560 blockFlush = false 561 d.mu.Lock() 562 d.mu.compact.flushing = false 563 d.mu.Unlock() 564 return "" 565 566 case "flush": 567 d.maybeScheduleFlush() 568 waitForFlush() 569 d.TestOnlyWaitForCleaning() 570 return "" 571 572 case "get": 573 return runGetCmd(t, td, d) 574 575 default: 576 return fmt.Sprintf("unknown command: %s", td.Cmd) 577 } 578 }) 579 } 580 581 func TestExcise(t *testing.T) { 582 var mem vfs.FS 583 var d *DB 584 var flushed bool 585 defer func() { 586 require.NoError(t, d.Close()) 587 }() 588 589 var opts *Options 590 reset := func() { 591 if d != nil { 592 require.NoError(t, d.Close()) 593 } 594 595 mem = vfs.NewMem() 596 require.NoError(t, mem.MkdirAll("ext", 0755)) 597 opts = &Options{ 598 FS: mem, 599 L0CompactionThreshold: 100, 600 L0StopWritesThreshold: 100, 601 DebugCheck: DebugCheckLevels, 602 EventListener: &EventListener{FlushEnd: func(info FlushInfo) { 603 flushed = true 604 }}, 605 FormatMajorVersion: FormatVirtualSSTables, 606 Comparer: testkeys.Comparer, 607 } 608 // Disable automatic compactions because otherwise we'll race with 609 // delete-only compactions triggered by ingesting range tombstones. 610 opts.DisableAutomaticCompactions = true 611 // Set this to true to add some testing for the virtual sstable validation 612 // code paths. 613 opts.Experimental.ValidateOnIngest = true 614 615 var err error 616 d, err = Open("", opts) 617 require.NoError(t, err) 618 } 619 reset() 620 621 datadriven.RunTest(t, "testdata/excise", func(t *testing.T, td *datadriven.TestData) string { 622 switch td.Cmd { 623 case "reset": 624 reset() 625 return "" 626 case "reopen": 627 require.NoError(t, d.Close()) 628 var err error 629 d, err = Open("", opts) 630 require.NoError(t, err) 631 632 return "" 633 case "batch": 634 b := d.NewIndexedBatch() 635 if err := runBatchDefineCmd(td, b); err != nil { 636 return err.Error() 637 } 638 if err := b.Commit(nil); err != nil { 639 return err.Error() 640 } 641 return "" 642 case "build": 643 if err := runBuildCmd(td, d, mem); err != nil { 644 return err.Error() 645 } 646 return "" 647 648 case "flush": 649 if err := d.Flush(); err != nil { 650 return err.Error() 651 } 652 return "" 653 654 case "ingest": 655 flushed = false 656 if err := runIngestCmd(td, d, mem); err != nil { 657 return err.Error() 658 } 659 // Wait for a possible flush. 660 d.mu.Lock() 661 for d.mu.compact.flushing { 662 d.mu.compact.cond.Wait() 663 } 664 d.mu.Unlock() 665 if flushed { 666 return "memtable flushed" 667 } 668 return "" 669 670 case "ingest-and-excise": 671 flushed = false 672 if err := runIngestAndExciseCmd(td, d, mem); err != nil { 673 return err.Error() 674 } 675 // Wait for a possible flush. 676 d.mu.Lock() 677 for d.mu.compact.flushing { 678 d.mu.compact.cond.Wait() 679 } 680 d.mu.Unlock() 681 if flushed { 682 return "memtable flushed" 683 } 684 return "" 685 686 case "get": 687 return runGetCmd(t, td, d) 688 689 case "iter": 690 iter, _ := d.NewIter(&IterOptions{ 691 KeyTypes: IterKeyTypePointsAndRanges, 692 }) 693 return runIterCmd(td, iter, true) 694 695 case "lsm": 696 return runLSMCmd(td, d) 697 698 case "metrics": 699 // The asynchronous loading of table stats can change metrics, so 700 // wait for all the tables' stats to be loaded. 701 d.mu.Lock() 702 d.waitTableStats() 703 d.mu.Unlock() 704 705 return d.Metrics().StringForTests() 706 707 case "wait-pending-table-stats": 708 return runTableStatsCmd(td, d) 709 710 case "excise": 711 ve := &versionEdit{ 712 DeletedFiles: map[deletedFileEntry]*fileMetadata{}, 713 } 714 var exciseSpan KeyRange 715 if len(td.CmdArgs) != 2 { 716 panic("insufficient args for compact command") 717 } 718 exciseSpan.Start = []byte(td.CmdArgs[0].Key) 719 exciseSpan.End = []byte(td.CmdArgs[1].Key) 720 721 d.mu.Lock() 722 d.mu.versions.logLock() 723 d.mu.Unlock() 724 current := d.mu.versions.currentVersion() 725 for level := range current.Levels { 726 iter := current.Levels[level].Iter() 727 for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { 728 _, err := d.excise(exciseSpan, m, ve, level) 729 if err != nil { 730 d.mu.Lock() 731 d.mu.versions.logUnlock() 732 d.mu.Unlock() 733 return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) 734 } 735 } 736 } 737 d.mu.Lock() 738 d.mu.versions.logUnlock() 739 d.mu.Unlock() 740 return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.DebugString(base.DefaultFormatter)) 741 742 case "confirm-backing": 743 // Confirms that the files have the same FileBacking. 744 fileNums := make(map[base.FileNum]struct{}) 745 for i := range td.CmdArgs { 746 fNum, err := strconv.Atoi(td.CmdArgs[i].Key) 747 if err != nil { 748 panic("invalid file number") 749 } 750 fileNums[base.FileNum(fNum)] = struct{}{} 751 } 752 d.mu.Lock() 753 currVersion := d.mu.versions.currentVersion() 754 var ptr *manifest.FileBacking 755 for _, level := range currVersion.Levels { 756 lIter := level.Iter() 757 for f := lIter.First(); f != nil; f = lIter.Next() { 758 if _, ok := fileNums[f.FileNum]; ok { 759 if ptr == nil { 760 ptr = f.FileBacking 761 continue 762 } 763 if f.FileBacking != ptr { 764 d.mu.Unlock() 765 return "file backings are not the same" 766 } 767 } 768 } 769 } 770 d.mu.Unlock() 771 return "file backings are the same" 772 case "compact": 773 if len(td.CmdArgs) != 2 { 774 panic("insufficient args for compact command") 775 } 776 l := td.CmdArgs[0].Key 777 r := td.CmdArgs[1].Key 778 err := d.Compact([]byte(l), []byte(r), false) 779 if err != nil { 780 return err.Error() 781 } 782 return "" 783 default: 784 return fmt.Sprintf("unknown command: %s", td.Cmd) 785 } 786 }) 787 } 788 789 func testIngestSharedImpl( 790 t *testing.T, createOnShared remote.CreateOnSharedStrategy, fileName string, 791 ) { 792 var d, d1, d2 *DB 793 var efos map[string]*EventuallyFileOnlySnapshot 794 defer func() { 795 for _, e := range efos { 796 require.NoError(t, e.Close()) 797 } 798 if d1 != nil { 799 require.NoError(t, d1.Close()) 800 } 801 if d2 != nil { 802 require.NoError(t, d2.Close()) 803 } 804 }() 805 creatorIDCounter := uint64(1) 806 replicateCounter := 1 807 808 reset := func() { 809 for _, e := range efos { 810 require.NoError(t, e.Close()) 811 } 812 if d1 != nil { 813 require.NoError(t, d1.Close()) 814 } 815 if d2 != nil { 816 require.NoError(t, d2.Close()) 817 } 818 efos = make(map[string]*EventuallyFileOnlySnapshot) 819 820 sstorage := remote.NewInMem() 821 mem1 := vfs.NewMem() 822 mem2 := vfs.NewMem() 823 require.NoError(t, mem1.MkdirAll("ext", 0755)) 824 require.NoError(t, mem2.MkdirAll("ext", 0755)) 825 opts1 := &Options{ 826 Comparer: testkeys.Comparer, 827 FS: mem1, 828 LBaseMaxBytes: 1, 829 L0CompactionThreshold: 100, 830 L0StopWritesThreshold: 100, 831 DebugCheck: DebugCheckLevels, 832 FormatMajorVersion: FormatVirtualSSTables, 833 } 834 // lel. 835 lel := MakeLoggingEventListener(DefaultLogger) 836 opts1.EventListener = &lel 837 opts1.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 838 "": sstorage, 839 }) 840 opts1.Experimental.CreateOnShared = createOnShared 841 opts1.Experimental.CreateOnSharedLocator = "" 842 // Disable automatic compactions because otherwise we'll race with 843 // delete-only compactions triggered by ingesting range tombstones. 844 opts1.DisableAutomaticCompactions = true 845 846 opts2 := &Options{} 847 *opts2 = *opts1 848 opts2.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 849 "": sstorage, 850 }) 851 opts2.Experimental.CreateOnShared = createOnShared 852 opts2.Experimental.CreateOnSharedLocator = "" 853 opts2.FS = mem2 854 855 var err error 856 d1, err = Open("", opts1) 857 require.NoError(t, err) 858 require.NoError(t, d1.SetCreatorID(creatorIDCounter)) 859 creatorIDCounter++ 860 d2, err = Open("", opts2) 861 require.NoError(t, err) 862 require.NoError(t, d2.SetCreatorID(creatorIDCounter)) 863 creatorIDCounter++ 864 d = d1 865 } 866 reset() 867 868 datadriven.RunTest(t, fmt.Sprintf("testdata/%s", fileName), func(t *testing.T, td *datadriven.TestData) string { 869 switch td.Cmd { 870 case "reset": 871 reset() 872 return "" 873 case "switch": 874 if len(td.CmdArgs) != 1 { 875 return "usage: switch <1 or 2>" 876 } 877 switch td.CmdArgs[0].Key { 878 case "1": 879 d = d1 880 case "2": 881 d = d2 882 default: 883 return "usage: switch <1 or 2>" 884 } 885 return "ok" 886 case "batch": 887 b := d.NewIndexedBatch() 888 if err := runBatchDefineCmd(td, b); err != nil { 889 return err.Error() 890 } 891 if err := b.Commit(nil); err != nil { 892 return err.Error() 893 } 894 return "" 895 case "build": 896 if err := runBuildCmd(td, d, d.opts.FS); err != nil { 897 return err.Error() 898 } 899 return "" 900 901 case "flush": 902 if err := d.Flush(); err != nil { 903 return err.Error() 904 } 905 return "" 906 907 case "ingest": 908 if err := runIngestCmd(td, d, d.opts.FS); err != nil { 909 return err.Error() 910 } 911 // Wait for a possible flush. 912 d.mu.Lock() 913 for d.mu.compact.flushing { 914 d.mu.compact.cond.Wait() 915 } 916 d.mu.Unlock() 917 return "" 918 919 case "ingest-and-excise": 920 if err := runIngestAndExciseCmd(td, d, d.opts.FS); err != nil { 921 return err.Error() 922 } 923 // Wait for a possible flush. 924 d.mu.Lock() 925 for d.mu.compact.flushing { 926 d.mu.compact.cond.Wait() 927 } 928 d.mu.Unlock() 929 return "" 930 931 case "replicate": 932 if len(td.CmdArgs) != 4 { 933 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 934 } 935 var from, to *DB 936 switch td.CmdArgs[0].Key { 937 case "1": 938 from = d1 939 case "2": 940 from = d2 941 default: 942 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 943 } 944 switch td.CmdArgs[1].Key { 945 case "1": 946 to = d1 947 case "2": 948 to = d2 949 default: 950 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 951 } 952 startKey := []byte(td.CmdArgs[2].Key) 953 endKey := []byte(td.CmdArgs[3].Key) 954 955 writeOpts := d.opts.MakeWriterOptions(0 /* level */, to.opts.FormatMajorVersion.MaxTableFormat()) 956 sstPath := fmt.Sprintf("ext/replicate%d.sst", replicateCounter) 957 f, err := to.opts.FS.Create(sstPath) 958 require.NoError(t, err) 959 replicateCounter++ 960 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writeOpts) 961 962 var sharedSSTs []SharedSSTMeta 963 err = from.ScanInternal(context.TODO(), startKey, endKey, 964 func(key *InternalKey, value LazyValue, _ IteratorLevel) error { 965 val, _, err := value.Value(nil) 966 require.NoError(t, err) 967 require.NoError(t, w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val)) 968 return nil 969 }, 970 func(start, end []byte, seqNum uint64) error { 971 require.NoError(t, w.DeleteRange(start, end)) 972 return nil 973 }, 974 func(start, end []byte, keys []keyspan.Key) error { 975 s := keyspan.Span{ 976 Start: start, 977 End: end, 978 Keys: keys, 979 KeysOrder: 0, 980 } 981 require.NoError(t, rangekey.Encode(&s, func(k base.InternalKey, v []byte) error { 982 return w.AddRangeKey(base.MakeInternalKey(k.UserKey, 0, k.Kind()), v) 983 })) 984 return nil 985 }, 986 func(sst *SharedSSTMeta) error { 987 sharedSSTs = append(sharedSSTs, *sst) 988 return nil 989 }, 990 ) 991 require.NoError(t, err) 992 require.NoError(t, w.Close()) 993 994 _, err = to.IngestAndExcise([]string{sstPath}, sharedSSTs, KeyRange{Start: startKey, End: endKey}) 995 require.NoError(t, err) 996 return fmt.Sprintf("replicated %d shared SSTs", len(sharedSSTs)) 997 998 case "get": 999 return runGetCmd(t, td, d) 1000 1001 case "iter": 1002 o := &IterOptions{KeyTypes: IterKeyTypePointsAndRanges} 1003 var reader Reader 1004 reader = d 1005 for _, arg := range td.CmdArgs { 1006 switch arg.Key { 1007 case "mask-suffix": 1008 o.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) 1009 case "mask-filter": 1010 o.RangeKeyMasking.Filter = func() BlockPropertyFilterMask { 1011 return sstable.NewTestKeysMaskingFilter() 1012 } 1013 case "snapshot": 1014 reader = efos[arg.Vals[0]] 1015 } 1016 } 1017 iter, err := reader.NewIter(o) 1018 if err != nil { 1019 return err.Error() 1020 } 1021 return runIterCmd(td, iter, true) 1022 1023 case "lsm": 1024 return runLSMCmd(td, d) 1025 1026 case "metrics": 1027 // The asynchronous loading of table stats can change metrics, so 1028 // wait for all the tables' stats to be loaded. 1029 d.mu.Lock() 1030 d.waitTableStats() 1031 d.mu.Unlock() 1032 1033 return d.Metrics().StringForTests() 1034 1035 case "wait-pending-table-stats": 1036 return runTableStatsCmd(td, d) 1037 1038 case "excise": 1039 ve := &versionEdit{ 1040 DeletedFiles: map[deletedFileEntry]*fileMetadata{}, 1041 } 1042 var exciseSpan KeyRange 1043 if len(td.CmdArgs) != 2 { 1044 panic("insufficient args for excise command") 1045 } 1046 exciseSpan.Start = []byte(td.CmdArgs[0].Key) 1047 exciseSpan.End = []byte(td.CmdArgs[1].Key) 1048 1049 d.mu.Lock() 1050 d.mu.versions.logLock() 1051 d.mu.Unlock() 1052 current := d.mu.versions.currentVersion() 1053 for level := range current.Levels { 1054 iter := current.Levels[level].Iter() 1055 for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { 1056 _, err := d.excise(exciseSpan, m, ve, level) 1057 if err != nil { 1058 d.mu.Lock() 1059 d.mu.versions.logUnlock() 1060 d.mu.Unlock() 1061 return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) 1062 } 1063 } 1064 } 1065 d.mu.Lock() 1066 d.mu.versions.logUnlock() 1067 d.mu.Unlock() 1068 return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.String()) 1069 1070 case "file-only-snapshot": 1071 if len(td.CmdArgs) != 1 { 1072 panic("insufficient args for file-only-snapshot command") 1073 } 1074 name := td.CmdArgs[0].Key 1075 var keyRanges []KeyRange 1076 for _, line := range strings.Split(td.Input, "\n") { 1077 fields := strings.Fields(line) 1078 if len(fields) != 2 { 1079 return "expected two fields for file-only snapshot KeyRanges" 1080 } 1081 kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])} 1082 keyRanges = append(keyRanges, kr) 1083 } 1084 1085 s := d.NewEventuallyFileOnlySnapshot(keyRanges) 1086 efos[name] = s 1087 return "ok" 1088 1089 case "wait-for-file-only-snapshot": 1090 if len(td.CmdArgs) != 1 { 1091 panic("insufficient args for file-only-snapshot command") 1092 } 1093 name := td.CmdArgs[0].Key 1094 err := efos[name].WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond) 1095 if err != nil { 1096 return err.Error() 1097 } 1098 return "ok" 1099 1100 case "compact": 1101 err := runCompactCmd(td, d) 1102 if err != nil { 1103 return err.Error() 1104 } 1105 return "ok" 1106 default: 1107 return fmt.Sprintf("unknown command: %s", td.Cmd) 1108 } 1109 }) 1110 } 1111 1112 func TestIngestShared(t *testing.T) { 1113 for _, strategy := range []remote.CreateOnSharedStrategy{remote.CreateOnSharedAll, remote.CreateOnSharedLower} { 1114 strategyStr := "all" 1115 if strategy == remote.CreateOnSharedLower { 1116 strategyStr = "lower" 1117 } 1118 t.Run(fmt.Sprintf("createOnShared=%s", strategyStr), func(t *testing.T) { 1119 fileName := "ingest_shared" 1120 if strategy == remote.CreateOnSharedLower { 1121 fileName = "ingest_shared_lower" 1122 } 1123 testIngestSharedImpl(t, strategy, fileName) 1124 }) 1125 } 1126 } 1127 1128 func TestSimpleIngestShared(t *testing.T) { 1129 mem := vfs.NewMem() 1130 var d *DB 1131 var provider2 objstorage.Provider 1132 opts2 := Options{FS: vfs.NewMem(), FormatMajorVersion: FormatVirtualSSTables} 1133 opts2.EnsureDefaults() 1134 1135 // Create an objProvider where we will fake-create some sstables that can 1136 // then be shared back to the db instance. 1137 providerSettings := objstorageprovider.Settings{ 1138 Logger: opts2.Logger, 1139 FS: opts2.FS, 1140 FSDirName: "", 1141 FSDirInitialListing: nil, 1142 FSCleaner: opts2.Cleaner, 1143 NoSyncOnClose: opts2.NoSyncOnClose, 1144 BytesPerSync: opts2.BytesPerSync, 1145 } 1146 providerSettings.Remote.StorageFactory = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 1147 "": remote.NewInMem(), 1148 }) 1149 providerSettings.Remote.CreateOnShared = remote.CreateOnSharedAll 1150 providerSettings.Remote.CreateOnSharedLocator = "" 1151 1152 provider2, err := objstorageprovider.Open(providerSettings) 1153 require.NoError(t, err) 1154 creatorIDCounter := uint64(1) 1155 provider2.SetCreatorID(objstorage.CreatorID(creatorIDCounter)) 1156 creatorIDCounter++ 1157 1158 defer func() { 1159 require.NoError(t, d.Close()) 1160 }() 1161 1162 reset := func() { 1163 if d != nil { 1164 require.NoError(t, d.Close()) 1165 } 1166 1167 mem = vfs.NewMem() 1168 require.NoError(t, mem.MkdirAll("ext", 0755)) 1169 opts := &Options{ 1170 FormatMajorVersion: FormatVirtualSSTables, 1171 FS: mem, 1172 L0CompactionThreshold: 100, 1173 L0StopWritesThreshold: 100, 1174 } 1175 opts.Experimental.RemoteStorage = providerSettings.Remote.StorageFactory 1176 opts.Experimental.CreateOnShared = providerSettings.Remote.CreateOnShared 1177 opts.Experimental.CreateOnSharedLocator = providerSettings.Remote.CreateOnSharedLocator 1178 1179 var err error 1180 d, err = Open("", opts) 1181 require.NoError(t, err) 1182 require.NoError(t, d.SetCreatorID(creatorIDCounter)) 1183 creatorIDCounter++ 1184 } 1185 reset() 1186 1187 metaMap := map[base.DiskFileNum]objstorage.ObjectMetadata{} 1188 1189 require.NoError(t, d.Set([]byte("d"), []byte("unexpected"), nil)) 1190 require.NoError(t, d.Set([]byte("e"), []byte("unexpected"), nil)) 1191 require.NoError(t, d.Set([]byte("a"), []byte("unexpected"), nil)) 1192 require.NoError(t, d.Set([]byte("f"), []byte("unexpected"), nil)) 1193 d.Flush() 1194 1195 { 1196 // Create a shared file. 1197 fn := base.FileNum(2) 1198 f, meta, err := provider2.Create(context.TODO(), fileTypeTable, fn.DiskFileNum(), objstorage.CreateOptions{PreferSharedStorage: true}) 1199 require.NoError(t, err) 1200 w := sstable.NewWriter(f, d.opts.MakeWriterOptions(0, d.opts.FormatMajorVersion.MaxTableFormat())) 1201 w.Set([]byte("d"), []byte("shared")) 1202 w.Set([]byte("e"), []byte("shared")) 1203 w.Close() 1204 metaMap[fn.DiskFileNum()] = meta 1205 } 1206 1207 m := metaMap[base.FileNum(2).DiskFileNum()] 1208 handle, err := provider2.RemoteObjectBacking(&m) 1209 require.NoError(t, err) 1210 size, err := provider2.Size(m) 1211 require.NoError(t, err) 1212 1213 sharedSSTMeta := SharedSSTMeta{ 1214 Backing: handle, 1215 Smallest: base.MakeInternalKey([]byte("d"), 0, InternalKeyKindSet), 1216 Largest: base.MakeInternalKey([]byte("e"), 0, InternalKeyKindSet), 1217 SmallestPointKey: base.MakeInternalKey([]byte("d"), 0, InternalKeyKindSet), 1218 LargestPointKey: base.MakeInternalKey([]byte("e"), 0, InternalKeyKindSet), 1219 Level: 6, 1220 Size: uint64(size + 5), 1221 } 1222 _, err = d.IngestAndExcise([]string{}, []SharedSSTMeta{sharedSSTMeta}, KeyRange{Start: []byte("d"), End: []byte("ee")}) 1223 require.NoError(t, err) 1224 1225 // TODO(bilal): Once reading of shared sstables is in, verify that the values 1226 // of d and e have been updated. 1227 } 1228 1229 type blockedCompaction struct { 1230 startBlock, unblock chan struct{} 1231 } 1232 1233 func TestConcurrentExcise(t *testing.T) { 1234 var d, d1, d2 *DB 1235 var efos map[string]*EventuallyFileOnlySnapshot 1236 backgroundErrs := make(chan error, 5) 1237 var compactions map[string]*blockedCompaction 1238 defer func() { 1239 for _, e := range efos { 1240 require.NoError(t, e.Close()) 1241 } 1242 if d1 != nil { 1243 require.NoError(t, d1.Close()) 1244 } 1245 if d2 != nil { 1246 require.NoError(t, d2.Close()) 1247 } 1248 }() 1249 creatorIDCounter := uint64(1) 1250 replicateCounter := 1 1251 1252 var wg sync.WaitGroup 1253 defer wg.Wait() 1254 var blockNextCompaction bool 1255 var blockedJobID int 1256 var blockedCompactionName string 1257 var blockedCompactionsMu sync.Mutex // protects the above three variables. 1258 1259 reset := func() { 1260 wg.Wait() 1261 for _, e := range efos { 1262 require.NoError(t, e.Close()) 1263 } 1264 if d1 != nil { 1265 require.NoError(t, d1.Close()) 1266 } 1267 if d2 != nil { 1268 require.NoError(t, d2.Close()) 1269 } 1270 efos = make(map[string]*EventuallyFileOnlySnapshot) 1271 compactions = make(map[string]*blockedCompaction) 1272 backgroundErrs = make(chan error, 5) 1273 1274 var el EventListener 1275 el.EnsureDefaults(testLogger{t: t}) 1276 el.FlushBegin = func(info FlushInfo) { 1277 // Don't block flushes 1278 } 1279 el.BackgroundError = func(err error) { 1280 backgroundErrs <- err 1281 } 1282 el.CompactionBegin = func(info CompactionInfo) { 1283 if info.Reason == "move" { 1284 return 1285 } 1286 blockedCompactionsMu.Lock() 1287 defer blockedCompactionsMu.Unlock() 1288 if blockNextCompaction { 1289 blockNextCompaction = false 1290 blockedJobID = info.JobID 1291 } 1292 } 1293 el.TableCreated = func(info TableCreateInfo) { 1294 blockedCompactionsMu.Lock() 1295 if info.JobID != blockedJobID { 1296 blockedCompactionsMu.Unlock() 1297 return 1298 } 1299 blockedJobID = 0 1300 c := compactions[blockedCompactionName] 1301 blockedCompactionName = "" 1302 blockedCompactionsMu.Unlock() 1303 c.startBlock <- struct{}{} 1304 <-c.unblock 1305 } 1306 1307 sstorage := remote.NewInMem() 1308 mem1 := vfs.NewMem() 1309 mem2 := vfs.NewMem() 1310 require.NoError(t, mem1.MkdirAll("ext", 0755)) 1311 require.NoError(t, mem2.MkdirAll("ext", 0755)) 1312 opts1 := &Options{ 1313 Comparer: testkeys.Comparer, 1314 LBaseMaxBytes: 1, 1315 FS: mem1, 1316 L0CompactionThreshold: 100, 1317 L0StopWritesThreshold: 100, 1318 DebugCheck: DebugCheckLevels, 1319 FormatMajorVersion: FormatVirtualSSTables, 1320 } 1321 // lel. 1322 lel := MakeLoggingEventListener(DefaultLogger) 1323 tel := TeeEventListener(lel, el) 1324 opts1.EventListener = &tel 1325 opts1.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 1326 "": sstorage, 1327 }) 1328 opts1.Experimental.CreateOnShared = remote.CreateOnSharedAll 1329 opts1.Experimental.CreateOnSharedLocator = "" 1330 // Disable automatic compactions because otherwise we'll race with 1331 // delete-only compactions triggered by ingesting range tombstones. 1332 opts1.DisableAutomaticCompactions = true 1333 1334 opts2 := &Options{} 1335 *opts2 = *opts1 1336 opts2.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 1337 "": sstorage, 1338 }) 1339 opts2.Experimental.CreateOnShared = remote.CreateOnSharedAll 1340 opts2.Experimental.CreateOnSharedLocator = "" 1341 opts2.FS = mem2 1342 1343 var err error 1344 d1, err = Open("", opts1) 1345 require.NoError(t, err) 1346 require.NoError(t, d1.SetCreatorID(creatorIDCounter)) 1347 creatorIDCounter++ 1348 d2, err = Open("", opts2) 1349 require.NoError(t, err) 1350 require.NoError(t, d2.SetCreatorID(creatorIDCounter)) 1351 creatorIDCounter++ 1352 d = d1 1353 } 1354 reset() 1355 1356 datadriven.RunTest(t, "testdata/concurrent_excise", func(t *testing.T, td *datadriven.TestData) string { 1357 switch td.Cmd { 1358 case "reset": 1359 reset() 1360 return "" 1361 case "switch": 1362 if len(td.CmdArgs) != 1 { 1363 return "usage: switch <1 or 2>" 1364 } 1365 switch td.CmdArgs[0].Key { 1366 case "1": 1367 d = d1 1368 case "2": 1369 d = d2 1370 default: 1371 return "usage: switch <1 or 2>" 1372 } 1373 return "ok" 1374 case "batch": 1375 b := d.NewIndexedBatch() 1376 if err := runBatchDefineCmd(td, b); err != nil { 1377 return err.Error() 1378 } 1379 if err := b.Commit(nil); err != nil { 1380 return err.Error() 1381 } 1382 return "" 1383 case "build": 1384 if err := runBuildCmd(td, d, d.opts.FS); err != nil { 1385 return err.Error() 1386 } 1387 return "" 1388 1389 case "flush": 1390 if err := d.Flush(); err != nil { 1391 return err.Error() 1392 } 1393 return "" 1394 1395 case "ingest": 1396 if err := runIngestCmd(td, d, d.opts.FS); err != nil { 1397 return err.Error() 1398 } 1399 // Wait for a possible flush. 1400 d.mu.Lock() 1401 for d.mu.compact.flushing { 1402 d.mu.compact.cond.Wait() 1403 } 1404 d.mu.Unlock() 1405 return "" 1406 1407 case "ingest-and-excise": 1408 if err := runIngestAndExciseCmd(td, d, d.opts.FS); err != nil { 1409 return err.Error() 1410 } 1411 // Wait for a possible flush. 1412 d.mu.Lock() 1413 for d.mu.compact.flushing { 1414 d.mu.compact.cond.Wait() 1415 } 1416 d.mu.Unlock() 1417 return "" 1418 1419 case "replicate": 1420 if len(td.CmdArgs) != 4 { 1421 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 1422 } 1423 var from, to *DB 1424 switch td.CmdArgs[0].Key { 1425 case "1": 1426 from = d1 1427 case "2": 1428 from = d2 1429 default: 1430 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 1431 } 1432 switch td.CmdArgs[1].Key { 1433 case "1": 1434 to = d1 1435 case "2": 1436 to = d2 1437 default: 1438 return "usage: replicate <from-db-num> <to-db-num> <start-key> <end-key>" 1439 } 1440 startKey := []byte(td.CmdArgs[2].Key) 1441 endKey := []byte(td.CmdArgs[3].Key) 1442 1443 writeOpts := d.opts.MakeWriterOptions(0 /* level */, to.opts.FormatMajorVersion.MaxTableFormat()) 1444 sstPath := fmt.Sprintf("ext/replicate%d.sst", replicateCounter) 1445 f, err := to.opts.FS.Create(sstPath) 1446 require.NoError(t, err) 1447 replicateCounter++ 1448 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writeOpts) 1449 1450 var sharedSSTs []SharedSSTMeta 1451 err = from.ScanInternal(context.TODO(), startKey, endKey, 1452 func(key *InternalKey, value LazyValue, _ IteratorLevel) error { 1453 val, _, err := value.Value(nil) 1454 require.NoError(t, err) 1455 require.NoError(t, w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val)) 1456 return nil 1457 }, 1458 func(start, end []byte, seqNum uint64) error { 1459 require.NoError(t, w.DeleteRange(start, end)) 1460 return nil 1461 }, 1462 func(start, end []byte, keys []keyspan.Key) error { 1463 s := keyspan.Span{ 1464 Start: start, 1465 End: end, 1466 Keys: keys, 1467 KeysOrder: 0, 1468 } 1469 require.NoError(t, rangekey.Encode(&s, func(k base.InternalKey, v []byte) error { 1470 return w.AddRangeKey(base.MakeInternalKey(k.UserKey, 0, k.Kind()), v) 1471 })) 1472 return nil 1473 }, 1474 func(sst *SharedSSTMeta) error { 1475 sharedSSTs = append(sharedSSTs, *sst) 1476 return nil 1477 }, 1478 ) 1479 require.NoError(t, err) 1480 require.NoError(t, w.Close()) 1481 1482 _, err = to.IngestAndExcise([]string{sstPath}, sharedSSTs, KeyRange{Start: startKey, End: endKey}) 1483 require.NoError(t, err) 1484 return fmt.Sprintf("replicated %d shared SSTs", len(sharedSSTs)) 1485 1486 case "get": 1487 return runGetCmd(t, td, d) 1488 1489 case "iter": 1490 o := &IterOptions{KeyTypes: IterKeyTypePointsAndRanges} 1491 var reader Reader 1492 reader = d 1493 for _, arg := range td.CmdArgs { 1494 switch arg.Key { 1495 case "mask-suffix": 1496 o.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) 1497 case "mask-filter": 1498 o.RangeKeyMasking.Filter = func() BlockPropertyFilterMask { 1499 return sstable.NewTestKeysMaskingFilter() 1500 } 1501 case "snapshot": 1502 reader = efos[arg.Vals[0]] 1503 } 1504 } 1505 iter, err := reader.NewIter(o) 1506 if err != nil { 1507 return err.Error() 1508 } 1509 return runIterCmd(td, iter, true) 1510 1511 case "lsm": 1512 return runLSMCmd(td, d) 1513 1514 case "metrics": 1515 // The asynchronous loading of table stats can change metrics, so 1516 // wait for all the tables' stats to be loaded. 1517 d.mu.Lock() 1518 d.waitTableStats() 1519 d.mu.Unlock() 1520 1521 return d.Metrics().StringForTests() 1522 1523 case "wait-pending-table-stats": 1524 return runTableStatsCmd(td, d) 1525 1526 case "excise": 1527 ve := &versionEdit{ 1528 DeletedFiles: map[deletedFileEntry]*fileMetadata{}, 1529 } 1530 var exciseSpan KeyRange 1531 if len(td.CmdArgs) != 2 { 1532 panic("insufficient args for excise command") 1533 } 1534 exciseSpan.Start = []byte(td.CmdArgs[0].Key) 1535 exciseSpan.End = []byte(td.CmdArgs[1].Key) 1536 1537 d.mu.Lock() 1538 d.mu.versions.logLock() 1539 d.mu.Unlock() 1540 current := d.mu.versions.currentVersion() 1541 for level := range current.Levels { 1542 iter := current.Levels[level].Iter() 1543 for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { 1544 _, err := d.excise(exciseSpan, m, ve, level) 1545 if err != nil { 1546 d.mu.Lock() 1547 d.mu.versions.logUnlock() 1548 d.mu.Unlock() 1549 return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) 1550 } 1551 } 1552 } 1553 d.mu.Lock() 1554 d.mu.versions.logUnlock() 1555 d.mu.Unlock() 1556 return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.String()) 1557 1558 case "file-only-snapshot": 1559 if len(td.CmdArgs) != 1 { 1560 panic("insufficient args for file-only-snapshot command") 1561 } 1562 name := td.CmdArgs[0].Key 1563 var keyRanges []KeyRange 1564 for _, line := range strings.Split(td.Input, "\n") { 1565 fields := strings.Fields(line) 1566 if len(fields) != 2 { 1567 return "expected two fields for file-only snapshot KeyRanges" 1568 } 1569 kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])} 1570 keyRanges = append(keyRanges, kr) 1571 } 1572 1573 s := d.NewEventuallyFileOnlySnapshot(keyRanges) 1574 efos[name] = s 1575 return "ok" 1576 1577 case "wait-for-file-only-snapshot": 1578 if len(td.CmdArgs) != 1 { 1579 panic("insufficient args for file-only-snapshot command") 1580 } 1581 name := td.CmdArgs[0].Key 1582 err := efos[name].WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond) 1583 if err != nil { 1584 return err.Error() 1585 } 1586 return "ok" 1587 1588 case "unblock": 1589 name := td.CmdArgs[0].Key 1590 blockedCompactionsMu.Lock() 1591 c := compactions[name] 1592 delete(compactions, name) 1593 blockedCompactionsMu.Unlock() 1594 c.unblock <- struct{}{} 1595 return "ok" 1596 1597 case "compact": 1598 async := false 1599 var otherArgs []datadriven.CmdArg 1600 var bc *blockedCompaction 1601 for i := range td.CmdArgs { 1602 switch td.CmdArgs[i].Key { 1603 case "block": 1604 name := td.CmdArgs[i].Vals[0] 1605 bc = &blockedCompaction{startBlock: make(chan struct{}), unblock: make(chan struct{})} 1606 blockedCompactionsMu.Lock() 1607 compactions[name] = bc 1608 blockNextCompaction = true 1609 blockedCompactionName = name 1610 blockedCompactionsMu.Unlock() 1611 async = true 1612 default: 1613 otherArgs = append(otherArgs, td.CmdArgs[i]) 1614 } 1615 } 1616 var tdClone datadriven.TestData 1617 tdClone = *td 1618 tdClone.CmdArgs = otherArgs 1619 if !async { 1620 err := runCompactCmd(td, d) 1621 if err != nil { 1622 return err.Error() 1623 } 1624 } else { 1625 wg.Add(1) 1626 go func() { 1627 defer wg.Done() 1628 _ = runCompactCmd(&tdClone, d) 1629 }() 1630 <-bc.startBlock 1631 return "spun off in separate goroutine" 1632 } 1633 return "ok" 1634 case "wait-for-background-error": 1635 err := <-backgroundErrs 1636 return err.Error() 1637 default: 1638 return fmt.Sprintf("unknown command: %s", td.Cmd) 1639 } 1640 }) 1641 } 1642 1643 func TestIngestExternal(t *testing.T) { 1644 var mem vfs.FS 1645 var d *DB 1646 var flushed bool 1647 defer func() { 1648 require.NoError(t, d.Close()) 1649 }() 1650 1651 var remoteStorage remote.Storage 1652 1653 reset := func() { 1654 if d != nil { 1655 require.NoError(t, d.Close()) 1656 } 1657 1658 mem = vfs.NewMem() 1659 require.NoError(t, mem.MkdirAll("ext", 0755)) 1660 remoteStorage = remote.NewInMem() 1661 opts := &Options{ 1662 FS: mem, 1663 L0CompactionThreshold: 100, 1664 L0StopWritesThreshold: 100, 1665 DebugCheck: DebugCheckLevels, 1666 EventListener: &EventListener{FlushEnd: func(info FlushInfo) { 1667 flushed = true 1668 }}, 1669 FormatMajorVersion: FormatVirtualSSTables, 1670 } 1671 opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 1672 "external-locator": remoteStorage, 1673 }) 1674 opts.Experimental.CreateOnShared = remote.CreateOnSharedNone 1675 // Disable automatic compactions because otherwise we'll race with 1676 // delete-only compactions triggered by ingesting range tombstones. 1677 opts.DisableAutomaticCompactions = true 1678 1679 var err error 1680 d, err = Open("", opts) 1681 require.NoError(t, err) 1682 require.NoError(t, d.SetCreatorID(1)) 1683 } 1684 reset() 1685 1686 datadriven.RunTest(t, "testdata/ingest_external", func(t *testing.T, td *datadriven.TestData) string { 1687 switch td.Cmd { 1688 case "reset": 1689 reset() 1690 return "" 1691 case "batch": 1692 b := d.NewIndexedBatch() 1693 if err := runBatchDefineCmd(td, b); err != nil { 1694 return err.Error() 1695 } 1696 if err := b.Commit(nil); err != nil { 1697 return err.Error() 1698 } 1699 return "" 1700 case "build-remote": 1701 if err := runBuildRemoteCmd(td, d, remoteStorage); err != nil { 1702 return err.Error() 1703 } 1704 return "" 1705 1706 case "flush": 1707 if err := d.Flush(); err != nil { 1708 return err.Error() 1709 } 1710 return "" 1711 1712 case "ingest-external": 1713 flushed = false 1714 if err := runIngestExternalCmd(td, d, "external-locator"); err != nil { 1715 return err.Error() 1716 } 1717 // Wait for a possible flush. 1718 d.mu.Lock() 1719 for d.mu.compact.flushing { 1720 d.mu.compact.cond.Wait() 1721 } 1722 d.mu.Unlock() 1723 if flushed { 1724 return "memtable flushed" 1725 } 1726 return "" 1727 1728 case "get": 1729 return runGetCmd(t, td, d) 1730 1731 case "iter": 1732 iter, _ := d.NewIter(&IterOptions{ 1733 KeyTypes: IterKeyTypePointsAndRanges, 1734 }) 1735 return runIterCmd(td, iter, true) 1736 1737 case "lsm": 1738 return runLSMCmd(td, d) 1739 1740 case "metrics": 1741 // The asynchronous loading of table stats can change metrics, so 1742 // wait for all the tables' stats to be loaded. 1743 d.mu.Lock() 1744 d.waitTableStats() 1745 d.mu.Unlock() 1746 1747 return d.Metrics().StringForTests() 1748 1749 case "wait-pending-table-stats": 1750 return runTableStatsCmd(td, d) 1751 1752 case "compact": 1753 if len(td.CmdArgs) != 2 { 1754 panic("insufficient args for compact command") 1755 } 1756 l := td.CmdArgs[0].Key 1757 r := td.CmdArgs[1].Key 1758 err := d.Compact([]byte(l), []byte(r), false) 1759 if err != nil { 1760 return err.Error() 1761 } 1762 return "" 1763 default: 1764 return fmt.Sprintf("unknown command: %s", td.Cmd) 1765 } 1766 }) 1767 } 1768 1769 func TestIngestMemtableOverlaps(t *testing.T) { 1770 comparers := []Comparer{ 1771 {Name: "default", Compare: DefaultComparer.Compare, FormatKey: DefaultComparer.FormatKey}, 1772 { 1773 Name: "reverse", 1774 Compare: func(a, b []byte) int { return DefaultComparer.Compare(b, a) }, 1775 FormatKey: DefaultComparer.FormatKey, 1776 }, 1777 } 1778 m := make(map[string]*Comparer) 1779 for i := range comparers { 1780 c := &comparers[i] 1781 m[c.Name] = c 1782 } 1783 1784 for _, comparer := range comparers { 1785 t.Run(comparer.Name, func(t *testing.T) { 1786 var mem *memTable 1787 1788 parseMeta := func(s string) *fileMetadata { 1789 parts := strings.Split(s, "-") 1790 meta := &fileMetadata{} 1791 if len(parts) != 2 { 1792 t.Fatalf("malformed table spec: %s", s) 1793 } 1794 var smallest, largest base.InternalKey 1795 if strings.Contains(parts[0], ".") { 1796 if !strings.Contains(parts[1], ".") { 1797 t.Fatalf("malformed table spec: %s", s) 1798 } 1799 smallest = base.ParseInternalKey(parts[0]) 1800 largest = base.ParseInternalKey(parts[1]) 1801 } else { 1802 smallest = InternalKey{UserKey: []byte(parts[0])} 1803 largest = InternalKey{UserKey: []byte(parts[1])} 1804 } 1805 // If we're using a reverse comparer, flip the file bounds. 1806 if mem.cmp(smallest.UserKey, largest.UserKey) > 0 { 1807 smallest, largest = largest, smallest 1808 } 1809 meta.ExtendPointKeyBounds(comparer.Compare, smallest, largest) 1810 meta.InitPhysicalBacking() 1811 return meta 1812 } 1813 1814 datadriven.RunTest(t, "testdata/ingest_memtable_overlaps", func(t *testing.T, d *datadriven.TestData) string { 1815 switch d.Cmd { 1816 case "define": 1817 b := newBatch(nil) 1818 if err := runBatchDefineCmd(d, b); err != nil { 1819 return err.Error() 1820 } 1821 1822 opts := &Options{ 1823 Comparer: &comparer, 1824 } 1825 opts.EnsureDefaults().WithFSDefaults() 1826 if len(d.CmdArgs) > 1 { 1827 return fmt.Sprintf("%s expects at most 1 argument", d.Cmd) 1828 } 1829 if len(d.CmdArgs) == 1 { 1830 opts.Comparer = m[d.CmdArgs[0].String()] 1831 if opts.Comparer == nil { 1832 return fmt.Sprintf("%s unknown comparer: %s", d.Cmd, d.CmdArgs[0].String()) 1833 } 1834 } 1835 1836 mem = newMemTable(memTableOptions{Options: opts}) 1837 if err := mem.apply(b, 0); err != nil { 1838 return err.Error() 1839 } 1840 return "" 1841 1842 case "overlaps": 1843 var buf bytes.Buffer 1844 for _, data := range strings.Split(d.Input, "\n") { 1845 var keyRanges []internalKeyRange 1846 for _, part := range strings.Fields(data) { 1847 meta := parseMeta(part) 1848 keyRanges = append(keyRanges, internalKeyRange{smallest: meta.Smallest, largest: meta.Largest}) 1849 } 1850 fmt.Fprintf(&buf, "%t\n", ingestMemtableOverlaps(mem.cmp, mem, keyRanges)) 1851 } 1852 return buf.String() 1853 1854 default: 1855 return fmt.Sprintf("unknown command: %s", d.Cmd) 1856 } 1857 }) 1858 }) 1859 } 1860 } 1861 1862 func TestKeyRangeBasic(t *testing.T) { 1863 cmp := base.DefaultComparer.Compare 1864 k1 := KeyRange{Start: []byte("b"), End: []byte("c")} 1865 1866 // Tests for Contains() 1867 require.True(t, k1.Contains(cmp, base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet))) 1868 require.False(t, k1.Contains(cmp, base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet))) 1869 require.True(t, k1.Contains(cmp, base.MakeInternalKey([]byte("bb"), 1, InternalKeyKindSet))) 1870 require.True(t, k1.Contains(cmp, base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, []byte("c")))) 1871 1872 m1 := &fileMetadata{ 1873 Smallest: base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet), 1874 Largest: base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet), 1875 } 1876 require.True(t, k1.Overlaps(cmp, m1)) 1877 m2 := &fileMetadata{ 1878 Smallest: base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet), 1879 Largest: base.MakeInternalKey([]byte("d"), 1, InternalKeyKindSet), 1880 } 1881 require.False(t, k1.Overlaps(cmp, m2)) 1882 m3 := &fileMetadata{ 1883 Smallest: base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet), 1884 Largest: base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, []byte("b")), 1885 } 1886 require.False(t, k1.Overlaps(cmp, m3)) 1887 m4 := &fileMetadata{ 1888 Smallest: base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet), 1889 Largest: base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet), 1890 } 1891 require.True(t, k1.Overlaps(cmp, m4)) 1892 } 1893 1894 func BenchmarkIngestOverlappingMemtable(b *testing.B) { 1895 assertNoError := func(err error) { 1896 b.Helper() 1897 if err != nil { 1898 b.Fatal(err) 1899 } 1900 } 1901 1902 for count := 1; count < 6; count++ { 1903 b.Run(fmt.Sprintf("memtables=%d", count), func(b *testing.B) { 1904 for i := 0; i < b.N; i++ { 1905 b.StopTimer() 1906 mem := vfs.NewMem() 1907 d, err := Open("", &Options{ 1908 FS: mem, 1909 }) 1910 assertNoError(err) 1911 1912 // Create memtables. 1913 for { 1914 assertNoError(d.Set([]byte("a"), nil, nil)) 1915 d.mu.Lock() 1916 done := len(d.mu.mem.queue) == count 1917 d.mu.Unlock() 1918 if done { 1919 break 1920 } 1921 } 1922 1923 // Create the overlapping sstable that will force a flush when ingested. 1924 f, err := mem.Create("ext") 1925 assertNoError(err) 1926 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 1927 assertNoError(w.Set([]byte("a"), nil)) 1928 assertNoError(w.Close()) 1929 1930 b.StartTimer() 1931 assertNoError(d.Ingest([]string{"ext"})) 1932 } 1933 }) 1934 } 1935 } 1936 1937 func TestIngestTargetLevel(t *testing.T) { 1938 var d *DB 1939 defer func() { 1940 if d != nil { 1941 // Ignore errors because this test defines fake in-progress transactions 1942 // that prohibit clean shutdown. 1943 _ = d.Close() 1944 } 1945 }() 1946 1947 parseMeta := func(s string) *fileMetadata { 1948 var rkey bool 1949 if len(s) >= 4 && s[0:4] == "rkey" { 1950 rkey = true 1951 s = s[5:] 1952 } 1953 parts := strings.Split(s, "-") 1954 if len(parts) != 2 { 1955 t.Fatalf("malformed table spec: %s", s) 1956 } 1957 var m *fileMetadata 1958 if rkey { 1959 m = (&fileMetadata{}).ExtendRangeKeyBounds( 1960 d.cmp, 1961 InternalKey{UserKey: []byte(parts[0])}, 1962 InternalKey{UserKey: []byte(parts[1])}, 1963 ) 1964 } else { 1965 m = (&fileMetadata{}).ExtendPointKeyBounds( 1966 d.cmp, 1967 InternalKey{UserKey: []byte(parts[0])}, 1968 InternalKey{UserKey: []byte(parts[1])}, 1969 ) 1970 } 1971 m.InitPhysicalBacking() 1972 return m 1973 } 1974 1975 datadriven.RunTest(t, "testdata/ingest_target_level", func(t *testing.T, td *datadriven.TestData) string { 1976 switch td.Cmd { 1977 case "define": 1978 if d != nil { 1979 // Ignore errors because this test defines fake in-progress 1980 // transactions that prohibit clean shutdown. 1981 _ = d.Close() 1982 } 1983 1984 var err error 1985 opts := Options{ 1986 FormatMajorVersion: internalFormatNewest, 1987 } 1988 opts.WithFSDefaults() 1989 if d, err = runDBDefineCmd(td, &opts); err != nil { 1990 return err.Error() 1991 } 1992 1993 readState := d.loadReadState() 1994 c := &checkConfig{ 1995 logger: d.opts.Logger, 1996 comparer: d.opts.Comparer, 1997 readState: readState, 1998 newIters: d.newIters, 1999 // TODO: runDBDefineCmd doesn't properly update the visible 2000 // sequence number. So we have to explicitly configure level checker with a very large 2001 // sequence number, otherwise the DB appears empty. 2002 seqNum: InternalKeySeqNumMax, 2003 } 2004 if err := checkLevelsInternal(c); err != nil { 2005 return err.Error() 2006 } 2007 readState.unref() 2008 2009 d.mu.Lock() 2010 s := d.mu.versions.currentVersion().String() 2011 d.mu.Unlock() 2012 return s 2013 2014 case "target": 2015 var buf bytes.Buffer 2016 suggestSplit := false 2017 for _, cmd := range td.CmdArgs { 2018 switch cmd.Key { 2019 case "suggest-split": 2020 suggestSplit = true 2021 } 2022 } 2023 for _, target := range strings.Split(td.Input, "\n") { 2024 meta := parseMeta(target) 2025 level, overlapFile, err := ingestTargetLevel( 2026 d.newIters, d.tableNewRangeKeyIter, IterOptions{logger: d.opts.Logger}, 2027 d.opts.Comparer, d.mu.versions.currentVersion(), 1, d.mu.compact.inProgress, meta, 2028 suggestSplit) 2029 if err != nil { 2030 return err.Error() 2031 } 2032 if overlapFile != nil { 2033 fmt.Fprintf(&buf, "%d (split file: %s)\n", level, overlapFile.FileNum) 2034 } else { 2035 fmt.Fprintf(&buf, "%d\n", level) 2036 } 2037 } 2038 return buf.String() 2039 2040 default: 2041 return fmt.Sprintf("unknown command: %s", td.Cmd) 2042 } 2043 }) 2044 } 2045 2046 func TestIngest(t *testing.T) { 2047 var mem vfs.FS 2048 var d *DB 2049 var flushed bool 2050 if runtime.GOARCH == "386" { 2051 t.Skip("skipped on 32-bit due to slightly varied output") 2052 } 2053 defer func() { 2054 require.NoError(t, d.Close()) 2055 }() 2056 2057 reset := func(split bool) { 2058 if d != nil { 2059 require.NoError(t, d.Close()) 2060 } 2061 2062 mem = vfs.NewMem() 2063 require.NoError(t, mem.MkdirAll("ext", 0755)) 2064 opts := &Options{ 2065 FS: mem, 2066 L0CompactionThreshold: 100, 2067 L0StopWritesThreshold: 100, 2068 DebugCheck: DebugCheckLevels, 2069 EventListener: &EventListener{FlushEnd: func(info FlushInfo) { 2070 flushed = true 2071 }}, 2072 FormatMajorVersion: internalFormatNewest, 2073 } 2074 opts.Experimental.IngestSplit = func() bool { 2075 return split 2076 } 2077 // Disable automatic compactions because otherwise we'll race with 2078 // delete-only compactions triggered by ingesting range tombstones. 2079 opts.DisableAutomaticCompactions = true 2080 2081 var err error 2082 d, err = Open("", opts) 2083 require.NoError(t, err) 2084 } 2085 reset(false /* split */) 2086 2087 datadriven.RunTest(t, "testdata/ingest", func(t *testing.T, td *datadriven.TestData) string { 2088 switch td.Cmd { 2089 case "reset": 2090 split := false 2091 for _, cmd := range td.CmdArgs { 2092 switch cmd.Key { 2093 case "enable-split": 2094 split = true 2095 default: 2096 return fmt.Sprintf("unexpected key: %s", cmd.Key) 2097 } 2098 } 2099 reset(split) 2100 return "" 2101 case "batch": 2102 b := d.NewIndexedBatch() 2103 if err := runBatchDefineCmd(td, b); err != nil { 2104 return err.Error() 2105 } 2106 if err := b.Commit(nil); err != nil { 2107 return err.Error() 2108 } 2109 return "" 2110 2111 case "build": 2112 if err := runBuildCmd(td, d, mem); err != nil { 2113 return err.Error() 2114 } 2115 return "" 2116 2117 case "ingest": 2118 flushed = false 2119 if err := runIngestCmd(td, d, mem); err != nil { 2120 return err.Error() 2121 } 2122 // Wait for a possible flush. 2123 d.mu.Lock() 2124 for d.mu.compact.flushing { 2125 d.mu.compact.cond.Wait() 2126 } 2127 d.mu.Unlock() 2128 if flushed { 2129 return "memtable flushed" 2130 } 2131 return "" 2132 2133 case "get": 2134 return runGetCmd(t, td, d) 2135 2136 case "iter": 2137 iter, _ := d.NewIter(&IterOptions{ 2138 KeyTypes: IterKeyTypePointsAndRanges, 2139 }) 2140 return runIterCmd(td, iter, true) 2141 2142 case "lsm": 2143 return runLSMCmd(td, d) 2144 2145 case "metrics": 2146 // The asynchronous loading of table stats can change metrics, so 2147 // wait for all the tables' stats to be loaded. 2148 d.mu.Lock() 2149 d.waitTableStats() 2150 d.mu.Unlock() 2151 2152 return d.Metrics().StringForTests() 2153 2154 case "wait-pending-table-stats": 2155 return runTableStatsCmd(td, d) 2156 2157 case "compact": 2158 if len(td.CmdArgs) != 2 { 2159 panic("insufficient args for compact command") 2160 } 2161 l := td.CmdArgs[0].Key 2162 r := td.CmdArgs[1].Key 2163 err := d.Compact([]byte(l), []byte(r), false) 2164 if err != nil { 2165 return err.Error() 2166 } 2167 return "" 2168 default: 2169 return fmt.Sprintf("unknown command: %s", td.Cmd) 2170 } 2171 }) 2172 } 2173 2174 func TestIngestError(t *testing.T) { 2175 for i := int32(0); ; i++ { 2176 mem := vfs.NewMem() 2177 2178 f0, err := mem.Create("ext0") 2179 require.NoError(t, err) 2180 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f0), sstable.WriterOptions{}) 2181 require.NoError(t, w.Set([]byte("d"), nil)) 2182 require.NoError(t, w.Close()) 2183 f1, err := mem.Create("ext1") 2184 require.NoError(t, err) 2185 w = sstable.NewWriter(objstorageprovider.NewFileWritable(f1), sstable.WriterOptions{}) 2186 require.NoError(t, w.Set([]byte("d"), nil)) 2187 require.NoError(t, w.Close()) 2188 2189 inj := errorfs.OnIndex(-1) 2190 d, err := Open("", &Options{ 2191 FS: errorfs.Wrap(mem, inj), 2192 Logger: panicLogger{}, 2193 L0CompactionThreshold: 8, 2194 }) 2195 require.NoError(t, err) 2196 // Force the creation of an L0 sstable that overlaps with the tables 2197 // we'll attempt to ingest. This ensures that we exercise filesystem 2198 // codepaths when determining the ingest target level. 2199 require.NoError(t, d.Set([]byte("a"), nil, nil)) 2200 require.NoError(t, d.Set([]byte("d"), nil, nil)) 2201 require.NoError(t, d.Flush()) 2202 2203 t.Run(fmt.Sprintf("index-%d", i), func(t *testing.T) { 2204 defer func() { 2205 if r := recover(); r != nil { 2206 if e, ok := r.(error); ok && errors.Is(e, errorfs.ErrInjected) { 2207 return 2208 } 2209 // d.opts.Logger.Fatalf won't propagate ErrInjected 2210 // itself, but should contain the error message. 2211 if strings.HasSuffix(fmt.Sprint(r), errorfs.ErrInjected.Error()) { 2212 return 2213 } 2214 t.Fatal(r) 2215 } 2216 }() 2217 2218 inj.SetIndex(i) 2219 err1 := d.Ingest([]string{"ext0"}) 2220 err2 := d.Ingest([]string{"ext1"}) 2221 err := firstError(err1, err2) 2222 if err != nil && !errors.Is(err, errorfs.ErrInjected) { 2223 t.Fatal(err) 2224 } 2225 }) 2226 2227 // d.Close may error if we failed to flush the manifest. 2228 _ = d.Close() 2229 2230 // If the injector's index is non-negative, the i-th filesystem 2231 // operation was never executed. 2232 if inj.Index() >= 0 { 2233 break 2234 } 2235 } 2236 } 2237 2238 func TestIngestIdempotence(t *testing.T) { 2239 // Use an on-disk filesystem, because Ingest with a MemFS will copy, not 2240 // link the ingested file. 2241 dir, err := os.MkdirTemp("", "ingest-idempotence") 2242 require.NoError(t, err) 2243 defer os.RemoveAll(dir) 2244 fs := vfs.Default 2245 2246 path := fs.PathJoin(dir, "ext") 2247 f, err := fs.Create(fs.PathJoin(dir, "ext")) 2248 require.NoError(t, err) 2249 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2250 require.NoError(t, w.Set([]byte("d"), nil)) 2251 require.NoError(t, w.Close()) 2252 2253 d, err := Open(dir, &Options{ 2254 FS: fs, 2255 }) 2256 require.NoError(t, err) 2257 const count = 4 2258 for i := 0; i < count; i++ { 2259 ingestPath := fs.PathJoin(dir, fmt.Sprintf("ext%d", i)) 2260 require.NoError(t, fs.Link(path, ingestPath)) 2261 require.NoError(t, d.Ingest([]string{ingestPath})) 2262 } 2263 require.NoError(t, d.Close()) 2264 } 2265 2266 func TestIngestCompact(t *testing.T) { 2267 mem := vfs.NewMem() 2268 lel := MakeLoggingEventListener(&base.InMemLogger{}) 2269 d, err := Open("", &Options{ 2270 EventListener: &lel, 2271 FS: mem, 2272 L0CompactionThreshold: 1, 2273 L0StopWritesThreshold: 1, 2274 }) 2275 require.NoError(t, err) 2276 2277 src := func(i int) string { 2278 return fmt.Sprintf("ext%d", i) 2279 } 2280 f, err := mem.Create(src(0)) 2281 require.NoError(t, err) 2282 2283 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2284 key := []byte("a") 2285 require.NoError(t, w.Add(base.MakeInternalKey(key, 0, InternalKeyKindSet), nil)) 2286 require.NoError(t, w.Close()) 2287 2288 // Make N copies of the sstable. 2289 const count = 20 2290 for i := 1; i < count; i++ { 2291 require.NoError(t, vfs.Copy(d.opts.FS, src(0), src(i))) 2292 } 2293 2294 // Ingest the same sstable multiple times. Compaction should take place as 2295 // ingestion happens, preventing an indefinite write stall from occurring. 2296 for i := 0; i < count; i++ { 2297 if i == 10 { 2298 // Half-way through the ingestions, set a key in the memtable to force 2299 // overlap with the memtable which will require the memtable to be 2300 // flushed. 2301 require.NoError(t, d.Set(key, nil, nil)) 2302 } 2303 require.NoError(t, d.Ingest([]string{src(i)})) 2304 } 2305 2306 require.NoError(t, d.Close()) 2307 } 2308 2309 func TestConcurrentIngest(t *testing.T) { 2310 mem := vfs.NewMem() 2311 d, err := Open("", &Options{ 2312 FS: mem, 2313 }) 2314 require.NoError(t, err) 2315 2316 // Create an sstable with 2 keys. This is necessary to trigger the overlap 2317 // bug because an sstable with a single key will not have overlap in internal 2318 // key space and the sequence number assignment had already guaranteed 2319 // correct ordering. 2320 src := func(i int) string { 2321 return fmt.Sprintf("ext%d", i) 2322 } 2323 f, err := mem.Create(src(0)) 2324 require.NoError(t, err) 2325 2326 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2327 require.NoError(t, w.Set([]byte("a"), nil)) 2328 require.NoError(t, w.Set([]byte("b"), nil)) 2329 require.NoError(t, w.Close()) 2330 2331 // Make N copies of the sstable. 2332 errCh := make(chan error, 5) 2333 for i := 1; i < cap(errCh); i++ { 2334 require.NoError(t, vfs.Copy(d.opts.FS, src(0), src(i))) 2335 } 2336 2337 // Perform N ingestions concurrently. 2338 for i := 0; i < cap(errCh); i++ { 2339 go func(i int) { 2340 err := d.Ingest([]string{src(i)}) 2341 if err == nil { 2342 if _, err = d.opts.FS.Stat(src(i)); oserror.IsNotExist(err) { 2343 err = nil 2344 } 2345 } 2346 errCh <- err 2347 }(i) 2348 } 2349 for i := 0; i < cap(errCh); i++ { 2350 require.NoError(t, <-errCh) 2351 } 2352 2353 require.NoError(t, d.Close()) 2354 } 2355 2356 func TestConcurrentIngestCompact(t *testing.T) { 2357 for i := 0; i < 2; i++ { 2358 t.Run("", func(t *testing.T) { 2359 mem := vfs.NewMem() 2360 compactionReady := make(chan struct{}) 2361 compactionBegin := make(chan struct{}) 2362 d, err := Open("", &Options{ 2363 FS: mem, 2364 EventListener: &EventListener{ 2365 TableCreated: func(info TableCreateInfo) { 2366 if info.Reason == "compacting" { 2367 close(compactionReady) 2368 <-compactionBegin 2369 } 2370 }, 2371 }, 2372 }) 2373 require.NoError(t, err) 2374 2375 ingest := func(keys ...string) { 2376 t.Helper() 2377 f, err := mem.Create("ext") 2378 require.NoError(t, err) 2379 2380 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2381 for _, k := range keys { 2382 require.NoError(t, w.Set([]byte(k), nil)) 2383 } 2384 require.NoError(t, w.Close()) 2385 require.NoError(t, d.Ingest([]string{"ext"})) 2386 } 2387 2388 compact := func(start, end string) { 2389 t.Helper() 2390 require.NoError(t, d.Compact([]byte(start), []byte(end), false)) 2391 } 2392 2393 lsm := func() string { 2394 d.mu.Lock() 2395 s := d.mu.versions.currentVersion().String() 2396 d.mu.Unlock() 2397 return s 2398 } 2399 2400 expectLSM := func(expected string) { 2401 t.Helper() 2402 expected = strings.TrimSpace(expected) 2403 actual := strings.TrimSpace(lsm()) 2404 if expected != actual { 2405 t.Fatalf("expected\n%s\nbut found\n%s", expected, actual) 2406 } 2407 } 2408 2409 ingest("a") 2410 ingest("a") 2411 ingest("c") 2412 ingest("c") 2413 2414 expectLSM(` 2415 0.0: 2416 000005:[a#11,SET-a#11,SET] 2417 000007:[c#13,SET-c#13,SET] 2418 6: 2419 000004:[a#10,SET-a#10,SET] 2420 000006:[c#12,SET-c#12,SET] 2421 `) 2422 2423 // At this point ingestion of an sstable containing only key "b" will be 2424 // targeted at L6. Yet a concurrent compaction of sstables 5 and 7 will 2425 // create a new sstable in L6 spanning ["a"-"c"]. So the ingestion must 2426 // actually target L5. 2427 2428 switch i { 2429 case 0: 2430 // Compact, then ingest. 2431 go func() { 2432 <-compactionReady 2433 2434 ingest("b") 2435 2436 close(compactionBegin) 2437 }() 2438 2439 compact("a", "z") 2440 2441 expectLSM(` 2442 0.0: 2443 000009:[b#14,SET-b#14,SET] 2444 6: 2445 000008:[a#0,SET-c#0,SET] 2446 `) 2447 2448 case 1: 2449 // Ingest, then compact 2450 var wg sync.WaitGroup 2451 wg.Add(1) 2452 go func() { 2453 defer wg.Done() 2454 close(compactionBegin) 2455 compact("a", "z") 2456 }() 2457 2458 ingest("b") 2459 wg.Wait() 2460 2461 // Because we're performing the ingestion and compaction concurrently, 2462 // we can't guarantee any particular LSM structure at this point. The 2463 // test will fail with an assertion error due to overlapping sstables 2464 // if there is insufficient synchronization between ingestion and 2465 // compaction. 2466 } 2467 2468 require.NoError(t, d.Close()) 2469 }) 2470 } 2471 } 2472 2473 func TestIngestFlushQueuedMemTable(t *testing.T) { 2474 // Verify that ingestion forces a flush of a queued memtable. 2475 2476 // Test with a format major version prior to FormatFlushableIngest and one 2477 // after. Both should result in the same statistic calculations. 2478 for _, fmv := range []FormatMajorVersion{FormatFlushableIngest - 1, internalFormatNewest} { 2479 func(fmv FormatMajorVersion) { 2480 mem := vfs.NewMem() 2481 d, err := Open("", &Options{ 2482 FS: mem, 2483 FormatMajorVersion: fmv, 2484 }) 2485 require.NoError(t, err) 2486 2487 // Add the key "a" to the memtable, then fill up the memtable with the key 2488 // "b". The ingested sstable will only overlap with the queued memtable. 2489 require.NoError(t, d.Set([]byte("a"), nil, nil)) 2490 for { 2491 require.NoError(t, d.Set([]byte("b"), nil, nil)) 2492 d.mu.Lock() 2493 done := len(d.mu.mem.queue) == 2 2494 d.mu.Unlock() 2495 if done { 2496 break 2497 } 2498 } 2499 2500 ingest := func(keys ...string) { 2501 t.Helper() 2502 f, err := mem.Create("ext") 2503 require.NoError(t, err) 2504 2505 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 2506 TableFormat: fmv.MinTableFormat(), 2507 }) 2508 for _, k := range keys { 2509 require.NoError(t, w.Set([]byte(k), nil)) 2510 } 2511 require.NoError(t, w.Close()) 2512 stats, err := d.IngestWithStats([]string{"ext"}) 2513 require.NoError(t, err) 2514 require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) 2515 require.Equal(t, stats.MemtableOverlappingFiles, 1) 2516 require.Less(t, uint64(0), stats.Bytes) 2517 } 2518 2519 ingest("a") 2520 2521 require.NoError(t, d.Close()) 2522 }(fmv) 2523 } 2524 } 2525 2526 func TestIngestStats(t *testing.T) { 2527 mem := vfs.NewMem() 2528 d, err := Open("", &Options{ 2529 FS: mem, 2530 }) 2531 require.NoError(t, err) 2532 2533 ingest := func(expectedLevel int, keys ...string) { 2534 t.Helper() 2535 f, err := mem.Create("ext") 2536 require.NoError(t, err) 2537 2538 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2539 for _, k := range keys { 2540 require.NoError(t, w.Set([]byte(k), nil)) 2541 } 2542 require.NoError(t, w.Close()) 2543 stats, err := d.IngestWithStats([]string{"ext"}) 2544 require.NoError(t, err) 2545 if expectedLevel == 0 { 2546 require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) 2547 } else { 2548 require.EqualValues(t, 0, stats.ApproxIngestedIntoL0Bytes) 2549 } 2550 require.Less(t, uint64(0), stats.Bytes) 2551 } 2552 ingest(6, "a") 2553 ingest(0, "a") 2554 ingest(6, "b", "g") 2555 ingest(0, "c") 2556 require.NoError(t, d.Close()) 2557 } 2558 2559 func TestIngestFlushQueuedLargeBatch(t *testing.T) { 2560 // Verify that ingestion forces a flush of a queued large batch. 2561 2562 mem := vfs.NewMem() 2563 d, err := Open("", &Options{ 2564 FS: mem, 2565 }) 2566 require.NoError(t, err) 2567 2568 // The default large batch threshold is slightly less than 1/2 of the 2569 // memtable size which makes triggering a problem with flushing queued large 2570 // batches irritating. Manually adjust the threshold to 1/8 of the memtable 2571 // size in order to more easily create a situation where a large batch is 2572 // queued but not automatically flushed. 2573 d.mu.Lock() 2574 d.largeBatchThreshold = d.opts.MemTableSize / 8 2575 d.mu.Unlock() 2576 2577 // Set a record with a large value. This will be transformed into a large 2578 // batch and placed in the flushable queue. 2579 require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("v"), int(d.largeBatchThreshold)), nil)) 2580 2581 ingest := func(keys ...string) { 2582 t.Helper() 2583 f, err := mem.Create("ext") 2584 require.NoError(t, err) 2585 2586 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2587 for _, k := range keys { 2588 require.NoError(t, w.Set([]byte(k), nil)) 2589 } 2590 require.NoError(t, w.Close()) 2591 require.NoError(t, d.Ingest([]string{"ext"})) 2592 } 2593 2594 ingest("a") 2595 2596 require.NoError(t, d.Close()) 2597 } 2598 2599 func TestIngestMemtablePendingOverlap(t *testing.T) { 2600 mem := vfs.NewMem() 2601 d, err := Open("", &Options{ 2602 FS: mem, 2603 }) 2604 require.NoError(t, err) 2605 2606 d.mu.Lock() 2607 // Use a custom commit pipeline apply function to give us control over 2608 // timing of events. 2609 assignedBatch := make(chan struct{}) 2610 applyBatch := make(chan struct{}) 2611 originalApply := d.commit.env.apply 2612 d.commit.env.apply = func(b *Batch, mem *memTable) error { 2613 assignedBatch <- struct{}{} 2614 applyBatch <- struct{}{} 2615 return originalApply(b, mem) 2616 } 2617 d.mu.Unlock() 2618 2619 ingest := func(keys ...string) { 2620 t.Helper() 2621 f, err := mem.Create("ext") 2622 require.NoError(t, err) 2623 2624 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2625 for _, k := range keys { 2626 require.NoError(t, w.Set([]byte(k), nil)) 2627 } 2628 require.NoError(t, w.Close()) 2629 require.NoError(t, d.Ingest([]string{"ext"})) 2630 } 2631 2632 var wg sync.WaitGroup 2633 wg.Add(2) 2634 2635 // First, Set('c') begins. This call will: 2636 // 2637 // * enqueue the batch to the pending queue. 2638 // * allocate a sequence number `x`. 2639 // * write the batch to the WAL. 2640 // 2641 // and then block until we read from the `applyBatch` channel down below. 2642 go func() { 2643 err := d.Set([]byte("c"), nil, nil) 2644 if err != nil { 2645 t.Error(err) 2646 } 2647 wg.Done() 2648 }() 2649 2650 // When the above Set('c') is ready to apply, it sends on the 2651 // `assignedBatch` channel. Once that happens, we start Ingest('a', 'c'). 2652 // The Ingest('a', 'c') allocates sequence number `x + 1`. 2653 go func() { 2654 // Wait until the Set has grabbed a sequence number before ingesting. 2655 <-assignedBatch 2656 ingest("a", "c") 2657 wg.Done() 2658 }() 2659 2660 // The Set('c')#1 and Ingest('a', 'c')#2 are both pending. To maintain 2661 // sequence number invariants, the Set needs to be applied and flushed 2662 // before the Ingest determines its target level. 2663 // 2664 // Sleep a bit to ensure that the Ingest has time to call into 2665 // AllocateSeqNum. Once it allocates its sequence number, it should see 2666 // that there are unpublished sequence numbers below it and spin until the 2667 // Set's sequence number is published. After sleeping, read from 2668 // `applyBatch` to actually allow the Set to apply and publish its 2669 // sequence number. 2670 time.Sleep(100 * time.Millisecond) 2671 <-applyBatch 2672 2673 // Wait for both calls to complete. 2674 wg.Wait() 2675 require.NoError(t, d.Flush()) 2676 require.NoError(t, d.CheckLevels(nil)) 2677 require.NoError(t, d.Close()) 2678 } 2679 2680 type testLogger struct { 2681 t testing.TB 2682 } 2683 2684 func (l testLogger) Infof(format string, args ...interface{}) { 2685 l.t.Logf(format, args...) 2686 } 2687 2688 func (l testLogger) Fatalf(format string, args ...interface{}) { 2689 l.t.Fatalf(format, args...) 2690 } 2691 2692 // TestIngestMemtableOverlapRace is a regression test for the race described in 2693 // #2196. If an ingest that checks for overlap with the mutable memtable and 2694 // finds no overlap, it must not allow overlapping keys with later sequence 2695 // numbers to be applied to the memtable and the memtable to be flushed before 2696 // the ingest completes. 2697 // 2698 // This test operates by committing the same key concurrently: 2699 // - 1 goroutine repeatedly ingests the same sstable writing the key `foo` 2700 // - n goroutines repeatedly apply batches writing the key `foo` and trigger 2701 // flushes. 2702 // 2703 // After a while, the database is closed and the manifest is verified. Version 2704 // edits should contain new files with monotonically increasing sequence 2705 // numbers, since every flush and every ingest conflicts with one another. 2706 func TestIngestMemtableOverlapRace(t *testing.T) { 2707 mem := vfs.NewMem() 2708 el := MakeLoggingEventListener(testLogger{t: t}) 2709 d, err := Open("", &Options{ 2710 FS: mem, 2711 // Disable automatic compactions to keep the manifest clean; only 2712 // flushes and ingests. 2713 DisableAutomaticCompactions: true, 2714 // Disable the WAL to speed up batch commits. 2715 DisableWAL: true, 2716 EventListener: &el, 2717 // We're endlessly appending to L0 without clearing it, so set a maximal 2718 // stop writes threshold. 2719 L0StopWritesThreshold: math.MaxInt, 2720 // Accumulating more than 1 immutable memtable doesn't help us exercise 2721 // the bug, since the committed keys need to be flushed promptly. 2722 MemTableStopWritesThreshold: 2, 2723 }) 2724 require.NoError(t, err) 2725 2726 // Prepare a sstable `ext` deleting foo. 2727 f, err := mem.Create("ext") 2728 require.NoError(t, err) 2729 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2730 require.NoError(t, w.Delete([]byte("foo"))) 2731 require.NoError(t, w.Close()) 2732 2733 var done atomic.Bool 2734 const numSetters = 2 2735 var wg sync.WaitGroup 2736 wg.Add(numSetters + 1) 2737 2738 untilDone := func(fn func()) { 2739 defer wg.Done() 2740 for !done.Load() { 2741 fn() 2742 } 2743 } 2744 2745 // Ingest in the background. 2746 totalIngests := 0 2747 go untilDone(func() { 2748 filename := fmt.Sprintf("ext%d", totalIngests) 2749 require.NoError(t, mem.Link("ext", filename)) 2750 require.NoError(t, d.Ingest([]string{filename})) 2751 totalIngests++ 2752 }) 2753 2754 // Apply batches and trigger flushes in the background. 2755 wo := &WriteOptions{Sync: false} 2756 var localCommits [numSetters]int 2757 for i := 0; i < numSetters; i++ { 2758 i := i 2759 v := []byte(fmt.Sprintf("v%d", i+1)) 2760 go untilDone(func() { 2761 // Commit a batch setting foo=vN. 2762 b := d.NewBatch() 2763 require.NoError(t, b.Set([]byte("foo"), v, nil)) 2764 require.NoError(t, b.Commit(wo)) 2765 localCommits[i]++ 2766 d.AsyncFlush() 2767 }) 2768 } 2769 time.Sleep(100 * time.Millisecond) 2770 done.Store(true) 2771 wg.Wait() 2772 2773 var totalCommits int 2774 for i := 0; i < numSetters; i++ { 2775 totalCommits += localCommits[i] 2776 } 2777 m := d.Metrics() 2778 tot := m.Total() 2779 t.Logf("Committed %d batches.", totalCommits) 2780 t.Logf("Flushed %d times.", m.Flush.Count) 2781 t.Logf("Ingested %d sstables.", tot.TablesIngested) 2782 require.NoError(t, d.CheckLevels(nil)) 2783 require.NoError(t, d.Close()) 2784 2785 // Replay the manifest. Every flush and ingest is a separate version edit. 2786 // Since they all write the same key and compactions are disabled, sequence 2787 // numbers of new files should be monotonically increasing. 2788 // 2789 // This check is necessary because most of these sstables are ingested into 2790 // L0. The L0 sublevels construction will order them by LargestSeqNum, even 2791 // if they're added to L0 out-of-order. The CheckLevels call at the end of 2792 // the test may find that the sublevels are all appropriately ordered, but 2793 // the manifest may reveal they were added to the LSM out-of-order. 2794 dbDesc, err := Peek("", mem) 2795 require.NoError(t, err) 2796 require.True(t, dbDesc.Exists) 2797 f, err = mem.Open(dbDesc.ManifestFilename) 2798 require.NoError(t, err) 2799 defer f.Close() 2800 rr := record.NewReader(f, 0 /* logNum */) 2801 var largest *fileMetadata 2802 for { 2803 r, err := rr.Next() 2804 if err == io.EOF || err == record.ErrInvalidChunk { 2805 break 2806 } 2807 require.NoError(t, err) 2808 var ve manifest.VersionEdit 2809 require.NoError(t, ve.Decode(r)) 2810 t.Log(ve.String()) 2811 for _, f := range ve.NewFiles { 2812 if largest != nil { 2813 require.Equal(t, 0, f.Level) 2814 if largest.LargestSeqNum > f.Meta.LargestSeqNum { 2815 t.Fatalf("previous largest file %s has sequence number > next file %s", largest, f.Meta) 2816 } 2817 } 2818 largest = f.Meta 2819 } 2820 } 2821 } 2822 2823 type ingestCrashFS struct { 2824 vfs.FS 2825 } 2826 2827 func (fs ingestCrashFS) Link(oldname, newname string) error { 2828 if err := fs.FS.Link(oldname, newname); err != nil { 2829 return err 2830 } 2831 panic(errorfs.ErrInjected) 2832 } 2833 2834 type noRemoveFS struct { 2835 vfs.FS 2836 } 2837 2838 func (fs noRemoveFS) Remove(string) error { 2839 return errorfs.ErrInjected 2840 } 2841 2842 func TestIngestFileNumReuseCrash(t *testing.T) { 2843 const count = 10 2844 // Use an on-disk filesystem, because Ingest with a MemFS will copy, not 2845 // link the ingested file. 2846 dir, err := os.MkdirTemp("", "ingest-filenum-reuse") 2847 require.NoError(t, err) 2848 defer os.RemoveAll(dir) 2849 fs := vfs.Default 2850 2851 readFile := func(s string) []byte { 2852 f, err := fs.Open(fs.PathJoin(dir, s)) 2853 require.NoError(t, err) 2854 b, err := io.ReadAll(f) 2855 require.NoError(t, err) 2856 require.NoError(t, f.Close()) 2857 return b 2858 } 2859 2860 // Create sstables to ingest. 2861 var files []string 2862 var fileBytes [][]byte 2863 for i := 0; i < count; i++ { 2864 name := fmt.Sprintf("ext%d", i) 2865 f, err := fs.Create(fs.PathJoin(dir, name)) 2866 require.NoError(t, err) 2867 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 2868 require.NoError(t, w.Set([]byte(fmt.Sprintf("foo%d", i)), nil)) 2869 require.NoError(t, w.Close()) 2870 files = append(files, name) 2871 fileBytes = append(fileBytes, readFile(name)) 2872 } 2873 2874 // Open a database with a filesystem that will successfully link the 2875 // ingested files but then panic. This is an approximation of what a crash 2876 // after linking but before updating the manifest would look like. 2877 d, err := Open(dir, &Options{ 2878 FS: ingestCrashFS{FS: fs}, 2879 }) 2880 // A flush here ensures the file num bumps from creating OPTIONS files, 2881 // etc get recorded in the manifest. We want the nextFileNum after the 2882 // restart to be the same as one of our ingested sstables. 2883 require.NoError(t, err) 2884 require.NoError(t, d.Set([]byte("boop"), nil, nil)) 2885 require.NoError(t, d.Flush()) 2886 for _, f := range files { 2887 func() { 2888 defer func() { err = recover().(error) }() 2889 err = d.Ingest([]string{fs.PathJoin(dir, f)}) 2890 }() 2891 if err == nil || !errors.Is(err, errorfs.ErrInjected) { 2892 t.Fatalf("expected injected error, got %v", err) 2893 } 2894 } 2895 // Leave something in the WAL so that Open will flush while replaying the 2896 // WAL. 2897 require.NoError(t, d.Set([]byte("wal"), nil, nil)) 2898 require.NoError(t, d.Close()) 2899 2900 // There are now two links to each external file: the original extX link 2901 // and a numbered sstable link. The sstable files are still not a part of 2902 // the manifest and so they may be overwritten. Open will detect the 2903 // obsolete number sstables and try to remove them. The FS here is wrapped 2904 // to induce errors on Remove calls. Even if we're unsuccessful in 2905 // removing the obsolete files, the external files should not be 2906 // overwritten. 2907 d, err = Open(dir, &Options{FS: noRemoveFS{FS: fs}}) 2908 require.NoError(t, err) 2909 require.NoError(t, d.Set([]byte("bar"), nil, nil)) 2910 require.NoError(t, d.Flush()) 2911 require.NoError(t, d.Close()) 2912 2913 // None of the external files should change despite modifying the linked 2914 // versions. 2915 for i, f := range files { 2916 afterBytes := readFile(f) 2917 require.Equal(t, fileBytes[i], afterBytes) 2918 } 2919 } 2920 2921 func TestIngest_UpdateSequenceNumber(t *testing.T) { 2922 mem := vfs.NewMem() 2923 cmp := base.DefaultComparer.Compare 2924 parse := func(input string) (*sstable.Writer, error) { 2925 f, err := mem.Create("ext") 2926 if err != nil { 2927 return nil, err 2928 } 2929 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 2930 TableFormat: sstable.TableFormatMax, 2931 }) 2932 for _, data := range strings.Split(input, "\n") { 2933 if strings.HasPrefix(data, "rangekey: ") { 2934 data = strings.TrimPrefix(data, "rangekey: ") 2935 s := keyspan.ParseSpan(data) 2936 err := rangekey.Encode(&s, w.AddRangeKey) 2937 if err != nil { 2938 return nil, err 2939 } 2940 continue 2941 } 2942 j := strings.Index(data, ":") 2943 if j < 0 { 2944 return nil, errors.Newf("malformed input: %s\n", data) 2945 } 2946 key := base.ParseInternalKey(data[:j]) 2947 value := []byte(data[j+1:]) 2948 if err := w.Add(key, value); err != nil { 2949 return nil, err 2950 } 2951 } 2952 return w, nil 2953 } 2954 2955 var ( 2956 seqnum uint64 2957 err error 2958 metas []*fileMetadata 2959 ) 2960 datadriven.RunTest(t, "testdata/ingest_update_seqnums", func(t *testing.T, td *datadriven.TestData) string { 2961 switch td.Cmd { 2962 case "starting-seqnum": 2963 seqnum, err = strconv.ParseUint(td.Input, 10, 64) 2964 if err != nil { 2965 return err.Error() 2966 } 2967 return "" 2968 2969 case "reset": 2970 metas = metas[:0] 2971 return "" 2972 2973 case "load": 2974 w, err := parse(td.Input) 2975 if err != nil { 2976 return err.Error() 2977 } 2978 if err = w.Close(); err != nil { 2979 return err.Error() 2980 } 2981 defer w.Close() 2982 2983 // Format the bounds of the table. 2984 wm, err := w.Metadata() 2985 if err != nil { 2986 return err.Error() 2987 } 2988 2989 // Upper bounds for range dels and range keys are expected to be sentinel 2990 // keys. 2991 maybeUpdateUpperBound := func(key base.InternalKey) base.InternalKey { 2992 switch k := key.Kind(); { 2993 case k == base.InternalKeyKindRangeDelete: 2994 key.Trailer = base.InternalKeyRangeDeleteSentinel 2995 case rangekey.IsRangeKey(k): 2996 return base.MakeExclusiveSentinelKey(k, key.UserKey) 2997 } 2998 return key 2999 } 3000 3001 // Construct the file metadata from the writer metadata. 3002 m := &fileMetadata{ 3003 SmallestSeqNum: 0, // Simulate an ingestion. 3004 LargestSeqNum: 0, 3005 } 3006 if wm.HasPointKeys { 3007 m.ExtendPointKeyBounds(cmp, wm.SmallestPoint, wm.LargestPoint) 3008 } 3009 if wm.HasRangeDelKeys { 3010 m.ExtendPointKeyBounds( 3011 cmp, 3012 wm.SmallestRangeDel, 3013 maybeUpdateUpperBound(wm.LargestRangeDel), 3014 ) 3015 } 3016 if wm.HasRangeKeys { 3017 m.ExtendRangeKeyBounds( 3018 cmp, 3019 wm.SmallestRangeKey, 3020 maybeUpdateUpperBound(wm.LargestRangeKey), 3021 ) 3022 } 3023 m.InitPhysicalBacking() 3024 if err := m.Validate(cmp, base.DefaultFormatter); err != nil { 3025 return err.Error() 3026 } 3027 3028 // Collect this file. 3029 metas = append(metas, m) 3030 3031 // Return an index number for the file. 3032 return fmt.Sprintf("file %d\n", len(metas)-1) 3033 3034 case "update-files": 3035 // Update the bounds across all files. 3036 if err = ingestUpdateSeqNum(cmp, base.DefaultFormatter, seqnum, ingestLoadResult{localMeta: metas}); err != nil { 3037 return err.Error() 3038 } 3039 3040 var buf bytes.Buffer 3041 for i, m := range metas { 3042 fmt.Fprintf(&buf, "file %d:\n", i) 3043 fmt.Fprintf(&buf, " combined: %s-%s\n", m.Smallest, m.Largest) 3044 fmt.Fprintf(&buf, " points: %s-%s\n", m.SmallestPointKey, m.LargestPointKey) 3045 fmt.Fprintf(&buf, " ranges: %s-%s\n", m.SmallestRangeKey, m.LargestRangeKey) 3046 } 3047 3048 return buf.String() 3049 3050 default: 3051 return fmt.Sprintf("unknown command %s\n", td.Cmd) 3052 } 3053 }) 3054 } 3055 3056 func TestIngestCleanup(t *testing.T) { 3057 fns := []base.FileNum{0, 1, 2} 3058 3059 testCases := []struct { 3060 closeFiles []base.FileNum 3061 cleanupFiles []base.FileNum 3062 wantErr string 3063 }{ 3064 // Close and remove all files. 3065 { 3066 closeFiles: fns, 3067 cleanupFiles: fns, 3068 }, 3069 // Remove a non-existent file. 3070 { 3071 closeFiles: fns, 3072 cleanupFiles: []base.FileNum{3}, 3073 wantErr: "unknown to the objstorage provider", 3074 }, 3075 // Remove a file that has not been closed. 3076 { 3077 closeFiles: []base.FileNum{0, 2}, 3078 cleanupFiles: fns, 3079 wantErr: oserror.ErrInvalid.Error(), 3080 }, 3081 // Remove all files, one of which is still open, plus a file that does not exist. 3082 { 3083 closeFiles: []base.FileNum{0, 2}, 3084 cleanupFiles: []base.FileNum{0, 1, 2, 3}, 3085 wantErr: oserror.ErrInvalid.Error(), // The first error encountered is due to the open file. 3086 }, 3087 } 3088 3089 for _, tc := range testCases { 3090 t.Run("", func(t *testing.T) { 3091 mem := vfs.NewMem() 3092 mem.UseWindowsSemantics(true) 3093 objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, "")) 3094 require.NoError(t, err) 3095 defer objProvider.Close() 3096 3097 // Create the files in the VFS. 3098 metaMap := make(map[base.FileNum]objstorage.Writable) 3099 for _, fn := range fns { 3100 w, _, err := objProvider.Create(context.Background(), base.FileTypeTable, fn.DiskFileNum(), objstorage.CreateOptions{}) 3101 require.NoError(t, err) 3102 3103 metaMap[fn] = w 3104 } 3105 3106 // Close a select number of files. 3107 for _, m := range tc.closeFiles { 3108 w, ok := metaMap[m] 3109 if !ok { 3110 continue 3111 } 3112 require.NoError(t, w.Finish()) 3113 } 3114 3115 // Cleanup the set of files in the FS. 3116 var toRemove []*fileMetadata 3117 for _, fn := range tc.cleanupFiles { 3118 m := &fileMetadata{FileNum: fn} 3119 m.InitPhysicalBacking() 3120 toRemove = append(toRemove, m) 3121 } 3122 3123 err = ingestCleanup(objProvider, toRemove) 3124 if tc.wantErr != "" { 3125 require.Error(t, err, "got no error, expected %s", tc.wantErr) 3126 require.Contains(t, err.Error(), tc.wantErr) 3127 } else { 3128 require.NoError(t, err) 3129 } 3130 }) 3131 } 3132 } 3133 3134 // fatalCapturingLogger captures a fatal error instead of panicking. 3135 type fatalCapturingLogger struct { 3136 t testing.TB 3137 err error 3138 } 3139 3140 // Infof implements the Logger interface. 3141 func (l *fatalCapturingLogger) Infof(fmt string, args ...interface{}) { 3142 l.t.Logf(fmt, args...) 3143 } 3144 3145 // Fatalf implements the Logger interface. 3146 func (l *fatalCapturingLogger) Fatalf(_ string, args ...interface{}) { 3147 l.err = args[0].(error) 3148 } 3149 3150 func TestIngestValidation(t *testing.T) { 3151 type keyVal struct { 3152 key, val []byte 3153 } 3154 type corruptionLocation int 3155 const ( 3156 corruptionLocationNone corruptionLocation = iota 3157 corruptionLocationStart 3158 corruptionLocationEnd 3159 corruptionLocationInternal 3160 ) 3161 type errLocation int 3162 const ( 3163 errLocationNone errLocation = iota 3164 errLocationIngest 3165 errLocationValidation 3166 ) 3167 const ( 3168 nKeys = 1_000 3169 keySize = 16 3170 valSize = 100 3171 blockSize = 100 3172 3173 ingestTableName = "ext" 3174 ) 3175 ingestPath := filepath.Join(t.TempDir(), ingestTableName) 3176 3177 seed := uint64(time.Now().UnixNano()) 3178 rng := rand.New(rand.NewSource(seed)) 3179 t.Logf("rng seed = %d", seed) 3180 3181 testCases := []struct { 3182 description string 3183 cLoc corruptionLocation 3184 wantErrType errLocation 3185 }{ 3186 { 3187 description: "no corruption", 3188 cLoc: corruptionLocationNone, 3189 wantErrType: errLocationNone, 3190 }, 3191 { 3192 description: "start block", 3193 cLoc: corruptionLocationStart, 3194 wantErrType: errLocationIngest, 3195 }, 3196 { 3197 description: "end block", 3198 cLoc: corruptionLocationEnd, 3199 wantErrType: errLocationIngest, 3200 }, 3201 { 3202 description: "non-end block", 3203 cLoc: corruptionLocationInternal, 3204 wantErrType: errLocationValidation, 3205 }, 3206 } 3207 3208 for _, tc := range testCases { 3209 t.Run(tc.description, func(t *testing.T) { 3210 var wg sync.WaitGroup 3211 wg.Add(1) 3212 3213 fs := vfs.NewMem() 3214 logger := &fatalCapturingLogger{t: t} 3215 opts := &Options{ 3216 FS: fs, 3217 Logger: logger, 3218 EventListener: &EventListener{ 3219 TableValidated: func(i TableValidatedInfo) { 3220 wg.Done() 3221 }, 3222 }, 3223 } 3224 opts.Experimental.ValidateOnIngest = true 3225 d, err := Open("", opts) 3226 require.NoError(t, err) 3227 defer func() { require.NoError(t, d.Close()) }() 3228 3229 corrupt := func(f vfs.File) { 3230 readable, err := sstable.NewSimpleReadable(f) 3231 require.NoError(t, err) 3232 // Compute the layout of the sstable in order to find the 3233 // appropriate block locations to corrupt. 3234 r, err := sstable.NewReader(readable, sstable.ReaderOptions{}) 3235 require.NoError(t, err) 3236 l, err := r.Layout() 3237 require.NoError(t, err) 3238 require.NoError(t, r.Close()) 3239 3240 // Select an appropriate data block to corrupt. 3241 var blockIdx int 3242 switch tc.cLoc { 3243 case corruptionLocationStart: 3244 blockIdx = 0 3245 case corruptionLocationEnd: 3246 blockIdx = len(l.Data) - 1 3247 case corruptionLocationInternal: 3248 blockIdx = 1 + rng.Intn(len(l.Data)-2) 3249 default: 3250 t.Fatalf("unknown corruptionLocation: %T", tc.cLoc) 3251 } 3252 bh := l.Data[blockIdx] 3253 3254 osF, err := os.OpenFile(ingestPath, os.O_RDWR, 0600) 3255 require.NoError(t, err) 3256 defer func() { require.NoError(t, osF.Close()) }() 3257 3258 // Corrupting a key will cause the ingestion to fail due to a 3259 // malformed key, rather than a block checksum mismatch. 3260 // Instead, we corrupt the last byte in the selected block, 3261 // before the trailer, which corresponds to a value. 3262 offset := bh.Offset + bh.Length - 1 3263 _, err = osF.WriteAt([]byte("\xff"), int64(offset)) 3264 require.NoError(t, err) 3265 } 3266 3267 type errT struct { 3268 errLoc errLocation 3269 err error 3270 } 3271 runIngest := func(keyVals []keyVal) (et errT) { 3272 // The vfs.File does not allow for random reads and writes. 3273 // Create a disk-backed file outside of the DB FS that we can 3274 // open as a regular os.File, if required. 3275 tmpFS := vfs.Default 3276 f, err := tmpFS.Create(ingestPath) 3277 require.NoError(t, err) 3278 defer func() { _ = tmpFS.Remove(ingestPath) }() 3279 3280 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 3281 BlockSize: blockSize, // Create many smaller blocks. 3282 Compression: NoCompression, // For simpler debugging. 3283 }) 3284 for _, kv := range keyVals { 3285 require.NoError(t, w.Set(kv.key, kv.val)) 3286 } 3287 require.NoError(t, w.Close()) 3288 3289 // Possibly corrupt the file. 3290 if tc.cLoc != corruptionLocationNone { 3291 f, err = tmpFS.Open(ingestPath) 3292 require.NoError(t, err) 3293 corrupt(f) 3294 } 3295 3296 // Copy the file into the DB's FS. 3297 _, err = vfs.Clone(tmpFS, fs, ingestPath, ingestTableName) 3298 require.NoError(t, err) 3299 3300 // Ingest the external table. 3301 err = d.Ingest([]string{ingestTableName}) 3302 if err != nil { 3303 et.errLoc = errLocationIngest 3304 et.err = err 3305 return 3306 } 3307 3308 // Wait for the validation on the sstable to complete. 3309 wg.Wait() 3310 3311 // Return any error encountered during validation. 3312 if logger.err != nil { 3313 et.errLoc = errLocationValidation 3314 et.err = logger.err 3315 } 3316 3317 return 3318 } 3319 3320 // Construct a set of keys to ingest. 3321 var keyVals []keyVal 3322 for i := 0; i < nKeys; i++ { 3323 key := make([]byte, keySize) 3324 _, err = rng.Read(key) 3325 require.NoError(t, err) 3326 3327 val := make([]byte, valSize) 3328 _, err = rng.Read(val) 3329 require.NoError(t, err) 3330 3331 keyVals = append(keyVals, keyVal{key, val}) 3332 } 3333 3334 // Keys must be sorted. 3335 sort.Slice(keyVals, func(i, j int) bool { 3336 return d.cmp(keyVals[i].key, keyVals[j].key) <= 0 3337 }) 3338 3339 // Run the ingestion. 3340 et := runIngest(keyVals) 3341 3342 // Assert we saw the errors we expect. 3343 switch tc.wantErrType { 3344 case errLocationNone: 3345 require.Equal(t, errLocationNone, et.errLoc) 3346 require.NoError(t, et.err) 3347 case errLocationIngest: 3348 require.Equal(t, errLocationIngest, et.errLoc) 3349 require.Error(t, et.err) 3350 require.True(t, errors.Is(et.err, base.ErrCorruption)) 3351 case errLocationValidation: 3352 require.Equal(t, errLocationValidation, et.errLoc) 3353 require.Error(t, et.err) 3354 require.True(t, errors.Is(et.err, base.ErrCorruption)) 3355 default: 3356 t.Fatalf("unknown wantErrType %T", tc.wantErrType) 3357 } 3358 }) 3359 } 3360 } 3361 3362 // BenchmarkManySSTables measures the cost of various operations with various 3363 // counts of SSTables within the database. 3364 func BenchmarkManySSTables(b *testing.B) { 3365 counts := []int{10, 1_000, 10_000, 100_000, 1_000_000} 3366 ops := []string{"ingest", "calculateInuseKeyRanges"} 3367 for _, op := range ops { 3368 b.Run(op, func(b *testing.B) { 3369 for _, count := range counts { 3370 b.Run(fmt.Sprintf("sstables=%d", count), func(b *testing.B) { 3371 mem := vfs.NewMem() 3372 d, err := Open("", &Options{ 3373 FS: mem, 3374 }) 3375 require.NoError(b, err) 3376 3377 var paths []string 3378 for i := 0; i < count; i++ { 3379 n := fmt.Sprintf("%07d", i) 3380 f, err := mem.Create(n) 3381 require.NoError(b, err) 3382 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 3383 require.NoError(b, w.Set([]byte(n), nil)) 3384 require.NoError(b, w.Close()) 3385 paths = append(paths, n) 3386 } 3387 require.NoError(b, d.Ingest(paths)) 3388 3389 { 3390 const broadIngest = "broad.sst" 3391 f, err := mem.Create(broadIngest) 3392 require.NoError(b, err) 3393 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 3394 require.NoError(b, w.Set([]byte("0"), nil)) 3395 require.NoError(b, w.Set([]byte("Z"), nil)) 3396 require.NoError(b, w.Close()) 3397 require.NoError(b, d.Ingest([]string{broadIngest})) 3398 } 3399 3400 switch op { 3401 case "ingest": 3402 runBenchmarkManySSTablesIngest(b, d, mem, count) 3403 case "calculateInuseKeyRanges": 3404 runBenchmarkManySSTablesInUseKeyRanges(b, d, count) 3405 } 3406 require.NoError(b, d.Close()) 3407 }) 3408 } 3409 }) 3410 } 3411 } 3412 3413 func runBenchmarkManySSTablesIngest(b *testing.B, d *DB, fs vfs.FS, count int) { 3414 b.ResetTimer() 3415 for i := 0; i < b.N; i++ { 3416 n := fmt.Sprintf("%07d", count+i) 3417 f, err := fs.Create(n) 3418 require.NoError(b, err) 3419 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) 3420 require.NoError(b, w.Set([]byte(n), nil)) 3421 require.NoError(b, w.Close()) 3422 require.NoError(b, d.Ingest([]string{n})) 3423 } 3424 } 3425 3426 func runBenchmarkManySSTablesInUseKeyRanges(b *testing.B, d *DB, count int) { 3427 // This benchmark is pretty contrived, but it's not easy to write a 3428 // microbenchmark for this in a more natural way. L6 has many files, and 3429 // L5 has 1 file spanning the entire breadth of L5. 3430 d.mu.Lock() 3431 defer d.mu.Unlock() 3432 v := d.mu.versions.currentVersion() 3433 b.ResetTimer() 3434 3435 smallest := []byte("0") 3436 largest := []byte("z") 3437 for i := 0; i < b.N; i++ { 3438 _ = calculateInuseKeyRanges(v, d.cmp, 0, numLevels-1, smallest, largest) 3439 } 3440 }