github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/db_test.go (about) 1 // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "path/filepath" 13 "slices" 14 "strconv" 15 "strings" 16 "sync" 17 "testing" 18 "time" 19 20 "github.com/cockroachdb/errors" 21 "github.com/cockroachdb/pebble/internal/base" 22 "github.com/cockroachdb/pebble/internal/cache" 23 "github.com/cockroachdb/pebble/internal/invariants" 24 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 25 "github.com/cockroachdb/pebble/sstable" 26 "github.com/cockroachdb/pebble/vfs" 27 "github.com/stretchr/testify/require" 28 "golang.org/x/exp/rand" 29 ) 30 31 // try repeatedly calls f, sleeping between calls with exponential back-off, 32 // until f returns a nil error or the total sleep time is greater than or equal 33 // to maxTotalSleep. It always calls f at least once. 34 func try(initialSleep, maxTotalSleep time.Duration, f func() error) error { 35 totalSleep := time.Duration(0) 36 for d := initialSleep; ; d *= 2 { 37 time.Sleep(d) 38 totalSleep += d 39 if err := f(); err == nil || totalSleep >= maxTotalSleep { 40 return err 41 } 42 } 43 } 44 45 func TestTry(t *testing.T) { 46 c := make(chan struct{}) 47 go func() { 48 time.Sleep(1 * time.Millisecond) 49 close(c) 50 }() 51 52 attemptsMu := sync.Mutex{} 53 attempts := 0 54 55 err := try(100*time.Microsecond, 20*time.Second, func() error { 56 attemptsMu.Lock() 57 attempts++ 58 attemptsMu.Unlock() 59 60 select { 61 default: 62 return errors.New("timed out") 63 case <-c: 64 return nil 65 } 66 }) 67 require.NoError(t, err) 68 69 attemptsMu.Lock() 70 a := attempts 71 attemptsMu.Unlock() 72 73 if a == 0 { 74 t.Fatalf("attempts: got 0, want > 0") 75 } 76 } 77 78 func TestBasicReads(t *testing.T) { 79 testCases := []struct { 80 dirname string 81 wantMap map[string]string 82 }{ 83 { 84 "db-stage-1", 85 map[string]string{ 86 "aaa": "", 87 "bar": "", 88 "baz": "", 89 "foo": "", 90 "quux": "", 91 "zzz": "", 92 }, 93 }, 94 { 95 "db-stage-2", 96 map[string]string{ 97 "aaa": "", 98 "bar": "", 99 "baz": "three", 100 "foo": "four", 101 "quux": "", 102 "zzz": "", 103 }, 104 }, 105 { 106 "db-stage-3", 107 map[string]string{ 108 "aaa": "", 109 "bar": "", 110 "baz": "three", 111 "foo": "four", 112 "quux": "", 113 "zzz": "", 114 }, 115 }, 116 { 117 "db-stage-4", 118 map[string]string{ 119 "aaa": "", 120 "bar": "", 121 "baz": "", 122 "foo": "five", 123 "quux": "six", 124 "zzz": "", 125 }, 126 }, 127 } 128 for _, tc := range testCases { 129 t.Run(tc.dirname, func(t *testing.T) { 130 fs := vfs.NewMem() 131 _, err := vfs.Clone(vfs.Default, fs, filepath.Join("testdata", tc.dirname), tc.dirname) 132 if err != nil { 133 t.Fatalf("%s: cloneFileSystem failed: %v", tc.dirname, err) 134 } 135 d, err := Open(tc.dirname, testingRandomized(t, &Options{ 136 FS: fs, 137 })) 138 if err != nil { 139 t.Fatalf("%s: Open failed: %v", tc.dirname, err) 140 } 141 for key, want := range tc.wantMap { 142 got, closer, err := d.Get([]byte(key)) 143 if err != nil && err != ErrNotFound { 144 t.Fatalf("%s: Get(%q) failed: %v", tc.dirname, key, err) 145 } 146 if string(got) != string(want) { 147 t.Fatalf("%s: Get(%q): got %q, want %q", tc.dirname, key, got, want) 148 } 149 if closer != nil { 150 closer.Close() 151 } 152 } 153 err = d.Close() 154 if err != nil { 155 t.Fatalf("%s: Close failed: %v", tc.dirname, err) 
156 } 157 }) 158 } 159 } 160 161 func TestBasicWrites(t *testing.T) { 162 d, err := Open("", testingRandomized(t, &Options{ 163 FS: vfs.NewMem(), 164 })) 165 require.NoError(t, err) 166 167 names := []string{ 168 "Alatar", 169 "Gandalf", 170 "Pallando", 171 "Radagast", 172 "Saruman", 173 "Joe", 174 } 175 wantMap := map[string]string{} 176 177 inBatch, batch, pending := false, &Batch{}, [][]string(nil) 178 set0 := func(k, v string) error { 179 return d.Set([]byte(k), []byte(v), nil) 180 } 181 del0 := func(k string) error { 182 return d.Delete([]byte(k), nil) 183 } 184 set1 := func(k, v string) error { 185 batch.Set([]byte(k), []byte(v), nil) 186 return nil 187 } 188 del1 := func(k string) error { 189 batch.Delete([]byte(k), nil) 190 return nil 191 } 192 set, del := set0, del0 193 194 testCases := []string{ 195 "set Gandalf Grey", 196 "set Saruman White", 197 "set Radagast Brown", 198 "delete Saruman", 199 "set Gandalf White", 200 "batch", 201 " set Alatar AliceBlue", 202 "apply", 203 "delete Pallando", 204 "set Alatar AntiqueWhite", 205 "set Pallando PapayaWhip", 206 "batch", 207 "apply", 208 "set Pallando PaleVioletRed", 209 "batch", 210 " delete Alatar", 211 " set Gandalf GhostWhite", 212 " set Saruman Seashell", 213 " delete Saruman", 214 " set Saruman SeaGreen", 215 " set Radagast RosyBrown", 216 " delete Pallando", 217 "apply", 218 "delete Radagast", 219 "delete Radagast", 220 "delete Radagast", 221 "set Gandalf Goldenrod", 222 "set Pallando PeachPuff", 223 "batch", 224 " delete Joe", 225 " delete Saruman", 226 " delete Radagast", 227 " delete Pallando", 228 " delete Gandalf", 229 " delete Alatar", 230 "apply", 231 "set Joe Plumber", 232 } 233 for i, tc := range testCases { 234 s := strings.Split(strings.TrimSpace(tc), " ") 235 switch s[0] { 236 case "set": 237 if err := set(s[1], s[2]); err != nil { 238 t.Fatalf("#%d %s: %v", i, tc, err) 239 } 240 if inBatch { 241 pending = append(pending, s) 242 } else { 243 wantMap[s[1]] = s[2] 244 } 245 case "delete": 246 if err := del(s[1]); err != nil { 247 t.Fatalf("#%d %s: %v", i, tc, err) 248 } 249 if inBatch { 250 pending = append(pending, s) 251 } else { 252 delete(wantMap, s[1]) 253 } 254 case "batch": 255 inBatch, batch, set, del = true, &Batch{}, set1, del1 256 case "apply": 257 if err := d.Apply(batch, nil); err != nil { 258 t.Fatalf("#%d %s: %v", i, tc, err) 259 } 260 for _, p := range pending { 261 switch p[0] { 262 case "set": 263 wantMap[p[1]] = p[2] 264 case "delete": 265 delete(wantMap, p[1]) 266 } 267 } 268 inBatch, pending, set, del = false, nil, set0, del0 269 default: 270 t.Fatalf("#%d %s: bad test case: %q", i, tc, s) 271 } 272 273 fail := false 274 for _, name := range names { 275 g, closer, err := d.Get([]byte(name)) 276 if err != nil && err != ErrNotFound { 277 t.Errorf("#%d %s: Get(%q): %v", i, tc, name, err) 278 fail = true 279 } 280 got, gOK := string(g), err == nil 281 want, wOK := wantMap[name] 282 if got != want || gOK != wOK { 283 t.Errorf("#%d %s: Get(%q): got %q, %t, want %q, %t", 284 i, tc, name, got, gOK, want, wOK) 285 fail = true 286 } 287 if closer != nil { 288 closer.Close() 289 } 290 } 291 if fail { 292 return 293 } 294 } 295 296 require.NoError(t, d.Close()) 297 } 298 299 func TestRandomWrites(t *testing.T) { 300 d, err := Open("", testingRandomized(t, &Options{ 301 FS: vfs.NewMem(), 302 MemTableSize: 8 * 1024, 303 })) 304 require.NoError(t, err) 305 306 keys := [64][]byte{} 307 wants := [64]int{} 308 for k := range keys { 309 keys[k] = []byte(strconv.Itoa(k)) 310 wants[k] = -1 311 } 312 xxx := 
bytes.Repeat([]byte("x"), 512) 313 314 rng := rand.New(rand.NewSource(123)) 315 const N = 1000 316 for i := 0; i < N; i++ { 317 k := rng.Intn(len(keys)) 318 if rng.Intn(20) != 0 { 319 wants[k] = rng.Intn(len(xxx) + 1) 320 if err := d.Set(keys[k], xxx[:wants[k]], nil); err != nil { 321 t.Fatalf("i=%d: Set: %v", i, err) 322 } 323 } else { 324 wants[k] = -1 325 if err := d.Delete(keys[k], nil); err != nil { 326 t.Fatalf("i=%d: Delete: %v", i, err) 327 } 328 } 329 330 if i != N-1 || rng.Intn(50) != 0 { 331 continue 332 } 333 for k := range keys { 334 got := -1 335 if v, closer, err := d.Get(keys[k]); err != nil { 336 if err != ErrNotFound { 337 t.Fatalf("Get: %v", err) 338 } 339 } else { 340 got = len(v) 341 closer.Close() 342 } 343 if got != wants[k] { 344 t.Errorf("i=%d, k=%d: got %d, want %d", i, k, got, wants[k]) 345 } 346 } 347 } 348 349 require.NoError(t, d.Close()) 350 } 351 352 func TestLargeBatch(t *testing.T) { 353 d, err := Open("", testingRandomized(t, &Options{ 354 FS: vfs.NewMem(), 355 MemTableSize: 1400, 356 MemTableStopWritesThreshold: 100, 357 })) 358 require.NoError(t, err) 359 360 verifyLSM := func(expected string) func() error { 361 return func() error { 362 d.mu.Lock() 363 s := d.mu.versions.currentVersion().String() 364 d.mu.Unlock() 365 if expected != s { 366 if testing.Verbose() { 367 fmt.Println(strings.TrimSpace(s)) 368 } 369 return errors.Errorf("expected %s, but found %s", expected, s) 370 } 371 return nil 372 } 373 } 374 375 logNum := func() base.DiskFileNum { 376 d.mu.Lock() 377 defer d.mu.Unlock() 378 return d.mu.log.queue[len(d.mu.log.queue)-1].fileNum 379 } 380 fileSize := func(fileNum base.DiskFileNum) int64 { 381 info, err := d.opts.FS.Stat(base.MakeFilepath(d.opts.FS, "", fileTypeLog, fileNum)) 382 require.NoError(t, err) 383 return info.Size() 384 } 385 memTableCreationSeqNum := func() uint64 { 386 d.mu.Lock() 387 defer d.mu.Unlock() 388 return d.mu.mem.mutable.logSeqNum 389 } 390 391 startLogNum := logNum() 392 startLogStartSize := fileSize(startLogNum) 393 startSeqNum := d.mu.versions.logSeqNum.Load() 394 395 // Write a key with a value larger than the memtable size. 396 require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("a"), 512), nil)) 397 398 // Verify that the large batch was written to the WAL that existed before it 399 // was committed. We verify that WAL rotation occurred, where the large batch 400 // was written to, and that the new WAL is empty. 401 endLogNum := logNum() 402 if startLogNum == endLogNum { 403 t.Fatal("expected WAL rotation") 404 } 405 startLogEndSize := fileSize(startLogNum) 406 if startLogEndSize == startLogStartSize { 407 t.Fatalf("expected large batch to be written to %s.log, but file size unchanged at %d", 408 startLogNum, startLogEndSize) 409 } 410 endLogSize := fileSize(endLogNum) 411 if endLogSize != 0 { 412 t.Fatalf("expected %s.log to be empty, but found %d", endLogNum, endLogSize) 413 } 414 if creationSeqNum := memTableCreationSeqNum(); creationSeqNum <= startSeqNum { 415 t.Fatalf("expected memTable.logSeqNum=%d > largeBatch.seqNum=%d", creationSeqNum, startSeqNum) 416 } 417 418 // Verify this results in one L0 table being created. 419 require.NoError(t, try(100*time.Microsecond, 20*time.Second, 420 verifyLSM("0.0:\n 000005:[a#10,SET-a#10,SET]\n"))) 421 422 require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 512), nil)) 423 424 // Verify this results in a second L0 table being created. 
425 require.NoError(t, try(100*time.Microsecond, 20*time.Second, 426 verifyLSM("0.0:\n 000005:[a#10,SET-a#10,SET]\n 000007:[b#11,SET-b#11,SET]\n"))) 427 428 // Allocate a bunch of batches to exhaust the batchPool. None of these 429 // batches should have a non-zero count. 430 for i := 0; i < 10; i++ { 431 b := d.NewBatch() 432 require.EqualValues(t, 0, b.Count()) 433 } 434 435 require.NoError(t, d.Close()) 436 } 437 438 func TestGetNoCache(t *testing.T) { 439 cache := NewCache(0) 440 defer cache.Unref() 441 442 d, err := Open("", testingRandomized(t, &Options{ 443 Cache: cache, 444 FS: vfs.NewMem(), 445 })) 446 require.NoError(t, err) 447 448 require.NoError(t, d.Set([]byte("a"), []byte("aa"), nil)) 449 require.NoError(t, d.Flush()) 450 verifyGet(t, d, []byte("a"), []byte("aa")) 451 452 require.NoError(t, d.Close()) 453 } 454 455 func TestGetMerge(t *testing.T) { 456 d, err := Open("", testingRandomized(t, &Options{ 457 FS: vfs.NewMem(), 458 })) 459 require.NoError(t, err) 460 461 key := []byte("a") 462 verify := func(expected string) { 463 val, closer, err := d.Get(key) 464 require.NoError(t, err) 465 466 if expected != string(val) { 467 t.Fatalf("expected %s, but got %s", expected, val) 468 } 469 closer.Close() 470 } 471 472 const val = "1" 473 for i := 1; i <= 3; i++ { 474 require.NoError(t, d.Merge(key, []byte(val), nil)) 475 476 expected := strings.Repeat(val, i) 477 verify(expected) 478 479 require.NoError(t, d.Flush()) 480 verify(expected) 481 } 482 483 require.NoError(t, d.Close()) 484 } 485 486 func TestMergeOrderSameAfterFlush(t *testing.T) { 487 // Ensure compaction iterator (used by flush) and user iterator process merge 488 // operands in the same order 489 d, err := Open("", testingRandomized(t, &Options{ 490 FS: vfs.NewMem(), 491 })) 492 require.NoError(t, err) 493 494 key := []byte("a") 495 verify := func(expected string) { 496 iter, _ := d.NewIter(nil) 497 if !iter.SeekGE([]byte("a")) { 498 t.Fatal("expected one value, but got empty iterator") 499 } 500 if expected != string(iter.Value()) { 501 t.Fatalf("expected %s, but got %s", expected, string(iter.Value())) 502 } 503 if !iter.SeekLT([]byte("b")) { 504 t.Fatal("expected one value, but got empty iterator") 505 } 506 if expected != string(iter.Value()) { 507 t.Fatalf("expected %s, but got %s", expected, string(iter.Value())) 508 } 509 require.NoError(t, iter.Close()) 510 } 511 512 require.NoError(t, d.Merge(key, []byte("0"), nil)) 513 require.NoError(t, d.Merge(key, []byte("1"), nil)) 514 515 verify("01") 516 require.NoError(t, d.Flush()) 517 verify("01") 518 519 require.NoError(t, d.Close()) 520 } 521 522 type closableMerger struct { 523 lastBuf []byte 524 closed bool 525 } 526 527 func (m *closableMerger) MergeNewer(value []byte) error { 528 m.lastBuf = append(m.lastBuf[:0], value...) 529 return nil 530 } 531 532 func (m *closableMerger) MergeOlder(value []byte) error { 533 m.lastBuf = append(m.lastBuf[:0], value...) 
534 return nil 535 } 536 537 func (m *closableMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { 538 return m.lastBuf, m, nil 539 } 540 541 func (m *closableMerger) Close() error { 542 m.closed = true 543 return nil 544 } 545 546 func TestMergerClosing(t *testing.T) { 547 m := &closableMerger{} 548 549 d, err := Open("", testingRandomized(t, &Options{ 550 FS: vfs.NewMem(), 551 Merger: &Merger{ 552 Merge: func(key, value []byte) (base.ValueMerger, error) { 553 return m, m.MergeNewer(value) 554 }, 555 }, 556 })) 557 require.NoError(t, err) 558 559 defer func() { 560 require.NoError(t, d.Close()) 561 }() 562 563 err = d.Merge([]byte("a"), []byte("b"), nil) 564 require.NoError(t, err) 565 require.False(t, m.closed) 566 567 val, closer, err := d.Get([]byte("a")) 568 require.NoError(t, err) 569 require.Equal(t, []byte("b"), val) 570 require.NotNil(t, closer) 571 require.False(t, m.closed) 572 _ = closer.Close() 573 require.True(t, m.closed) 574 } 575 576 func TestLogData(t *testing.T) { 577 d, err := Open("", testingRandomized(t, &Options{ 578 FS: vfs.NewMem(), 579 })) 580 require.NoError(t, err) 581 582 defer func() { 583 require.NoError(t, d.Close()) 584 }() 585 586 require.NoError(t, d.LogData([]byte("foo"), Sync)) 587 require.NoError(t, d.LogData([]byte("bar"), Sync)) 588 // TODO(itsbilal): Confirm that we wrote some bytes to the WAL. 589 // For now, LogData proceeding ahead without a panic is good enough. 590 } 591 592 func TestSingleDeleteGet(t *testing.T) { 593 d, err := Open("", testingRandomized(t, &Options{ 594 FS: vfs.NewMem(), 595 })) 596 require.NoError(t, err) 597 defer func() { 598 require.NoError(t, d.Close()) 599 }() 600 601 key := []byte("key") 602 val := []byte("val") 603 604 require.NoError(t, d.Set(key, val, nil)) 605 verifyGet(t, d, key, val) 606 607 key2 := []byte("key2") 608 val2 := []byte("val2") 609 610 require.NoError(t, d.Set(key2, val2, nil)) 611 verifyGet(t, d, key2, val2) 612 613 require.NoError(t, d.SingleDelete(key2, nil)) 614 verifyGetNotFound(t, d, key2) 615 } 616 617 func TestSingleDeleteFlush(t *testing.T) { 618 d, err := Open("", testingRandomized(t, &Options{ 619 FS: vfs.NewMem(), 620 })) 621 require.NoError(t, err) 622 defer func() { 623 require.NoError(t, d.Close()) 624 }() 625 626 key := []byte("key") 627 valFirst := []byte("first") 628 valSecond := []byte("second") 629 key2 := []byte("key2") 630 val2 := []byte("val2") 631 632 require.NoError(t, d.Set(key, valFirst, nil)) 633 require.NoError(t, d.Set(key2, val2, nil)) 634 require.NoError(t, d.Flush()) 635 636 require.NoError(t, d.SingleDelete(key, nil)) 637 require.NoError(t, d.Set(key, valSecond, nil)) 638 require.NoError(t, d.Delete(key2, nil)) 639 require.NoError(t, d.Set(key2, val2, nil)) 640 require.NoError(t, d.Flush()) 641 642 require.NoError(t, d.SingleDelete(key, nil)) 643 require.NoError(t, d.Delete(key2, nil)) 644 require.NoError(t, d.Flush()) 645 646 verifyGetNotFound(t, d, key) 647 verifyGetNotFound(t, d, key2) 648 } 649 650 func TestUnremovableSingleDelete(t *testing.T) { 651 d, err := Open("", testingRandomized(t, &Options{ 652 FS: vfs.NewMem(), 653 L0CompactionThreshold: 8, 654 })) 655 require.NoError(t, err) 656 defer func() { 657 require.NoError(t, d.Close()) 658 }() 659 660 key := []byte("key") 661 valFirst := []byte("valFirst") 662 valSecond := []byte("valSecond") 663 664 require.NoError(t, d.Set(key, valFirst, nil)) 665 ss := d.NewSnapshot() 666 defer ss.Close() 667 require.NoError(t, d.SingleDelete(key, nil)) 668 require.NoError(t, d.Set(key, valSecond, nil)) 669 
require.NoError(t, d.Flush()) 670 671 verifyGet(t, ss, key, valFirst) 672 verifyGet(t, d, key, valSecond) 673 674 require.NoError(t, d.SingleDelete(key, nil)) 675 676 verifyGet(t, ss, key, valFirst) 677 verifyGetNotFound(t, d, key) 678 679 require.NoError(t, d.Flush()) 680 681 verifyGet(t, ss, key, valFirst) 682 verifyGetNotFound(t, d, key) 683 } 684 685 func TestIterLeak(t *testing.T) { 686 for _, leak := range []bool{true, false} { 687 t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) { 688 for _, flush := range []bool{true, false} { 689 t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) { 690 d, err := Open("", testingRandomized(t, &Options{ 691 FS: vfs.NewMem(), 692 })) 693 require.NoError(t, err) 694 695 require.NoError(t, d.Set([]byte("a"), []byte("a"), nil)) 696 if flush { 697 require.NoError(t, d.Flush()) 698 } 699 iter, _ := d.NewIter(nil) 700 iter.First() 701 if !leak { 702 require.NoError(t, iter.Close()) 703 require.NoError(t, d.Close()) 704 } else { 705 defer iter.Close() 706 if err := d.Close(); err == nil { 707 t.Fatalf("expected failure, but found success") 708 } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { 709 t.Fatalf("expected leaked iterators, but found %+v", err) 710 } else { 711 t.Log(err.Error()) 712 } 713 } 714 }) 715 } 716 }) 717 } 718 } 719 720 // Make sure that we detect an iter leak when only one DB closes 721 // while the second db still holds a reference to the TableCache. 722 func TestIterLeakSharedCache(t *testing.T) { 723 for _, leak := range []bool{true, false} { 724 t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) { 725 for _, flush := range []bool{true, false} { 726 t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) { 727 d1, err := Open("", &Options{ 728 FS: vfs.NewMem(), 729 }) 730 require.NoError(t, err) 731 732 d2, err := Open("", &Options{ 733 FS: vfs.NewMem(), 734 }) 735 require.NoError(t, err) 736 737 require.NoError(t, d1.Set([]byte("a"), []byte("a"), nil)) 738 if flush { 739 require.NoError(t, d1.Flush()) 740 } 741 742 require.NoError(t, d2.Set([]byte("a"), []byte("a"), nil)) 743 if flush { 744 require.NoError(t, d2.Flush()) 745 } 746 747 // Check if leak detection works with only one db closing. 
748 { 749 iter1, _ := d1.NewIter(nil) 750 iter1.First() 751 if !leak { 752 require.NoError(t, iter1.Close()) 753 require.NoError(t, d1.Close()) 754 } else { 755 defer iter1.Close() 756 if err := d1.Close(); err == nil { 757 t.Fatalf("expected failure, but found success") 758 } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { 759 t.Fatalf("expected leaked iterators, but found %+v", err) 760 } else { 761 t.Log(err.Error()) 762 } 763 } 764 } 765 766 { 767 iter2, _ := d2.NewIter(nil) 768 iter2.First() 769 if !leak { 770 require.NoError(t, iter2.Close()) 771 require.NoError(t, d2.Close()) 772 } else { 773 defer iter2.Close() 774 if err := d2.Close(); err == nil { 775 t.Fatalf("expected failure, but found success") 776 } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { 777 t.Fatalf("expected leaked iterators, but found %+v", err) 778 } else { 779 t.Log(err.Error()) 780 } 781 } 782 } 783 784 }) 785 } 786 }) 787 } 788 } 789 790 func TestMemTableReservation(t *testing.T) { 791 opts := &Options{ 792 Cache: NewCache(128 << 10 /* 128 KB */), 793 MemTableSize: initialMemTableSize, 794 FS: vfs.NewMem(), 795 } 796 defer opts.Cache.Unref() 797 opts.testingRandomized(t) 798 opts.EnsureDefaults() 799 // We're going to be looking at and asserting the global memtable reservation 800 // amount below so we don't want to race with any triggered stats collections. 801 opts.private.disableTableStats = true 802 803 // Add a block to the cache. Note that the memtable size is larger than the 804 // cache size, so opening the DB should cause this block to be evicted. 805 tmpID := opts.Cache.NewID() 806 helloWorld := []byte("hello world") 807 value := cache.Alloc(len(helloWorld)) 808 copy(value.Buf(), helloWorld) 809 opts.Cache.Set(tmpID, base.FileNum(0).DiskFileNum(), 0, value).Release() 810 811 d, err := Open("", opts) 812 require.NoError(t, err) 813 814 checkReserved := func(expected int64) { 815 t.Helper() 816 if reserved := d.memTableReserved.Load(); expected != reserved { 817 t.Fatalf("expected %d reserved, but found %d", expected, reserved) 818 } 819 } 820 821 checkReserved(int64(opts.MemTableSize)) 822 if refs := d.mu.mem.queue[len(d.mu.mem.queue)-1].readerRefs.Load(); refs != 2 { 823 t.Fatalf("expected 2 refs, but found %d", refs) 824 } 825 // Verify the memtable reservation has caused our test block to be evicted. 826 if h := opts.Cache.Get(tmpID, base.FileNum(0).DiskFileNum(), 0); h.Get() != nil { 827 t.Fatalf("expected failure, but found success: %s", h.Get()) 828 } 829 830 // Flush the memtable. The memtable reservation should double because old 831 // memtable will be recycled, saved for the next memtable allocation. 832 require.NoError(t, d.Flush()) 833 checkReserved(int64(2 * opts.MemTableSize)) 834 // Flush again. The memtable reservation should be unchanged because at most 835 // 1 memtable may be preserved for recycling. 836 837 // Flush in the presence of an active iterator. The iterator will hold a 838 // reference to a readState which will in turn hold a reader reference to the 839 // memtable. 840 iter, _ := d.NewIter(nil) 841 require.NoError(t, d.Flush()) 842 // The flush moved the recycled memtable into position as an active mutable 843 // memtable. There are now two allocated memtables: 1 mutable and 1 pinned 844 // by the iterator's read state. 845 checkReserved(2 * int64(opts.MemTableSize)) 846 847 // Flushing again should increase the reservation total to 3x: 1 active 848 // mutable, 1 for recycling, 1 pinned by iterator's read state. 
849 require.NoError(t, d.Flush()) 850 checkReserved(3 * int64(opts.MemTableSize)) 851 852 // Closing the iterator will release the iterator's read state, and the old 853 // memtable will be moved into position as the next memtable to recycle. 854 // There was already a memtable ready to be recycled, so that memtable will 855 // be freed and the overall reservation total is reduced to 2x. 856 require.NoError(t, iter.Close()) 857 checkReserved(2 * int64(opts.MemTableSize)) 858 859 require.NoError(t, d.Close()) 860 } 861 862 func TestMemTableReservationLeak(t *testing.T) { 863 d, err := Open("", &Options{FS: vfs.NewMem()}) 864 require.NoError(t, err) 865 866 d.mu.Lock() 867 last := d.mu.mem.queue[len(d.mu.mem.queue)-1] 868 last.readerRef() 869 defer func() { 870 last.readerUnref(true) 871 }() 872 d.mu.Unlock() 873 if err := d.Close(); err == nil { 874 t.Fatalf("expected failure, but found success") 875 } else if !strings.HasPrefix(err.Error(), "leaked memtable reservation:") { 876 t.Fatalf("expected leaked memtable reservation, but found %+v", err) 877 } else { 878 t.Log(err.Error()) 879 } 880 } 881 882 func TestCacheEvict(t *testing.T) { 883 cache := NewCache(10 << 20) 884 defer cache.Unref() 885 886 d, err := Open("", &Options{ 887 Cache: cache, 888 FS: vfs.NewMem(), 889 }) 890 require.NoError(t, err) 891 892 for i := 0; i < 1000; i++ { 893 key := []byte(fmt.Sprintf("%04d", i)) 894 require.NoError(t, d.Set(key, key, nil)) 895 } 896 897 require.NoError(t, d.Flush()) 898 iter, _ := d.NewIter(nil) 899 for iter.First(); iter.Valid(); iter.Next() { 900 } 901 require.NoError(t, iter.Close()) 902 903 if size := cache.Size(); size == 0 { 904 t.Fatalf("expected non-zero cache size") 905 } 906 907 for i := 0; i < 1000; i++ { 908 key := []byte(fmt.Sprintf("%04d", i)) 909 require.NoError(t, d.Delete(key, nil)) 910 } 911 912 require.NoError(t, d.Compact([]byte("0"), []byte("1"), false)) 913 914 require.NoError(t, d.Close()) 915 916 if size := cache.Size(); size != 0 { 917 t.Fatalf("expected empty cache, but found %d", size) 918 } 919 } 920 921 func TestFlushEmpty(t *testing.T) { 922 d, err := Open("", testingRandomized(t, &Options{ 923 FS: vfs.NewMem(), 924 })) 925 require.NoError(t, err) 926 927 // Flushing an empty memtable should not fail. 928 require.NoError(t, d.Flush()) 929 require.NoError(t, d.Close()) 930 } 931 932 func TestRollManifest(t *testing.T) { 933 toPreserve := rand.Int31n(5) + 1 934 opts := &Options{ 935 MaxManifestFileSize: 1, 936 L0CompactionThreshold: 10, 937 L0StopWritesThreshold: 1000, 938 FS: vfs.NewMem(), 939 NumPrevManifest: int(toPreserve), 940 } 941 opts.DisableAutomaticCompactions = true 942 opts.testingRandomized(t) 943 d, err := Open("", opts) 944 require.NoError(t, err) 945 946 manifestFileNumber := func() base.DiskFileNum { 947 d.mu.Lock() 948 defer d.mu.Unlock() 949 return d.mu.versions.manifestFileNum 950 } 951 sizeRolloverState := func() (int64, int64) { 952 d.mu.Lock() 953 defer d.mu.Unlock() 954 return d.mu.versions.rotationHelper.DebugInfo() 955 } 956 957 current := func() string { 958 desc, err := Peek(d.dirname, d.opts.FS) 959 require.NoError(t, err) 960 return desc.ManifestFilename 961 } 962 963 lastManifestNum := manifestFileNumber() 964 manifestNums := []base.DiskFileNum{lastManifestNum} 965 for i := 0; i < 5; i++ { 966 // MaxManifestFileSize is 1, but the rollover logic also counts edits 967 // since the last snapshot to decide on rollover, so do as many flushes as 968 // it demands. 
969 lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState() 970 var expectedLastSnapshotCount, expectedEditsSinceSnapshotCount int64 971 switch i { 972 case 0: 973 // DB is empty. 974 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 0 975 case 1: 976 // First edit that caused rollover is not in the snapshot. 977 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 1 978 case 2: 979 // One flush is in the snapshot. One flush in the edit. 980 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 1, 1 981 case 3: 982 // Two flushes in the snapshot. One flush in the edit. Will need to do 983 // two more flushes, the first of which will be in the next snapshot. 984 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 2, 1 985 case 4: 986 // Four flushes in the snapshot. One flush in the edit. Will need to do 987 // four more flushes, three of which will be in the snapshot. 988 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 4, 1 989 } 990 require.Equal(t, expectedLastSnapshotCount, lastSnapshotCount) 991 require.Equal(t, expectedEditsSinceSnapshotCount, editsSinceSnapshotCount) 992 // Number of flushes to do to trigger the rollover. 993 steps := int(lastSnapshotCount - editsSinceSnapshotCount + 1) 994 // Steps can be <= 0, but we need to do at least one edit to trigger the 995 // rollover logic. 996 if steps <= 0 { 997 steps = 1 998 } 999 for j := 0; j < steps; j++ { 1000 require.NoError(t, d.Set([]byte("a"), nil, nil)) 1001 require.NoError(t, d.Flush()) 1002 } 1003 d.TestOnlyWaitForCleaning() 1004 num := manifestFileNumber() 1005 if lastManifestNum == num { 1006 t.Fatalf("manifest failed to roll %d: %d == %d", i, lastManifestNum, num) 1007 } 1008 1009 manifestNums = append(manifestNums, num) 1010 lastManifestNum = num 1011 1012 expectedCurrent := fmt.Sprintf("MANIFEST-%s", lastManifestNum) 1013 if v := current(); expectedCurrent != v { 1014 t.Fatalf("expected %s, but found %s", expectedCurrent, v) 1015 } 1016 } 1017 lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState() 1018 require.EqualValues(t, 8, lastSnapshotCount) 1019 require.EqualValues(t, 1, editsSinceSnapshotCount) 1020 1021 files, err := d.opts.FS.List("") 1022 require.NoError(t, err) 1023 1024 var manifests []string 1025 for _, filename := range files { 1026 fileType, _, ok := base.ParseFilename(d.opts.FS, filename) 1027 if !ok { 1028 continue 1029 } 1030 if fileType == fileTypeManifest { 1031 manifests = append(manifests, filename) 1032 } 1033 } 1034 slices.Sort(manifests) 1035 1036 var expected []string 1037 for i := len(manifestNums) - int(toPreserve) - 1; i < len(manifestNums); i++ { 1038 expected = append( 1039 expected, 1040 fmt.Sprintf("MANIFEST-%s", manifestNums[i]), 1041 ) 1042 } 1043 require.EqualValues(t, expected, manifests) 1044 1045 // Test the logic that uses the future snapshot size to rollover. 1046 // Reminder: we have a snapshot with 8 files and the manifest has 1 edit 1047 // (flush) with 1 file. 1048 // Add 8 more files with a different key. 1049 lastManifestNum = manifestFileNumber() 1050 for j := 0; j < 8; j++ { 1051 require.NoError(t, d.Set([]byte("c"), nil, nil)) 1052 require.NoError(t, d.Flush()) 1053 } 1054 lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() 1055 // Need 16 more files in edits to trigger a rollover. 
1056 require.EqualValues(t, 16, lastSnapshotCount) 1057 require.EqualValues(t, 1, editsSinceSnapshotCount) 1058 require.NotEqual(t, manifestFileNumber(), lastManifestNum) 1059 lastManifestNum = manifestFileNumber() 1060 // Do a compaction that moves 8 of the files from L0 to 1 file in L6. This 1061 // adds 9 files in edits. We still need 6 more files in edits based on the 1062 // last snapshot. But the current version has only 9 L0 files and 1 L6 file, 1063 // for a total of 10 files. So 1 flush should push us over that threshold. 1064 d.Compact([]byte("c"), []byte("d"), false) 1065 lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() 1066 require.EqualValues(t, 16, lastSnapshotCount) 1067 require.EqualValues(t, 10, editsSinceSnapshotCount) 1068 require.Equal(t, manifestFileNumber(), lastManifestNum) 1069 require.NoError(t, d.Set([]byte("c"), nil, nil)) 1070 require.NoError(t, d.Flush()) 1071 lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() 1072 require.EqualValues(t, 10, lastSnapshotCount) 1073 require.EqualValues(t, 1, editsSinceSnapshotCount) 1074 require.NotEqual(t, manifestFileNumber(), lastManifestNum) 1075 1076 require.NoError(t, d.Close()) 1077 } 1078 1079 func TestDBClosed(t *testing.T) { 1080 d, err := Open("", &Options{ 1081 FS: vfs.NewMem(), 1082 }) 1083 require.NoError(t, err) 1084 require.NoError(t, d.Close()) 1085 1086 catch := func(f func()) (err error) { 1087 defer func() { 1088 if r := recover(); r != nil { 1089 err = r.(error) 1090 } 1091 }() 1092 f() 1093 return nil 1094 } 1095 1096 require.True(t, errors.Is(catch(func() { _ = d.Close() }), ErrClosed)) 1097 1098 require.True(t, errors.Is(catch(func() { _ = d.Compact(nil, nil, false) }), ErrClosed)) 1099 require.True(t, errors.Is(catch(func() { _ = d.Flush() }), ErrClosed)) 1100 require.True(t, errors.Is(catch(func() { _, _ = d.AsyncFlush() }), ErrClosed)) 1101 1102 require.True(t, errors.Is(catch(func() { _, _, _ = d.Get(nil) }), ErrClosed)) 1103 require.True(t, errors.Is(catch(func() { _ = d.Delete(nil, nil) }), ErrClosed)) 1104 require.True(t, errors.Is(catch(func() { _ = d.DeleteRange(nil, nil, nil) }), ErrClosed)) 1105 require.True(t, errors.Is(catch(func() { _ = d.Ingest(nil) }), ErrClosed)) 1106 require.True(t, errors.Is(catch(func() { _ = d.LogData(nil, nil) }), ErrClosed)) 1107 require.True(t, errors.Is(catch(func() { _ = d.Merge(nil, nil, nil) }), ErrClosed)) 1108 require.True(t, errors.Is(catch(func() { _ = d.RatchetFormatMajorVersion(internalFormatNewest) }), ErrClosed)) 1109 require.True(t, errors.Is(catch(func() { _ = d.Set(nil, nil, nil) }), ErrClosed)) 1110 1111 require.True(t, errors.Is(catch(func() { _ = d.NewSnapshot() }), ErrClosed)) 1112 1113 b := d.NewIndexedBatch() 1114 require.True(t, errors.Is(catch(func() { _ = b.Commit(nil) }), ErrClosed)) 1115 require.True(t, errors.Is(catch(func() { _ = d.Apply(b, nil) }), ErrClosed)) 1116 require.True(t, errors.Is(catch(func() { _, _ = b.NewIter(nil) }), ErrClosed)) 1117 } 1118 1119 func TestDBConcurrentCommitCompactFlush(t *testing.T) { 1120 d, err := Open("", testingRandomized(t, &Options{ 1121 FS: vfs.NewMem(), 1122 })) 1123 require.NoError(t, err) 1124 1125 // Concurrently commit, compact, and flush in order to stress the locking around 1126 // those operations. 
1127 const n = 1000 1128 var wg sync.WaitGroup 1129 wg.Add(n) 1130 for i := 0; i < n; i++ { 1131 go func(i int) { 1132 defer wg.Done() 1133 _ = d.Set([]byte(fmt.Sprint(i)), nil, nil) 1134 var err error 1135 switch i % 3 { 1136 case 0: 1137 err = d.Compact(nil, []byte("\xff"), false) 1138 case 1: 1139 err = d.Flush() 1140 case 2: 1141 _, err = d.AsyncFlush() 1142 } 1143 require.NoError(t, err) 1144 }(i) 1145 } 1146 wg.Wait() 1147 1148 require.NoError(t, d.Close()) 1149 } 1150 1151 func TestDBConcurrentCompactClose(t *testing.T) { 1152 // Test closing while a compaction is ongoing. This ensures compaction code 1153 // detects the close and finishes cleanly. 1154 mem := vfs.NewMem() 1155 for i := 0; i < 100; i++ { 1156 opts := &Options{ 1157 FS: mem, 1158 MaxConcurrentCompactions: func() int { 1159 return 2 1160 }, 1161 } 1162 d, err := Open("", testingRandomized(t, opts)) 1163 require.NoError(t, err) 1164 1165 // Ingest a series of files containing a single key each. As the outer 1166 // loop progresses, these ingestions will build up compaction debt 1167 // causing compactions to be running concurrently with the close below. 1168 for j := 0; j < 10; j++ { 1169 path := fmt.Sprintf("ext%d", j) 1170 f, err := mem.Create(path) 1171 require.NoError(t, err) 1172 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1173 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1174 }) 1175 require.NoError(t, w.Set([]byte(fmt.Sprint(j)), nil)) 1176 require.NoError(t, w.Close()) 1177 require.NoError(t, d.Ingest([]string{path})) 1178 } 1179 1180 require.NoError(t, d.Close()) 1181 } 1182 } 1183 1184 func TestDBApplyBatchNilDB(t *testing.T) { 1185 d, err := Open("", &Options{FS: vfs.NewMem()}) 1186 require.NoError(t, err) 1187 1188 b1 := &Batch{} 1189 b1.Set([]byte("test"), nil, nil) 1190 1191 b2 := &Batch{} 1192 b2.Apply(b1, nil) 1193 if b2.memTableSize != 0 { 1194 t.Fatalf("expected memTableSize to not be set") 1195 } 1196 require.NoError(t, d.Apply(b2, nil)) 1197 if b1.memTableSize != b2.memTableSize { 1198 t.Fatalf("expected memTableSize %d, but found %d", b1.memTableSize, b2.memTableSize) 1199 } 1200 1201 require.NoError(t, d.Close()) 1202 } 1203 1204 func TestDBApplyBatchMismatch(t *testing.T) { 1205 srcDB, err := Open("", &Options{FS: vfs.NewMem()}) 1206 require.NoError(t, err) 1207 1208 applyDB, err := Open("", &Options{FS: vfs.NewMem()}) 1209 require.NoError(t, err) 1210 1211 err = func() (err error) { 1212 defer func() { 1213 if v := recover(); v != nil { 1214 err = errors.Errorf("%v", v) 1215 } 1216 }() 1217 1218 b := srcDB.NewBatch() 1219 b.Set([]byte("test"), nil, nil) 1220 return applyDB.Apply(b, nil) 1221 }() 1222 if err == nil || !strings.Contains(err.Error(), "pebble: batch db mismatch:") { 1223 t.Fatalf("expected error, but found %v", err) 1224 } 1225 1226 require.NoError(t, srcDB.Close()) 1227 require.NoError(t, applyDB.Close()) 1228 } 1229 1230 func TestCloseCleanerRace(t *testing.T) { 1231 mem := vfs.NewMem() 1232 for i := 0; i < 20; i++ { 1233 db, err := Open("", testingRandomized(t, &Options{FS: mem})) 1234 require.NoError(t, err) 1235 require.NoError(t, db.Set([]byte("a"), []byte("something"), Sync)) 1236 require.NoError(t, db.Flush()) 1237 // Ref the sstables so cannot be deleted. 1238 it, _ := db.NewIter(nil) 1239 require.NotNil(t, it) 1240 require.NoError(t, db.DeleteRange([]byte("a"), []byte("b"), Sync)) 1241 require.NoError(t, db.Compact([]byte("a"), []byte("b"), false)) 1242 // Only the iterator is keeping the sstables alive. 
1243 files, err := mem.List("/") 1244 require.NoError(t, err) 1245 var found bool 1246 for _, f := range files { 1247 if strings.HasSuffix(f, ".sst") { 1248 found = true 1249 break 1250 } 1251 } 1252 require.True(t, found) 1253 // Close the iterator and the db in succession so file cleaning races with DB.Close() -- 1254 // latter should wait for file cleaning to finish. 1255 require.NoError(t, it.Close()) 1256 require.NoError(t, db.Close()) 1257 files, err = mem.List("/") 1258 require.NoError(t, err) 1259 for _, f := range files { 1260 if strings.HasSuffix(f, ".sst") { 1261 t.Fatalf("found sst: %s", f) 1262 } 1263 } 1264 } 1265 } 1266 1267 func TestSSTablesWithApproximateSpanBytes(t *testing.T) { 1268 d, err := Open("", &Options{ 1269 FS: vfs.NewMem(), 1270 }) 1271 require.NoError(t, err) 1272 defer func() { 1273 if d != nil { 1274 require.NoError(t, d.Close()) 1275 } 1276 }() 1277 1278 // Create two sstables. 1279 // sstable is contained within keyspan (fileNum = 5). 1280 require.NoError(t, d.Set([]byte("c"), nil, nil)) 1281 require.NoError(t, d.Set([]byte("d"), nil, nil)) 1282 require.NoError(t, d.Flush()) 1283 1284 // sstable partially overlaps keyspan (fileNum = 7). 1285 require.NoError(t, d.Set([]byte("d"), nil, nil)) 1286 require.NoError(t, d.Set([]byte("g"), nil, nil)) 1287 require.NoError(t, d.Flush()) 1288 1289 // cannot use WithApproximateSpanBytes without WithProperties. 1290 _, err = d.SSTables(WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes()) 1291 require.Error(t, err) 1292 1293 // cannot use WithApproximateSpanBytes without WithKeyRangeFilter. 1294 _, err = d.SSTables(WithProperties(), WithApproximateSpanBytes()) 1295 require.Error(t, err) 1296 1297 tableInfos, err := d.SSTables(WithProperties(), WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes()) 1298 require.NoError(t, err) 1299 1300 for _, levelTables := range tableInfos { 1301 for _, table := range levelTables { 1302 approximateSpanBytes, err := strconv.ParseInt(table.Properties.UserProperties["approximate-span-bytes"], 10, 64) 1303 require.NoError(t, err) 1304 if table.FileNum == 5 { 1305 require.Equal(t, uint64(approximateSpanBytes), table.Size) 1306 } 1307 if table.FileNum == 7 { 1308 require.Less(t, uint64(approximateSpanBytes), table.Size) 1309 } 1310 } 1311 } 1312 } 1313 1314 func TestFilterSSTablesWithOption(t *testing.T) { 1315 d, err := Open("", &Options{ 1316 FS: vfs.NewMem(), 1317 }) 1318 require.NoError(t, err) 1319 defer func() { 1320 if d != nil { 1321 require.NoError(t, d.Close()) 1322 } 1323 }() 1324 1325 // Create two sstables. 
1326 require.NoError(t, d.Set([]byte("/Table/5"), nil, nil)) 1327 require.NoError(t, d.Flush()) 1328 require.NoError(t, d.Set([]byte("/Table/10"), nil, nil)) 1329 require.NoError(t, d.Flush()) 1330 1331 tableInfos, err := d.SSTables(WithKeyRangeFilter([]byte("/Table/5"), []byte("/Table/6"))) 1332 require.NoError(t, err) 1333 1334 totalTables := 0 1335 for _, levelTables := range tableInfos { 1336 totalTables += len(levelTables) 1337 } 1338 1339 // with filter second sstable should not be returned 1340 require.EqualValues(t, 1, totalTables) 1341 1342 tableInfos, err = d.SSTables() 1343 require.NoError(t, err) 1344 1345 totalTables = 0 1346 for _, levelTables := range tableInfos { 1347 totalTables += len(levelTables) 1348 } 1349 1350 // without filter 1351 require.EqualValues(t, 2, totalTables) 1352 } 1353 1354 func TestSSTables(t *testing.T) { 1355 d, err := Open("", &Options{ 1356 FS: vfs.NewMem(), 1357 }) 1358 require.NoError(t, err) 1359 defer func() { 1360 if d != nil { 1361 require.NoError(t, d.Close()) 1362 } 1363 }() 1364 1365 // Create two sstables. 1366 require.NoError(t, d.Set([]byte("hello"), nil, nil)) 1367 require.NoError(t, d.Flush()) 1368 require.NoError(t, d.Set([]byte("world"), nil, nil)) 1369 require.NoError(t, d.Flush()) 1370 1371 // by default returned table infos should not contain Properties 1372 tableInfos, err := d.SSTables() 1373 require.NoError(t, err) 1374 for _, levelTables := range tableInfos { 1375 for _, info := range levelTables { 1376 require.Nil(t, info.Properties) 1377 } 1378 } 1379 1380 // with opt `WithProperties()` the `Properties` in table info should not be nil 1381 tableInfos, err = d.SSTables(WithProperties()) 1382 require.NoError(t, err) 1383 for _, levelTables := range tableInfos { 1384 for _, info := range levelTables { 1385 require.NotNil(t, info.Properties) 1386 } 1387 } 1388 } 1389 1390 type testTracer struct { 1391 enabledOnlyForNonBackgroundContext bool 1392 buf strings.Builder 1393 } 1394 1395 func (t *testTracer) Infof(format string, args ...interface{}) {} 1396 func (t *testTracer) Errorf(format string, args ...interface{}) {} 1397 func (t *testTracer) Fatalf(format string, args ...interface{}) {} 1398 1399 func (t *testTracer) Eventf(ctx context.Context, format string, args ...interface{}) { 1400 if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() { 1401 return 1402 } 1403 fmt.Fprintf(&t.buf, format, args...) 1404 fmt.Fprint(&t.buf, "\n") 1405 } 1406 1407 func (t *testTracer) IsTracingEnabled(ctx context.Context) bool { 1408 if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() { 1409 return false 1410 } 1411 return true 1412 } 1413 1414 func TestTracing(t *testing.T) { 1415 if !invariants.Enabled { 1416 // The test relies on timing behavior injected when invariants.Enabled. 1417 return 1418 } 1419 var tracer testTracer 1420 c := NewCache(0) 1421 defer c.Unref() 1422 d, err := Open("", &Options{ 1423 FS: vfs.NewMem(), 1424 Cache: c, 1425 LoggerAndTracer: &tracer, 1426 }) 1427 require.NoError(t, err) 1428 defer func() { 1429 require.NoError(t, d.Close()) 1430 }() 1431 1432 // Create a sstable. 
1433 require.NoError(t, d.Set([]byte("hello"), nil, nil)) 1434 require.NoError(t, d.Flush()) 1435 _, closer, err := d.Get([]byte("hello")) 1436 require.NoError(t, err) 1437 closer.Close() 1438 readerInitTraceString := "reading 37 bytes took 5ms\nreading 628 bytes took 5ms\n" 1439 iterTraceString := "reading 27 bytes took 5ms\nreading 29 bytes took 5ms\n" 1440 require.Equal(t, readerInitTraceString+iterTraceString, tracer.buf.String()) 1441 1442 // Get again, but since it currently uses context.Background(), no trace 1443 // output is produced. 1444 tracer.buf.Reset() 1445 tracer.enabledOnlyForNonBackgroundContext = true 1446 _, closer, err = d.Get([]byte("hello")) 1447 require.NoError(t, err) 1448 closer.Close() 1449 require.Equal(t, "", tracer.buf.String()) 1450 1451 ctx, cancel := context.WithCancel(context.Background()) 1452 defer cancel() 1453 iter, _ := d.NewIterWithContext(ctx, nil) 1454 iter.SeekGE([]byte("hello")) 1455 iter.Close() 1456 require.Equal(t, iterTraceString, tracer.buf.String()) 1457 1458 tracer.buf.Reset() 1459 snap := d.NewSnapshot() 1460 iter, _ = snap.NewIterWithContext(ctx, nil) 1461 iter.SeekGE([]byte("hello")) 1462 iter.Close() 1463 require.Equal(t, iterTraceString, tracer.buf.String()) 1464 snap.Close() 1465 1466 tracer.buf.Reset() 1467 b := d.NewIndexedBatch() 1468 iter, err = b.NewIterWithContext(ctx, nil) 1469 require.NoError(t, err) 1470 iter.SeekGE([]byte("hello")) 1471 iter.Close() 1472 require.Equal(t, iterTraceString, tracer.buf.String()) 1473 b.Close() 1474 } 1475 1476 func TestMemtableIngestInversion(t *testing.T) { 1477 memFS := vfs.NewMem() 1478 opts := &Options{ 1479 FS: memFS, 1480 MemTableSize: 256 << 10, // 256 KB 1481 MemTableStopWritesThreshold: 1000, 1482 L0StopWritesThreshold: 1000, 1483 L0CompactionThreshold: 2, 1484 MaxConcurrentCompactions: func() int { 1485 return 1000 1486 }, 1487 } 1488 1489 const channelTimeout = 5 * time.Second 1490 1491 // We induce delay in compactions by passing in an EventListener that stalls on 1492 // the first TableCreated event for a compaction job we want to block. 1493 // FlushBegin and CompactionBegin have info on compaction start/output levels, 1494 // which is what we need to identify what compactions to block. However 1495 // FlushBegin and CompactionBegin are called while holding db.mu, so we cannot 1496 // block those events forever. Instead, we grab the job ID from those events 1497 // and store it. Then during TableCreated, we check if we're creating an output 1498 // for a job we have identified earlier as one to block, and then hold on a 1499 // semaphore there until there's a signal from the test code to resume with the 1500 // compaction. 1501 // 1502 // If nextBlockedCompaction is non-zero, we must block the next compaction 1503 // out of the nextBlockedCompaction - 3 start level. 1 means block the next 1504 // intra-L0 compaction and 2 means block the next flush (as flushes have 1505 // a -1 start level). 1506 var nextBlockedCompaction, blockedJobID int 1507 var blockedCompactionsMu sync.Mutex // protects the above two variables.
1508 nextSem := make(chan chan struct{}, 1) 1509 var el EventListener 1510 el.EnsureDefaults(testLogger{t: t}) 1511 el.FlushBegin = func(info FlushInfo) { 1512 blockedCompactionsMu.Lock() 1513 defer blockedCompactionsMu.Unlock() 1514 if nextBlockedCompaction == 2 { 1515 nextBlockedCompaction = 0 1516 blockedJobID = info.JobID 1517 } 1518 } 1519 el.CompactionBegin = func(info CompactionInfo) { 1520 // 0 = block nothing, 1 = block intra-L0 compaction, 2 = block flush, 1521 // 3 = block L0 -> LBase compaction, 4 = block compaction out of L1, and so on. 1522 blockedCompactionsMu.Lock() 1523 defer blockedCompactionsMu.Unlock() 1524 blockValue := info.Input[0].Level + 3 1525 if info.Input[0].Level == 0 && info.Output.Level == 0 { 1526 // Intra-L0 compaction, denoted by a blockValue of 1. 1527 blockValue = 1 1528 } 1529 if nextBlockedCompaction == blockValue { 1530 nextBlockedCompaction = 0 1531 blockedJobID = info.JobID 1532 } 1533 } 1534 el.TableCreated = func(info TableCreateInfo) { 1535 blockedCompactionsMu.Lock() 1536 if info.JobID != blockedJobID { 1537 blockedCompactionsMu.Unlock() 1538 return 1539 } 1540 blockedJobID = 0 1541 blockedCompactionsMu.Unlock() 1542 sem := make(chan struct{}) 1543 nextSem <- sem 1544 <-sem 1545 } 1546 tel := TeeEventListener(MakeLoggingEventListener(testLogger{t: t}), el) 1547 opts.EventListener = &tel 1548 opts.Experimental.L0CompactionConcurrency = 1 1549 d, err := Open("", opts) 1550 require.NoError(t, err) 1551 defer func() { 1552 if d != nil { 1553 require.NoError(t, d.Close()) 1554 } 1555 }() 1556 1557 printLSM := func() { 1558 d.mu.Lock() 1559 s := d.mu.versions.currentVersion().String() 1560 d.mu.Unlock() 1561 t.Logf("%s", s) 1562 } 1563 1564 // Create some sstables. These should go into L6. These are irrelevant for 1565 // the rest of the test. 1566 require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil)) 1567 require.NoError(t, d.Flush()) 1568 require.NoError(t, d.Set([]byte("d"), []byte("bar"), nil)) 1569 require.NoError(t, d.Flush()) 1570 require.NoError(t, d.Compact([]byte("a"), []byte("z"), true)) 1571 1572 var baseCompactionSem, flushSem, intraL0Sem chan struct{} 1573 // Block an L0 -> LBase compaction. This is necessary to induce intra-L0 1574 // compactions later on. 1575 blockedCompactionsMu.Lock() 1576 nextBlockedCompaction = 3 1577 blockedCompactionsMu.Unlock() 1578 timeoutSem := time.After(channelTimeout) 1579 t.Log("blocking an L0 -> LBase compaction") 1580 // Write sstables to L0 until we're blocked on an L0 -> LBase compaction. 1581 breakLoop := false 1582 for !breakLoop { 1583 select { 1584 case sem := <-nextSem: 1585 baseCompactionSem = sem 1586 breakLoop = true 1587 case <-timeoutSem: 1588 t.Fatal("did not get blocked on an LBase compaction") 1589 default: 1590 require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil)) 1591 require.NoError(t, d.Set([]byte("g"), []byte("bar"), nil)) 1592 require.NoError(t, d.Flush()) 1593 time.Sleep(100 * time.Millisecond) 1594 } 1595 } 1596 printLSM() 1597 1598 // Do 4 ingests, one with the key cc, one with bb and cc, and two with just bb. 1599 // The purpose of the sstable containing cc is to inflate the L0 sublevel 1600 // count of the interval at cc, as that's where we want the intra-L0 compaction 1601 // to be seeded. However we also need a file left of that interval to have 1602 // the same (or higher) sublevel to trigger the bug in 1603 // cockroachdb/cockroach#101896. That's why we ingest a file after it to 1604 // "bridge" the bb/cc intervals, and then ingest a file at bb.
These go 1605 // into sublevels like this: 1606 // 1607 // bb 1608 // bb 1609 // bb-----cc 1610 // cc 1611 // 1612 // Eventually, we'll drop an ingested file containing a range del starting at 1613 // cc around here: 1614 // 1615 // bb 1616 // bb cc---... 1617 // bb-----cc 1618 // cc 1619 { 1620 path := "ingest1.sst" 1621 f, err := memFS.Create(path) 1622 require.NoError(t, err) 1623 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1624 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1625 }) 1626 require.NoError(t, w.Set([]byte("cc"), []byte("foo"))) 1627 require.NoError(t, w.Close()) 1628 require.NoError(t, d.Ingest([]string{path})) 1629 } 1630 { 1631 path := "ingest2.sst" 1632 f, err := memFS.Create(path) 1633 require.NoError(t, err) 1634 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1635 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1636 }) 1637 require.NoError(t, w.Set([]byte("bb"), []byte("foo2"))) 1638 require.NoError(t, w.Set([]byte("cc"), []byte("foo2"))) 1639 require.NoError(t, w.Close()) 1640 require.NoError(t, d.Ingest([]string{path})) 1641 } 1642 { 1643 path := "ingest3.sst" 1644 f, err := memFS.Create(path) 1645 require.NoError(t, err) 1646 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1647 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1648 }) 1649 require.NoError(t, w.Set([]byte("bb"), []byte("foo3"))) 1650 require.NoError(t, w.Close()) 1651 require.NoError(t, d.Ingest([]string{path})) 1652 } 1653 { 1654 path := "ingest4.sst" 1655 f, err := memFS.Create(path) 1656 require.NoError(t, err) 1657 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1658 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1659 }) 1660 require.NoError(t, w.Set([]byte("bb"), []byte("foo4"))) 1661 require.NoError(t, w.Close()) 1662 require.NoError(t, d.Ingest([]string{path})) 1663 } 1664 1665 // We now have a base compaction blocked. Block a memtable flush to cause 1666 // memtables to queue up. 1667 // 1668 // Memtable (stuck): 1669 // 1670 // b-----------------g 1671 // 1672 // Relevant L0 sstables 1673 // 1674 // bb 1675 // bb 1676 // bb-----cc 1677 // cc 1678 blockedCompactionsMu.Lock() 1679 nextBlockedCompaction = 2 1680 blockedCompactionsMu.Unlock() 1681 t.Log("blocking a flush") 1682 require.NoError(t, d.Set([]byte("b"), []byte("foo2"), nil)) 1683 require.NoError(t, d.Set([]byte("g"), []byte("bar2"), nil)) 1684 _, _ = d.AsyncFlush() 1685 select { 1686 case sem := <-nextSem: 1687 flushSem = sem 1688 case <-time.After(channelTimeout): 1689 t.Fatal("did not get blocked on a flush") 1690 } 1691 // Add one memtable to the flush queue, and finish it off. 1692 // 1693 // Memtables (stuck): 1694 // 1695 // b-----------------g (waiting to flush) 1696 // b-----------------g (flushing, blocked) 1697 // 1698 // Relevant L0 sstables 1699 // 1700 // bb 1701 // bb 1702 // bb-----cc 1703 // cc 1704 require.NoError(t, d.Set([]byte("b"), []byte("foo3"), nil)) 1705 require.NoError(t, d.Set([]byte("g"), []byte("bar3"), nil)) 1706 // Note: this flush will wait for the earlier, blocked flush, but it closes 1707 // off the memtable, which is what we want. 1708 _, _ = d.AsyncFlush() 1709 1710 // Open a new mutable memtable. This gets us an earlier earliestUnflushedSeqNum 1711 // than the ingest below it.
1712 require.NoError(t, d.Set([]byte("c"), []byte("somethingbigishappening"), nil)) 1713 // Block an intra-L0 compaction, as one might happen around this time. 1714 blockedCompactionsMu.Lock() 1715 nextBlockedCompaction = 1 1716 blockedCompactionsMu.Unlock() 1717 t.Log("blocking an intra-L0 compaction") 1718 // Ingest a file containing a cc-e rangedel. 1719 // 1720 // Memtables: 1721 // 1722 // c (mutable) 1723 // b-----------------g (waiting to flush) 1724 // b-----------------g (flushing, blocked) 1725 // 1726 // Relevant L0 sstables 1727 // 1728 // bb 1729 // bb cc-----e (just ingested) 1730 // bb-----cc 1731 // cc 1732 { 1733 path := "ingest5.sst" 1734 f, err := memFS.Create(path) 1735 require.NoError(t, err) 1736 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1737 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1738 }) 1739 require.NoError(t, w.DeleteRange([]byte("cc"), []byte("e"))) 1740 require.NoError(t, w.Close()) 1741 require.NoError(t, d.Ingest([]string{path})) 1742 } 1743 t.Log("main ingest complete") 1744 printLSM() 1745 t.Logf("%s", d.Metrics().String()) 1746 1747 require.NoError(t, d.Set([]byte("d"), []byte("ThisShouldNotBeDeleted"), nil)) 1748 1749 // Do another ingest with a seqnum newer than d. The purpose of this is to 1750 // increase the LargestSeqNum of the intra-L0 compaction output *beyond* 1751 // the flush that contains d=ThisShouldNotBeDeleted, therefore causing 1752 // that point key to be deleted (in the buggy code). 1753 // 1754 // Memtables: 1755 // 1756 // c-----d (mutable) 1757 // b-----------------g (waiting to flush) 1758 // b-----------------g (flushing, blocked) 1759 // 1760 // Relevant L0 sstables 1761 // 1762 // bb cc 1763 // bb cc-----e (just ingested) 1764 // bb-----cc 1765 // cc 1766 { 1767 path := "ingest6.sst" 1768 f, err := memFS.Create(path) 1769 require.NoError(t, err) 1770 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1771 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1772 }) 1773 require.NoError(t, w.Set([]byte("cc"), []byte("doesntmatter"))) 1774 require.NoError(t, w.Close()) 1775 require.NoError(t, d.Ingest([]string{path})) 1776 } 1777 1778 // Unblock earlier flushes. We will first finish flushing the blocked 1779 // memtable, and end up in this state: 1780 // 1781 // Memtables: 1782 // 1783 // c-----d (mutable) 1784 // b-----------------g (waiting to flush) 1785 // 1786 // Relevant L0 sstables 1787 // 1788 // b-------------------g (irrelevant, just flushed) 1789 // bb cc (has LargestSeqNum > earliestUnflushedSeqNum) 1790 // bb cc-----e (has a rangedel) 1791 // bb-----cc 1792 // cc 1793 // 1794 // Note that while b----g is relatively old (and so has a low LargestSeqNum), 1795 // it bridges a bunch of intervals. Had we regenerated sublevels from scratch, 1796 // it'd have gone below the cc-e sstable. But due to #101896, we just slapped 1797 // it on top. Now, as long as our seed interval is the one at cc and our seed 1798 // file is the just-flushed L0 sstable, we will go down and include anything 1799 // in that interval even if it has a LargestSeqNum > earliestUnflushedSeqNum.
1800 // 1801 // All asterisked L0 sstables should now get picked in an intra-L0 compaction 1802 // right after the flush finishes, that we then block: 1803 // 1804 // b-------------------g* 1805 // bb* cc* 1806 // bb* cc-----e* 1807 // bb-----cc* 1808 // cc* 1809 t.Log("unblocking flush") 1810 flushSem <- struct{}{} 1811 printLSM() 1812 1813 select { 1814 case sem := <-nextSem: 1815 intraL0Sem = sem 1816 case <-time.After(channelTimeout): 1817 t.Fatal("did not get blocked on an intra L0 compaction") 1818 } 1819 1820 // Ensure all memtables are flushed. This will mean d=ThisShouldNotBeDeleted 1821 // will land in L0 and since that was the last key written to a memtable, 1822 // and the ingestion at cc came after it, the output of the intra-L0 1823 // compaction will elevate the cc-e rangedel above it and delete it 1824 // (if #101896 is not fixed). 1825 ch, _ := d.AsyncFlush() 1826 <-ch 1827 1828 // Unblock earlier intra-L0 compaction. 1829 t.Log("unblocking intraL0") 1830 intraL0Sem <- struct{}{} 1831 printLSM() 1832 1833 // Try reading d a couple times. 1834 for i := 0; i < 2; i++ { 1835 val, closer, err := d.Get([]byte("d")) 1836 require.NoError(t, err) 1837 require.Equal(t, []byte("ThisShouldNotBeDeleted"), val) 1838 if closer != nil { 1839 closer.Close() 1840 } 1841 time.Sleep(100 * time.Millisecond) 1842 } 1843 1844 // Unblock everything. 1845 baseCompactionSem <- struct{}{} 1846 } 1847 1848 func BenchmarkDelete(b *testing.B) { 1849 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 1850 const keyCount = 10000 1851 var keys [keyCount][]byte 1852 for i := 0; i < keyCount; i++ { 1853 keys[i] = []byte(strconv.Itoa(rng.Int())) 1854 } 1855 val := bytes.Repeat([]byte("x"), 10) 1856 1857 benchmark := func(b *testing.B, useSingleDelete bool) { 1858 d, err := Open( 1859 "", 1860 &Options{ 1861 FS: vfs.NewMem(), 1862 }) 1863 if err != nil { 1864 b.Fatal(err) 1865 } 1866 defer func() { 1867 if err := d.Close(); err != nil { 1868 b.Fatal(err) 1869 } 1870 }() 1871 1872 b.StartTimer() 1873 for _, key := range keys { 1874 _ = d.Set(key, val, nil) 1875 if useSingleDelete { 1876 _ = d.SingleDelete(key, nil) 1877 } else { 1878 _ = d.Delete(key, nil) 1879 } 1880 } 1881 // Manually flush as it is flushing/compaction where SingleDelete 1882 // performance shows up. With SingleDelete, we can elide all of the 1883 // SingleDelete and Set records. 
1884 if err := d.Flush(); err != nil { 1885 b.Fatal(err) 1886 } 1887 b.StopTimer() 1888 } 1889 1890 b.Run("delete", func(b *testing.B) { 1891 for i := 0; i < b.N; i++ { 1892 benchmark(b, false) 1893 } 1894 }) 1895 1896 b.Run("single-delete", func(b *testing.B) { 1897 for i := 0; i < b.N; i++ { 1898 benchmark(b, true) 1899 } 1900 }) 1901 } 1902 1903 func BenchmarkNewIterReadAmp(b *testing.B) { 1904 for _, readAmp := range []int{10, 100, 1000} { 1905 b.Run(strconv.Itoa(readAmp), func(b *testing.B) { 1906 opts := &Options{ 1907 FS: vfs.NewMem(), 1908 L0StopWritesThreshold: 1000, 1909 } 1910 opts.DisableAutomaticCompactions = true 1911 1912 d, err := Open("", opts) 1913 require.NoError(b, err) 1914 1915 for i := 0; i < readAmp; i++ { 1916 require.NoError(b, d.Set([]byte("a"), []byte("b"), NoSync)) 1917 require.NoError(b, d.Flush()) 1918 } 1919 1920 require.Equal(b, d.Metrics().ReadAmp(), readAmp) 1921 1922 b.StopTimer() 1923 b.ResetTimer() 1924 for i := 0; i < b.N; i++ { 1925 b.StartTimer() 1926 iter, _ := d.NewIter(nil) 1927 b.StopTimer() 1928 require.NoError(b, iter.Close()) 1929 } 1930 1931 require.NoError(b, d.Close()) 1932 }) 1933 } 1934 } 1935 1936 func verifyGet(t *testing.T, r Reader, key, expected []byte) { 1937 val, closer, err := r.Get(key) 1938 require.NoError(t, err) 1939 if !bytes.Equal(expected, val) { 1940 t.Fatalf("expected %s, but got %s", expected, val) 1941 } 1942 closer.Close() 1943 } 1944 1945 func verifyGetNotFound(t *testing.T, r Reader, key []byte) { 1946 val, _, err := r.Get(key) 1947 if err != base.ErrNotFound { 1948 t.Fatalf("expected nil, but got %s", val) 1949 } 1950 } 1951 1952 func BenchmarkRotateMemtables(b *testing.B) { 1953 o := &Options{FS: vfs.NewMem(), MemTableSize: 64 << 20 /* 64 MB */} 1954 d, err := Open("", o) 1955 require.NoError(b, err) 1956 1957 // We want to jump to full-sized memtables. 1958 d.mu.Lock() 1959 d.mu.mem.nextSize = o.MemTableSize 1960 d.mu.Unlock() 1961 require.NoError(b, d.Flush()) 1962 1963 b.ResetTimer() 1964 for i := 0; i < b.N; i++ { 1965 if err := d.Flush(); err != nil { 1966 b.Fatal(err) 1967 } 1968 } 1969 }