github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/db_test.go

// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/cache"
	"github.com/cockroachdb/pebble/internal/invariants"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

// try repeatedly calls f, sleeping between calls with exponential back-off,
// until f returns a nil error or the total sleep time is greater than or equal
// to maxTotalSleep. It always calls f at least once.
func try(initialSleep, maxTotalSleep time.Duration, f func() error) error {
	totalSleep := time.Duration(0)
	for d := initialSleep; ; d *= 2 {
		time.Sleep(d)
		totalSleep += d
		if err := f(); err == nil || totalSleep >= maxTotalSleep {
			return err
		}
	}
}

func TestTry(t *testing.T) {
	c := make(chan struct{})
	go func() {
		time.Sleep(1 * time.Millisecond)
		close(c)
	}()

	attemptsMu := sync.Mutex{}
	attempts := 0

	err := try(100*time.Microsecond, 20*time.Second, func() error {
		attemptsMu.Lock()
		attempts++
		attemptsMu.Unlock()

		select {
		default:
			return errors.New("timed out")
		case <-c:
			return nil
		}
	})
	require.NoError(t, err)

	attemptsMu.Lock()
	a := attempts
	attemptsMu.Unlock()

	if a == 0 {
		t.Fatalf("attempts: got 0, want > 0")
	}
}
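// Illustrative sketch (not part of the original test suite): using try to
// poll for an asynchronous condition with the same back-off parameters the
// tests in this file use. The condition callback is a hypothetical
// placeholder supplied by the caller.
func waitForCondition(cond func() bool) error {
	return try(100*time.Microsecond, 20*time.Second, func() error {
		if cond() {
			return nil
		}
		return errors.New("condition not yet satisfied")
	})
}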
func TestBasicReads(t *testing.T) {
	testCases := []struct {
		dirname string
		wantMap map[string]string
	}{
		{
			"db-stage-1",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "",
				"foo":  "",
				"quux": "",
				"zzz":  "",
			},
		},
		{
			"db-stage-2",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "three",
				"foo":  "four",
				"quux": "",
				"zzz":  "",
			},
		},
		{
			"db-stage-3",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "three",
				"foo":  "four",
				"quux": "",
				"zzz":  "",
			},
		},
		{
			"db-stage-4",
			map[string]string{
				"aaa":  "",
				"bar":  "",
				"baz":  "",
				"foo":  "five",
				"quux": "six",
				"zzz":  "",
			},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.dirname, func(t *testing.T) {
			fs := vfs.NewMem()
			_, err := vfs.Clone(vfs.Default, fs, filepath.Join("testdata", tc.dirname), tc.dirname)
			if err != nil {
				t.Fatalf("%s: cloneFileSystem failed: %v", tc.dirname, err)
			}
			d, err := Open(tc.dirname, testingRandomized(t, &Options{
				FS: fs,
			}))
			if err != nil {
				t.Fatalf("%s: Open failed: %v", tc.dirname, err)
			}
			for key, want := range tc.wantMap {
				got, closer, err := d.Get([]byte(key))
				if err != nil && err != ErrNotFound {
					t.Fatalf("%s: Get(%q) failed: %v", tc.dirname, key, err)
				}
				if string(got) != string(want) {
					t.Fatalf("%s: Get(%q): got %q, want %q", tc.dirname, key, got, want)
				}
				if closer != nil {
					closer.Close()
				}
			}
			err = d.Close()
			if err != nil {
				t.Fatalf("%s: Close failed: %v", tc.dirname, err)
			}
		})
	}
}

func TestBasicWrites(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	names := []string{
		"Alatar",
		"Gandalf",
		"Pallando",
		"Radagast",
		"Saruman",
		"Joe",
	}
	wantMap := map[string]string{}

	inBatch, batch, pending := false, &Batch{}, [][]string(nil)
	set0 := func(k, v string) error {
		return d.Set([]byte(k), []byte(v), nil)
	}
	del0 := func(k string) error {
		return d.Delete([]byte(k), nil)
	}
	set1 := func(k, v string) error {
		batch.Set([]byte(k), []byte(v), nil)
		return nil
	}
	del1 := func(k string) error {
		batch.Delete([]byte(k), nil)
		return nil
	}
	set, del := set0, del0

	testCases := []string{
		"set Gandalf Grey",
		"set Saruman White",
		"set Radagast Brown",
		"delete Saruman",
		"set Gandalf White",
		"batch",
		" set Alatar AliceBlue",
		"apply",
		"delete Pallando",
		"set Alatar AntiqueWhite",
		"set Pallando PapayaWhip",
		"batch",
		"apply",
		"set Pallando PaleVioletRed",
		"batch",
		" delete Alatar",
		" set Gandalf GhostWhite",
		" set Saruman Seashell",
		" delete Saruman",
		" set Saruman SeaGreen",
		" set Radagast RosyBrown",
		" delete Pallando",
		"apply",
		"delete Radagast",
		"delete Radagast",
		"delete Radagast",
		"set Gandalf Goldenrod",
		"set Pallando PeachPuff",
		"batch",
		" delete Joe",
		" delete Saruman",
		" delete Radagast",
		" delete Pallando",
		" delete Gandalf",
		" delete Alatar",
		"apply",
		"set Joe Plumber",
	}
	for i, tc := range testCases {
		s := strings.Split(strings.TrimSpace(tc), " ")
		switch s[0] {
		case "set":
			if err := set(s[1], s[2]); err != nil {
				t.Fatalf("#%d %s: %v", i, tc, err)
			}
			if inBatch {
				pending = append(pending, s)
			} else {
				wantMap[s[1]] = s[2]
			}
		case "delete":
			if err := del(s[1]); err != nil {
				t.Fatalf("#%d %s: %v", i, tc, err)
			}
			if inBatch {
				pending = append(pending, s)
			} else {
				delete(wantMap, s[1])
			}
		case "batch":
			inBatch, batch, set, del = true, &Batch{}, set1, del1
		case "apply":
			if err := d.Apply(batch, nil); err != nil {
				t.Fatalf("#%d %s: %v", i, tc, err)
			}
			for _, p := range pending {
				switch p[0] {
				case "set":
					wantMap[p[1]] = p[2]
				case "delete":
					delete(wantMap, p[1])
				}
			}
			inBatch, pending, set, del = false, nil, set0, del0
		default:
			t.Fatalf("#%d %s: bad test case: %q", i, tc, s)
		}

		fail := false
		for _, name := range names {
			g, closer, err := d.Get([]byte(name))
			if err != nil && err != ErrNotFound {
				t.Errorf("#%d %s: Get(%q): %v", i, tc, name, err)
				fail = true
			}
			got, gOK := string(g), err == nil
			want, wOK := wantMap[name]
			if got != want || gOK != wOK {
				t.Errorf("#%d %s: Get(%q): got %q, %t, want %q, %t",
					i, tc, name, got, gOK, want, wOK)
				fail = true
			}
			if closer != nil {
				closer.Close()
			}
		}
		if fail {
			return
		}
	}

	require.NoError(t, d.Close())
}
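// Illustrative sketch (not part of the original test suite): the batch write
// pattern exercised by TestBasicWrites, shown in isolation. Key and value
// contents are arbitrary placeholders.
func applyExampleBatch(d *DB) error {
	b := d.NewBatch()
	// Stage mutations in the batch; they are not visible until Apply.
	b.Set([]byte("alpha"), []byte("one"), nil)
	b.Delete([]byte("beta"), nil)
	// Apply commits the staged mutations atomically.
	return d.Apply(b, nil)
}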
func TestRandomWrites(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS:           vfs.NewMem(),
		MemTableSize: 8 * 1024,
	}))
	require.NoError(t, err)

	keys := [64][]byte{}
	wants := [64]int{}
	for k := range keys {
		keys[k] = []byte(strconv.Itoa(k))
		wants[k] = -1
	}
	xxx := bytes.Repeat([]byte("x"), 512)

	rng := rand.New(rand.NewSource(123))
	const N = 1000
	for i := 0; i < N; i++ {
		k := rng.Intn(len(keys))
		if rng.Intn(20) != 0 {
			wants[k] = rng.Intn(len(xxx) + 1)
			if err := d.Set(keys[k], xxx[:wants[k]], nil); err != nil {
				t.Fatalf("i=%d: Set: %v", i, err)
			}
		} else {
			wants[k] = -1
			if err := d.Delete(keys[k], nil); err != nil {
				t.Fatalf("i=%d: Delete: %v", i, err)
			}
		}

		if i != N-1 || rng.Intn(50) != 0 {
			continue
		}
		for k := range keys {
			got := -1
			if v, closer, err := d.Get(keys[k]); err != nil {
				if err != ErrNotFound {
					t.Fatalf("Get: %v", err)
				}
			} else {
				got = len(v)
				closer.Close()
			}
			if got != wants[k] {
				t.Errorf("i=%d, k=%d: got %d, want %d", i, k, got, wants[k])
			}
		}
	}

	require.NoError(t, d.Close())
}

func TestLargeBatch(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS:                          vfs.NewMem(),
		MemTableSize:                1400,
		MemTableStopWritesThreshold: 100,
	}))
	require.NoError(t, err)

	verifyLSM := func(expected string) func() error {
		return func() error {
			d.mu.Lock()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			if expected != s {
				if testing.Verbose() {
					fmt.Println(strings.TrimSpace(s))
				}
				return errors.Errorf("expected %s, but found %s", expected, s)
			}
			return nil
		}
	}

	logNum := func() base.DiskFileNum {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.log.queue[len(d.mu.log.queue)-1].fileNum
	}
	fileSize := func(fileNum base.DiskFileNum) int64 {
		info, err := d.opts.FS.Stat(base.MakeFilepath(d.opts.FS, "", fileTypeLog, fileNum))
		require.NoError(t, err)
		return info.Size()
	}
	memTableCreationSeqNum := func() uint64 {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.mem.mutable.logSeqNum
	}

	startLogNum := logNum()
	startLogStartSize := fileSize(startLogNum)
	startSeqNum := d.mu.versions.logSeqNum.Load()

	// Write a key with a value larger than the memtable size.
	require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("a"), 512), nil))

	// Verify that the large batch was written to the WAL that existed before it
	// was committed. We verify that a WAL rotation occurred, that the large
	// batch was written to the previous WAL, and that the new WAL is empty.
	endLogNum := logNum()
	if startLogNum == endLogNum {
		t.Fatal("expected WAL rotation")
	}
	startLogEndSize := fileSize(startLogNum)
	if startLogEndSize == startLogStartSize {
		t.Fatalf("expected large batch to be written to %s.log, but file size unchanged at %d",
			startLogNum, startLogEndSize)
	}
	endLogSize := fileSize(endLogNum)
	if endLogSize != 0 {
		t.Fatalf("expected %s.log to be empty, but found %d", endLogNum, endLogSize)
	}
	if creationSeqNum := memTableCreationSeqNum(); creationSeqNum <= startSeqNum {
		t.Fatalf("expected memTable.logSeqNum=%d > largeBatch.seqNum=%d", creationSeqNum, startSeqNum)
	}

	// Verify this results in one L0 table being created.
	require.NoError(t, try(100*time.Microsecond, 20*time.Second,
		verifyLSM("0.0:\n  000005:[a#10,SET-a#10,SET]\n")))

	require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 512), nil))

	// Verify this results in a second L0 table being created.
	require.NoError(t, try(100*time.Microsecond, 20*time.Second,
		verifyLSM("0.0:\n  000005:[a#10,SET-a#10,SET]\n  000007:[b#11,SET-b#11,SET]\n")))

	// Allocate a bunch of batches to exhaust the batchPool. None of these
	// batches should have a non-zero count.
	for i := 0; i < 10; i++ {
		b := d.NewBatch()
		require.EqualValues(t, 0, b.Count())
	}

	require.NoError(t, d.Close())
}
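// Illustrative sketch (not part of the original test suite): polling the LSM
// shape with try until it matches an expected version string, packaging the
// verifyLSM pattern from TestLargeBatch as a reusable helper.
func waitForLSM(d *DB, expected string) error {
	return try(100*time.Microsecond, 20*time.Second, func() error {
		d.mu.Lock()
		s := d.mu.versions.currentVersion().String()
		d.mu.Unlock()
		if s != expected {
			return errors.Errorf("expected %s, but found %s", expected, s)
		}
		return nil
	})
}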
func TestGetNoCache(t *testing.T) {
	cache := NewCache(0)
	defer cache.Unref()

	d, err := Open("", testingRandomized(t, &Options{
		Cache: cache,
		FS:    vfs.NewMem(),
	}))
	require.NoError(t, err)

	require.NoError(t, d.Set([]byte("a"), []byte("aa"), nil))
	require.NoError(t, d.Flush())
	verifyGet(t, d, []byte("a"), []byte("aa"))

	require.NoError(t, d.Close())
}

func TestGetMerge(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	key := []byte("a")
	verify := func(expected string) {
		val, closer, err := d.Get(key)
		require.NoError(t, err)

		if expected != string(val) {
			t.Fatalf("expected %s, but got %s", expected, val)
		}
		closer.Close()
	}

	const val = "1"
	for i := 1; i <= 3; i++ {
		require.NoError(t, d.Merge(key, []byte(val), nil))

		expected := strings.Repeat(val, i)
		verify(expected)

		require.NoError(t, d.Flush())
		verify(expected)
	}

	require.NoError(t, d.Close())
}

func TestMergeOrderSameAfterFlush(t *testing.T) {
	// Ensure compaction iterator (used by flush) and user iterator process merge
	// operands in the same order.
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	key := []byte("a")
	verify := func(expected string) {
		iter, _ := d.NewIter(nil)
		if !iter.SeekGE([]byte("a")) {
			t.Fatal("expected one value, but got empty iterator")
		}
		if expected != string(iter.Value()) {
			t.Fatalf("expected %s, but got %s", expected, string(iter.Value()))
		}
		if !iter.SeekLT([]byte("b")) {
			t.Fatal("expected one value, but got empty iterator")
		}
		if expected != string(iter.Value()) {
			t.Fatalf("expected %s, but got %s", expected, string(iter.Value()))
		}
		require.NoError(t, iter.Close())
	}

	require.NoError(t, d.Merge(key, []byte("0"), nil))
	require.NoError(t, d.Merge(key, []byte("1"), nil))

	verify("01")
	require.NoError(t, d.Flush())
	verify("01")

	require.NoError(t, d.Close())
}

type closableMerger struct {
	lastBuf []byte
	closed  bool
}

func (m *closableMerger) MergeNewer(value []byte) error {
	m.lastBuf = append(m.lastBuf[:0], value...)
	return nil
}

func (m *closableMerger) MergeOlder(value []byte) error {
	m.lastBuf = append(m.lastBuf[:0], value...)
	return nil
}

func (m *closableMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
	return m.lastBuf, m, nil
}

func (m *closableMerger) Close() error {
	m.closed = true
	return nil
}

func TestMergerClosing(t *testing.T) {
	m := &closableMerger{}

	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
		Merger: &Merger{
			Merge: func(key, value []byte) (base.ValueMerger, error) {
				return m, m.MergeNewer(value)
			},
		},
	}))
	require.NoError(t, err)

	defer func() {
		require.NoError(t, d.Close())
	}()

	err = d.Merge([]byte("a"), []byte("b"), nil)
	require.NoError(t, err)
	require.False(t, m.closed)

	val, closer, err := d.Get([]byte("a"))
	require.NoError(t, err)
	require.Equal(t, []byte("b"), val)
	require.NotNil(t, closer)
	require.False(t, m.closed)
	_ = closer.Close()
	require.True(t, m.closed)
}
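// Illustrative sketch (not part of the original test suite): a minimal
// concatenating base.ValueMerger, structured like closableMerger above but
// without the io.Closer bookkeeping. It is not wired into any test.
type concatMerger struct {
	buf []byte
}

func (m *concatMerger) MergeNewer(value []byte) error {
	m.buf = append(m.buf, value...)
	return nil
}

func (m *concatMerger) MergeOlder(value []byte) error {
	// Older operands are prepended so the result reads oldest-to-newest.
	m.buf = append(append([]byte(nil), value...), m.buf...)
	return nil
}

func (m *concatMerger) Finish(includesBase bool) ([]byte, io.Closer, error) {
	return m.buf, nil, nil
}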
func TestLogData(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	defer func() {
		require.NoError(t, d.Close())
	}()

	require.NoError(t, d.LogData([]byte("foo"), Sync))
	require.NoError(t, d.LogData([]byte("bar"), Sync))
	// TODO(itsbilal): Confirm that we wrote some bytes to the WAL.
	// For now, LogData proceeding ahead without a panic is good enough.
}

func TestSingleDeleteGet(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	key := []byte("key")
	val := []byte("val")

	require.NoError(t, d.Set(key, val, nil))
	verifyGet(t, d, key, val)

	key2 := []byte("key2")
	val2 := []byte("val2")

	require.NoError(t, d.Set(key2, val2, nil))
	verifyGet(t, d, key2, val2)

	require.NoError(t, d.SingleDelete(key2, nil))
	verifyGetNotFound(t, d, key2)
}

func TestSingleDeleteFlush(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	key := []byte("key")
	valFirst := []byte("first")
	valSecond := []byte("second")
	key2 := []byte("key2")
	val2 := []byte("val2")

	require.NoError(t, d.Set(key, valFirst, nil))
	require.NoError(t, d.Set(key2, val2, nil))
	require.NoError(t, d.Flush())

	require.NoError(t, d.SingleDelete(key, nil))
	require.NoError(t, d.Set(key, valSecond, nil))
	require.NoError(t, d.Delete(key2, nil))
	require.NoError(t, d.Set(key2, val2, nil))
	require.NoError(t, d.Flush())

	require.NoError(t, d.SingleDelete(key, nil))
	require.NoError(t, d.Delete(key2, nil))
	require.NoError(t, d.Flush())

	verifyGetNotFound(t, d, key)
	verifyGetNotFound(t, d, key2)
}

func TestUnremovableSingleDelete(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS:                    vfs.NewMem(),
		L0CompactionThreshold: 8,
	}))
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	key := []byte("key")
	valFirst := []byte("valFirst")
	valSecond := []byte("valSecond")

	require.NoError(t, d.Set(key, valFirst, nil))
	ss := d.NewSnapshot()
	defer ss.Close()
	require.NoError(t, d.SingleDelete(key, nil))
	require.NoError(t, d.Set(key, valSecond, nil))
	require.NoError(t, d.Flush())

	verifyGet(t, ss, key, valFirst)
	verifyGet(t, d, key, valSecond)

	require.NoError(t, d.SingleDelete(key, nil))

	verifyGet(t, ss, key, valFirst)
	verifyGetNotFound(t, d, key)

	require.NoError(t, d.Flush())

	verifyGet(t, ss, key, valFirst)
	verifyGetNotFound(t, d, key)
}

func TestIterLeak(t *testing.T) {
	for _, leak := range []bool{true, false} {
		t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) {
			for _, flush := range []bool{true, false} {
				t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) {
					d, err := Open("", testingRandomized(t, &Options{
						FS: vfs.NewMem(),
					}))
					require.NoError(t, err)

					require.NoError(t, d.Set([]byte("a"), []byte("a"), nil))
					if flush {
						require.NoError(t, d.Flush())
					}
					iter, _ := d.NewIter(nil)
					iter.First()
					if !leak {
						require.NoError(t, iter.Close())
						require.NoError(t, d.Close())
					} else {
						defer iter.Close()
						if err := d.Close(); err == nil {
							t.Fatalf("expected failure, but found success")
						} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
							t.Fatalf("expected leaked iterators, but found %+v", err)
						} else {
							t.Log(err.Error())
						}
					}
				})
			}
		})
	}
}

// Make sure that we detect an iter leak when only one DB closes
// while the second db still holds a reference to the TableCache.
func TestIterLeakSharedCache(t *testing.T) {
	for _, leak := range []bool{true, false} {
		t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) {
			for _, flush := range []bool{true, false} {
				t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) {
					d1, err := Open("", &Options{
						FS: vfs.NewMem(),
					})
					require.NoError(t, err)

					d2, err := Open("", &Options{
						FS: vfs.NewMem(),
					})
					require.NoError(t, err)

					require.NoError(t, d1.Set([]byte("a"), []byte("a"), nil))
					if flush {
						require.NoError(t, d1.Flush())
					}

					require.NoError(t, d2.Set([]byte("a"), []byte("a"), nil))
					if flush {
						require.NoError(t, d2.Flush())
					}

					// Check if leak detection works with only one db closing.
					{
						iter1, _ := d1.NewIter(nil)
						iter1.First()
						if !leak {
							require.NoError(t, iter1.Close())
							require.NoError(t, d1.Close())
						} else {
							defer iter1.Close()
							if err := d1.Close(); err == nil {
								t.Fatalf("expected failure, but found success")
							} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
								t.Fatalf("expected leaked iterators, but found %+v", err)
							} else {
								t.Log(err.Error())
							}
						}
					}

					{
						iter2, _ := d2.NewIter(nil)
						iter2.First()
						if !leak {
							require.NoError(t, iter2.Close())
							require.NoError(t, d2.Close())
						} else {
							defer iter2.Close()
							if err := d2.Close(); err == nil {
								t.Fatalf("expected failure, but found success")
							} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
								t.Fatalf("expected leaked iterators, but found %+v", err)
							} else {
								t.Log(err.Error())
							}
						}
					}

				})
			}
		})
	}
}

func TestMemTableReservation(t *testing.T) {
	opts := &Options{
		Cache:        NewCache(128 << 10 /* 128 KB */),
		MemTableSize: initialMemTableSize,
		FS:           vfs.NewMem(),
	}
	defer opts.Cache.Unref()
	opts.testingRandomized(t)
	opts.EnsureDefaults()
	// We're going to be looking at and asserting the global memtable reservation
	// amount below so we don't want to race with any triggered stats collections.
	opts.private.disableTableStats = true

	// Add a block to the cache. Note that the memtable size is larger than the
	// cache size, so opening the DB should cause this block to be evicted.
	tmpID := opts.Cache.NewID()
	helloWorld := []byte("hello world")
	value := cache.Alloc(len(helloWorld))
	copy(value.Buf(), helloWorld)
	opts.Cache.Set(tmpID, base.FileNum(0).DiskFileNum(), 0, value).Release()

	d, err := Open("", opts)
	require.NoError(t, err)

	checkReserved := func(expected int64) {
		t.Helper()
		if reserved := d.memTableReserved.Load(); expected != reserved {
			t.Fatalf("expected %d reserved, but found %d", expected, reserved)
		}
	}

	checkReserved(int64(opts.MemTableSize))
	if refs := d.mu.mem.queue[len(d.mu.mem.queue)-1].readerRefs.Load(); refs != 2 {
		t.Fatalf("expected 2 refs, but found %d", refs)
	}
	// Verify the memtable reservation has caused our test block to be evicted.
	if h := opts.Cache.Get(tmpID, base.FileNum(0).DiskFileNum(), 0); h.Get() != nil {
		t.Fatalf("expected failure, but found success: %s", h.Get())
	}

	// Flush the memtable. The memtable reservation should double because the old
	// memtable will be recycled, saved for the next memtable allocation.
	require.NoError(t, d.Flush())
	checkReserved(int64(2 * opts.MemTableSize))
	// Flush again. The memtable reservation should be unchanged because at most
	// 1 memtable may be preserved for recycling.

	// Flush in the presence of an active iterator. The iterator will hold a
	// reference to a readState which will in turn hold a reader reference to the
	// memtable.
	iter, _ := d.NewIter(nil)
	require.NoError(t, d.Flush())
	// The flush moved the recycled memtable into position as an active mutable
	// memtable. There are now two allocated memtables: 1 mutable and 1 pinned
	// by the iterator's read state.
	checkReserved(2 * int64(opts.MemTableSize))

	// Flushing again should increase the reservation total to 3x: 1 active
	// mutable, 1 for recycling, 1 pinned by iterator's read state.
	require.NoError(t, d.Flush())
	checkReserved(3 * int64(opts.MemTableSize))

	// Closing the iterator will release the iterator's read state, and the old
	// memtable will be moved into position as the next memtable to recycle.
	// There was already a memtable ready to be recycled, so that memtable will
	// be freed and the overall reservation total is reduced to 2x.
	require.NoError(t, iter.Close())
	checkReserved(2 * int64(opts.MemTableSize))

	require.NoError(t, d.Close())
}
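// Illustrative sketch (not part of the original test suite): inserting and
// probing a block in the block cache, following the same calls
// TestMemTableReservation makes above. The file number and offset are
// arbitrary placeholder values.
func cacheHasBlock(c *Cache, data []byte) bool {
	id := c.NewID()
	v := cache.Alloc(len(data))
	copy(v.Buf(), data)
	c.Set(id, base.FileNum(0).DiskFileNum(), 0, v).Release()

	h := c.Get(id, base.FileNum(0).DiskFileNum(), 0)
	found := h.Get() != nil
	if found {
		h.Release()
	}
	return found
}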
func TestMemTableReservationLeak(t *testing.T) {
	d, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	d.mu.Lock()
	last := d.mu.mem.queue[len(d.mu.mem.queue)-1]
	last.readerRef()
	defer func() {
		last.readerUnref(true)
	}()
	d.mu.Unlock()
	if err := d.Close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked memtable reservation:") {
		t.Fatalf("expected leaked memtable reservation, but found %+v", err)
	} else {
		t.Log(err.Error())
	}
}

func TestCacheEvict(t *testing.T) {
	cache := NewCache(10 << 20)
	defer cache.Unref()

	d, err := Open("", &Options{
		Cache: cache,
		FS:    vfs.NewMem(),
	})
	require.NoError(t, err)

	for i := 0; i < 1000; i++ {
		key := []byte(fmt.Sprintf("%04d", i))
		require.NoError(t, d.Set(key, key, nil))
	}

	require.NoError(t, d.Flush())
	iter, _ := d.NewIter(nil)
	for iter.First(); iter.Valid(); iter.Next() {
	}
	require.NoError(t, iter.Close())

	if size := cache.Size(); size == 0 {
		t.Fatalf("expected non-zero cache size")
	}

	for i := 0; i < 1000; i++ {
		key := []byte(fmt.Sprintf("%04d", i))
		require.NoError(t, d.Delete(key, nil))
	}

	require.NoError(t, d.Compact([]byte("0"), []byte("1"), false))

	require.NoError(t, d.Close())

	if size := cache.Size(); size != 0 {
		t.Fatalf("expected empty cache, but found %d", size)
	}
}

func TestFlushEmpty(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	// Flushing an empty memtable should not fail.
	require.NoError(t, d.Flush())
	require.NoError(t, d.Close())
}

func TestRollManifest(t *testing.T) {
	toPreserve := rand.Int31n(5) + 1
	opts := &Options{
		MaxManifestFileSize:   1,
		L0CompactionThreshold: 10,
		L0StopWritesThreshold: 1000,
		FS:                    vfs.NewMem(),
		NumPrevManifest:       int(toPreserve),
	}
	opts.DisableAutomaticCompactions = true
	opts.testingRandomized(t)
	d, err := Open("", opts)
	require.NoError(t, err)

	manifestFileNumber := func() FileNum {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.versions.manifestFileNum
	}
	sizeRolloverState := func() (int64, int64) {
		d.mu.Lock()
		defer d.mu.Unlock()
		return d.mu.versions.rotationHelper.DebugInfo()
	}

	current := func() string {
		desc, err := Peek(d.dirname, d.opts.FS)
		require.NoError(t, err)
		return desc.ManifestFilename
	}

	lastManifestNum := manifestFileNumber()
	manifestNums := []base.FileNum{lastManifestNum}
	for i := 0; i < 5; i++ {
		// MaxManifestFileSize is 1, but the rollover logic also counts edits
		// since the last snapshot to decide on rollover, so do as many flushes as
		// it demands.
		lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState()
		var expectedLastSnapshotCount, expectedEditsSinceSnapshotCount int64
		switch i {
		case 0:
			// DB is empty.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 0
		case 1:
			// First edit that caused rollover is not in the snapshot.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 1
		case 2:
			// One flush is in the snapshot. One flush in the edit.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 1, 1
		case 3:
			// Two flushes in the snapshot. One flush in the edit. Will need to do
			// two more flushes, the first of which will be in the next snapshot.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 2, 1
		case 4:
			// Four flushes in the snapshot. One flush in the edit. Will need to do
			// four more flushes, three of which will be in the snapshot.
			expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 4, 1
		}
		require.Equal(t, expectedLastSnapshotCount, lastSnapshotCount)
		require.Equal(t, expectedEditsSinceSnapshotCount, editsSinceSnapshotCount)
		// Number of flushes to do to trigger the rollover.
		steps := int(lastSnapshotCount - editsSinceSnapshotCount + 1)
		// Steps can be <= 0, but we need to do at least one edit to trigger the
		// rollover logic.
		if steps <= 0 {
			steps = 1
		}
		for j := 0; j < steps; j++ {
			require.NoError(t, d.Set([]byte("a"), nil, nil))
			require.NoError(t, d.Flush())
		}
		d.TestOnlyWaitForCleaning()
		num := manifestFileNumber()
		if lastManifestNum == num {
			t.Fatalf("manifest failed to roll %d: %d == %d", i, lastManifestNum, num)
		}

		manifestNums = append(manifestNums, num)
		lastManifestNum = num

		expectedCurrent := fmt.Sprintf("MANIFEST-%s", lastManifestNum)
		if v := current(); expectedCurrent != v {
			t.Fatalf("expected %s, but found %s", expectedCurrent, v)
		}
	}
	lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState()
	require.EqualValues(t, 8, lastSnapshotCount)
	require.EqualValues(t, 1, editsSinceSnapshotCount)

	files, err := d.opts.FS.List("")
	require.NoError(t, err)

	var manifests []string
	for _, filename := range files {
		fileType, _, ok := base.ParseFilename(d.opts.FS, filename)
		if !ok {
			continue
		}
		if fileType == fileTypeManifest {
			manifests = append(manifests, filename)
		}
	}

	sort.Slice(manifests, func(i, j int) bool {
		return manifests[i] < manifests[j]
	})

	var expected []string
	for i := len(manifestNums) - int(toPreserve) - 1; i < len(manifestNums); i++ {
		expected = append(
			expected,
			fmt.Sprintf("MANIFEST-%s", manifestNums[i]),
		)
	}
	require.EqualValues(t, expected, manifests)

	// Test the logic that uses the future snapshot size to rollover.
	// Reminder: we have a snapshot with 8 files and the manifest has 1 edit
	// (flush) with 1 file.
	// Add 8 more files with a different key.
	lastManifestNum = manifestFileNumber()
	for j := 0; j < 8; j++ {
		require.NoError(t, d.Set([]byte("c"), nil, nil))
		require.NoError(t, d.Flush())
	}
	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
	// Need 16 more files in edits to trigger a rollover.
	require.EqualValues(t, 16, lastSnapshotCount)
	require.EqualValues(t, 1, editsSinceSnapshotCount)
	require.NotEqual(t, manifestFileNumber(), lastManifestNum)
	lastManifestNum = manifestFileNumber()
	// Do a compaction that moves 8 of the files from L0 to 1 file in L6. This
	// adds 9 files in edits. We still need 6 more files in edits based on the
	// last snapshot. But the current version has only 9 L0 files and 1 L6 file,
	// for a total of 10 files. So 1 flush should push us over that threshold.
	d.Compact([]byte("c"), []byte("d"), false)
	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
	require.EqualValues(t, 16, lastSnapshotCount)
	require.EqualValues(t, 10, editsSinceSnapshotCount)
	require.Equal(t, manifestFileNumber(), lastManifestNum)
	require.NoError(t, d.Set([]byte("c"), nil, nil))
	require.NoError(t, d.Flush())
	lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState()
	require.EqualValues(t, 10, lastSnapshotCount)
	require.EqualValues(t, 1, editsSinceSnapshotCount)
	require.NotEqual(t, manifestFileNumber(), lastManifestNum)

	require.NoError(t, d.Close())
}

func TestDBClosed(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	require.NoError(t, d.Close())

	catch := func(f func()) (err error) {
		defer func() {
			if r := recover(); r != nil {
				err = r.(error)
			}
		}()
		f()
		return nil
	}

	require.True(t, errors.Is(catch(func() { _ = d.Close() }), ErrClosed))

	require.True(t, errors.Is(catch(func() { _ = d.Compact(nil, nil, false) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Flush() }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _, _ = d.AsyncFlush() }), ErrClosed))

	require.True(t, errors.Is(catch(func() { _, _, _ = d.Get(nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Delete(nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.DeleteRange(nil, nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Ingest(nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.LogData(nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Merge(nil, nil, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.RatchetFormatMajorVersion(internalFormatNewest) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Set(nil, nil, nil) }), ErrClosed))

	require.True(t, errors.Is(catch(func() { _ = d.NewSnapshot() }), ErrClosed))

	b := d.NewIndexedBatch()
	require.True(t, errors.Is(catch(func() { _ = b.Commit(nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _ = d.Apply(b, nil) }), ErrClosed))
	require.True(t, errors.Is(catch(func() { _, _ = b.NewIter(nil) }), ErrClosed))
}

func TestDBConcurrentCommitCompactFlush(t *testing.T) {
	d, err := Open("", testingRandomized(t, &Options{
		FS: vfs.NewMem(),
	}))
	require.NoError(t, err)

	// Concurrently commit, compact, and flush in order to stress the locking around
	// those operations.
	const n = 1000
	var wg sync.WaitGroup
	wg.Add(n)
	for i := 0; i < n; i++ {
		go func(i int) {
			defer wg.Done()
			_ = d.Set([]byte(fmt.Sprint(i)), nil, nil)
			var err error
			switch i % 3 {
			case 0:
				err = d.Compact(nil, []byte("\xff"), false)
			case 1:
				err = d.Flush()
			case 2:
				_, err = d.AsyncFlush()
			}
			require.NoError(t, err)
		}(i)
	}
	wg.Wait()

	require.NoError(t, d.Close())
}

func TestDBConcurrentCompactClose(t *testing.T) {
	// Test closing while a compaction is ongoing. This ensures compaction code
	// detects the close and finishes cleanly.
	mem := vfs.NewMem()
	for i := 0; i < 100; i++ {
		opts := &Options{
			FS: mem,
			MaxConcurrentCompactions: func() int {
				return 2
			},
		}
		d, err := Open("", testingRandomized(t, opts))
		require.NoError(t, err)

		// Ingest a series of files containing a single key each. As the outer
		// loop progresses, these ingestions will build up compaction debt
		// causing compactions to be running concurrently with the close below.
		for j := 0; j < 10; j++ {
			path := fmt.Sprintf("ext%d", j)
			f, err := mem.Create(path)
			require.NoError(t, err)
			w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
				TableFormat: d.FormatMajorVersion().MaxTableFormat(),
			})
			require.NoError(t, w.Set([]byte(fmt.Sprint(j)), nil))
			require.NoError(t, w.Close())
			require.NoError(t, d.Ingest([]string{path}))
		}

		require.NoError(t, d.Close())
	}
}

func TestDBApplyBatchNilDB(t *testing.T) {
	d, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	b1 := &Batch{}
	b1.Set([]byte("test"), nil, nil)

	b2 := &Batch{}
	b2.Apply(b1, nil)
	if b2.memTableSize != 0 {
		t.Fatalf("expected memTableSize to not be set")
	}
	require.NoError(t, d.Apply(b2, nil))
	if b1.memTableSize != b2.memTableSize {
		t.Fatalf("expected memTableSize %d, but found %d", b1.memTableSize, b2.memTableSize)
	}

	require.NoError(t, d.Close())
}

func TestDBApplyBatchMismatch(t *testing.T) {
	srcDB, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	applyDB, err := Open("", &Options{FS: vfs.NewMem()})
	require.NoError(t, err)

	err = func() (err error) {
		defer func() {
			if v := recover(); v != nil {
				err = errors.Errorf("%v", v)
			}
		}()

		b := srcDB.NewBatch()
		b.Set([]byte("test"), nil, nil)
		return applyDB.Apply(b, nil)
	}()
	if err == nil || !strings.Contains(err.Error(), "pebble: batch db mismatch:") {
		t.Fatalf("expected error, but found %v", err)
	}

	require.NoError(t, srcDB.Close())
	require.NoError(t, applyDB.Close())
}

func TestCloseCleanerRace(t *testing.T) {
	mem := vfs.NewMem()
	for i := 0; i < 20; i++ {
		db, err := Open("", testingRandomized(t, &Options{FS: mem}))
		require.NoError(t, err)
		require.NoError(t, db.Set([]byte("a"), []byte("something"), Sync))
		require.NoError(t, db.Flush())
		// Ref the sstables so they cannot be deleted.
		it, _ := db.NewIter(nil)
		require.NotNil(t, it)
		require.NoError(t, db.DeleteRange([]byte("a"), []byte("b"), Sync))
		require.NoError(t, db.Compact([]byte("a"), []byte("b"), false))
		// Only the iterator is keeping the sstables alive.
		files, err := mem.List("/")
		require.NoError(t, err)
		var found bool
		for _, f := range files {
			if strings.HasSuffix(f, ".sst") {
				found = true
				break
			}
		}
		require.True(t, found)
		// Close the iterator and the db in succession so file cleaning races with DB.Close() --
		// latter should wait for file cleaning to finish.
		require.NoError(t, it.Close())
		require.NoError(t, db.Close())
		files, err = mem.List("/")
		require.NoError(t, err)
		for _, f := range files {
			if strings.HasSuffix(f, ".sst") {
				t.Fatalf("found sst: %s", f)
			}
		}
	}
}

func TestSSTablesWithApproximateSpanBytes(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Create two sstables.
	// sstable is contained within keyspan (fileNum = 5).
	require.NoError(t, d.Set([]byte("c"), nil, nil))
	require.NoError(t, d.Set([]byte("d"), nil, nil))
	require.NoError(t, d.Flush())

	// sstable partially overlaps keyspan (fileNum = 7).
	require.NoError(t, d.Set([]byte("d"), nil, nil))
	require.NoError(t, d.Set([]byte("g"), nil, nil))
	require.NoError(t, d.Flush())

	// cannot use WithApproximateSpanBytes without WithProperties.
	_, err = d.SSTables(WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes())
	require.Error(t, err)

	// cannot use WithApproximateSpanBytes without WithKeyRangeFilter.
	_, err = d.SSTables(WithProperties(), WithApproximateSpanBytes())
	require.Error(t, err)

	tableInfos, err := d.SSTables(WithProperties(), WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes())
	require.NoError(t, err)

	for _, levelTables := range tableInfos {
		for _, table := range levelTables {
			approximateSpanBytes, err := strconv.ParseInt(table.Properties.UserProperties["approximate-span-bytes"], 10, 64)
			require.NoError(t, err)
			if table.FileNum == 5 {
				require.Equal(t, uint64(approximateSpanBytes), table.Size)
			}
			if table.FileNum == 7 {
				require.Less(t, uint64(approximateSpanBytes), table.Size)
			}
		}
	}
}

func TestFilterSSTablesWithOption(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Create two sstables.
	require.NoError(t, d.Set([]byte("/Table/5"), nil, nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Set([]byte("/Table/10"), nil, nil))
	require.NoError(t, d.Flush())

	tableInfos, err := d.SSTables(WithKeyRangeFilter([]byte("/Table/5"), []byte("/Table/6")))
	require.NoError(t, err)

	totalTables := 0
	for _, levelTables := range tableInfos {
		totalTables += len(levelTables)
	}

	// With the filter, the second sstable should not be returned.
	require.EqualValues(t, 1, totalTables)

	tableInfos, err = d.SSTables()
	require.NoError(t, err)

	totalTables = 0
	for _, levelTables := range tableInfos {
		totalTables += len(levelTables)
	}

	// Without the filter, both sstables are returned.
	require.EqualValues(t, 2, totalTables)
}

func TestSSTables(t *testing.T) {
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
	})
	require.NoError(t, err)
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	// Create two sstables.
	require.NoError(t, d.Set([]byte("hello"), nil, nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Set([]byte("world"), nil, nil))
	require.NoError(t, d.Flush())

	// By default, the returned table infos should not contain Properties.
	tableInfos, err := d.SSTables()
	require.NoError(t, err)
	for _, levelTables := range tableInfos {
		for _, info := range levelTables {
			require.Nil(t, info.Properties)
		}
	}

	// With the WithProperties() option, Properties in the table info should not be nil.
	tableInfos, err = d.SSTables(WithProperties())
	require.NoError(t, err)
	for _, levelTables := range tableInfos {
		for _, info := range levelTables {
			require.NotNil(t, info.Properties)
		}
	}
}

type testTracer struct {
	enabledOnlyForNonBackgroundContext bool
	buf                                strings.Builder
}

func (t *testTracer) Infof(format string, args ...interface{})  {}
func (t *testTracer) Fatalf(format string, args ...interface{}) {}

func (t *testTracer) Eventf(ctx context.Context, format string, args ...interface{}) {
	if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() {
		return
	}
	fmt.Fprintf(&t.buf, format, args...)
	fmt.Fprint(&t.buf, "\n")
}

func (t *testTracer) IsTracingEnabled(ctx context.Context) bool {
	if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() {
		return false
	}
	return true
}

func TestTracing(t *testing.T) {
	if !invariants.Enabled {
		// The test relies on timing behavior injected when invariants.Enabled.
		return
	}
	var tracer testTracer
	c := NewCache(0)
	defer c.Unref()
	d, err := Open("", &Options{
		FS:              vfs.NewMem(),
		Cache:           c,
		LoggerAndTracer: &tracer,
	})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, d.Close())
	}()

	// Create a sstable.
	require.NoError(t, d.Set([]byte("hello"), nil, nil))
	require.NoError(t, d.Flush())
	_, closer, err := d.Get([]byte("hello"))
	require.NoError(t, err)
	closer.Close()
	readerInitTraceString := "reading 37 bytes took 5ms\nreading 628 bytes took 5ms\n"
	iterTraceString := "reading 27 bytes took 5ms\nreading 29 bytes took 5ms\n"
	require.Equal(t, readerInitTraceString+iterTraceString, tracer.buf.String())

	// Get again, but since it currently uses context.Background(), no trace
	// output is produced.
	tracer.buf.Reset()
	tracer.enabledOnlyForNonBackgroundContext = true
	_, closer, err = d.Get([]byte("hello"))
	require.NoError(t, err)
	closer.Close()
	require.Equal(t, "", tracer.buf.String())

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	iter, _ := d.NewIterWithContext(ctx, nil)
	iter.SeekGE([]byte("hello"))
	iter.Close()
	require.Equal(t, iterTraceString, tracer.buf.String())

	tracer.buf.Reset()
	snap := d.NewSnapshot()
	iter, _ = snap.NewIterWithContext(ctx, nil)
	iter.SeekGE([]byte("hello"))
	iter.Close()
	require.Equal(t, iterTraceString, tracer.buf.String())
	snap.Close()

	tracer.buf.Reset()
	b := d.NewIndexedBatch()
	iter = b.NewIterWithContext(ctx, nil)
	iter.SeekGE([]byte("hello"))
	iter.Close()
	require.Equal(t, iterTraceString, tracer.buf.String())
	b.Close()
}

func TestMemtableIngestInversion(t *testing.T) {
	memFS := vfs.NewMem()
	opts := &Options{
		FS:                          memFS,
		MemTableSize:                256 << 10, // 256 KB
		MemTableStopWritesThreshold: 1000,
		L0StopWritesThreshold:       1000,
		L0CompactionThreshold:       2,
		MaxConcurrentCompactions: func() int {
			return 1000
		},
	}

	const channelTimeout = 5 * time.Second

	// We induce delay in compactions by passing in an EventListener that stalls on
	// the first TableCreated event for a compaction job we want to block.
	// FlushBegin and CompactionBegin have info on compaction start/output levels
	// which is what we need to identify what compactions to block. However
	// FlushBegin and CompactionBegin are called while holding db.mu, so we cannot
	// block those events forever. Instead, we grab the job ID from those events
	// and store it. Then during TableCreated, we check if we're creating an output
	// for a job we have identified earlier as one to block, and then hold on a
	// semaphore there until there's a signal from the test code to resume with the
	// compaction.
	//
	// If nextBlockedCompaction is non-zero, we must block the next compaction
	// out of the nextBlockedCompaction - 3 start level. 1 means block the next
	// intra-L0 compaction and 2 means block the next flush (as flushes have
	// a -1 start level).
	var nextBlockedCompaction, blockedJobID int
	var blockedCompactionsMu sync.Mutex // protects the above two variables.
	nextSem := make(chan chan struct{}, 1)
	var el EventListener
	el.EnsureDefaults(testLogger{t: t})
	el.FlushBegin = func(info FlushInfo) {
		blockedCompactionsMu.Lock()
		defer blockedCompactionsMu.Unlock()
		if nextBlockedCompaction == 2 {
			nextBlockedCompaction = 0
			blockedJobID = info.JobID
		}
	}
	el.CompactionBegin = func(info CompactionInfo) {
		// 0 = block nothing, 1 = block intra-L0 compaction, 2 = block flush,
		// 3 = block L0 -> LBase compaction, 4 = block compaction out of L1, and so on.
		blockedCompactionsMu.Lock()
		defer blockedCompactionsMu.Unlock()
		blockValue := info.Input[0].Level + 3
		if info.Input[0].Level == 0 && info.Output.Level == 0 {
			// Intra-L0 compaction, denoted by a blockValue of 1.
			blockValue = 1
		}
		if nextBlockedCompaction == blockValue {
			nextBlockedCompaction = 0
			blockedJobID = info.JobID
		}
	}
	el.TableCreated = func(info TableCreateInfo) {
		blockedCompactionsMu.Lock()
		if info.JobID != blockedJobID {
			blockedCompactionsMu.Unlock()
			return
		}
		blockedJobID = 0
		blockedCompactionsMu.Unlock()
		sem := make(chan struct{})
		nextSem <- sem
		<-sem
	}
	tel := TeeEventListener(MakeLoggingEventListener(testLogger{t: t}), el)
	opts.EventListener = &tel
	opts.Experimental.L0CompactionConcurrency = 1
	d, err := Open("", opts)
	require.NoError(t, err)
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()

	printLSM := func() {
		d.mu.Lock()
		s := d.mu.versions.currentVersion().String()
		d.mu.Unlock()
		t.Logf("%s", s)
	}

	// Create some sstables. These should go into L6. These are irrelevant for
	// the rest of the test.
	require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Set([]byte("d"), []byte("bar"), nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Compact([]byte("a"), []byte("z"), true))

	var baseCompactionSem, flushSem, intraL0Sem chan struct{}
	// Block an L0 -> LBase compaction. This is necessary to induce intra-L0
	// compactions later on.
	blockedCompactionsMu.Lock()
	nextBlockedCompaction = 3
	blockedCompactionsMu.Unlock()
	timeoutSem := time.After(channelTimeout)
	t.Log("blocking an L0 -> LBase compaction")
	// Write an sstable to L0 until we're blocked on an L0 -> LBase compaction.
	breakLoop := false
	for !breakLoop {
		select {
		case sem := <-nextSem:
			baseCompactionSem = sem
			breakLoop = true
		case <-timeoutSem:
			t.Fatal("did not get blocked on an LBase compaction")
		default:
			require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil))
			require.NoError(t, d.Set([]byte("g"), []byte("bar"), nil))
			require.NoError(t, d.Flush())
			time.Sleep(100 * time.Millisecond)
		}
	}
	printLSM()

	// Do 4 ingests, one with the key cc, one with bb and cc, and two with just bb.
	// The purpose of the sstable containing cc is to inflate the L0 sublevel
	// count of the interval at cc, as that's where we want the intra-L0 compaction
	// to be seeded. However we also need a file left of that interval to have
	// the same (or higher) sublevel to trigger the bug in
	// cockroachdb/cockroach#101896. That's why we ingest a file after it to
	// "bridge" the bb/cc intervals, and then ingest a file at bb. These go
	// into sublevels like this:
	//
	// bb
	// bb
	// bb-----cc
	//        cc
	//
	// Eventually, we'll drop an ingested file containing a range del starting at
	// cc around here:
	//
	// bb
	// bb     cc---...
	// bb-----cc
	//        cc
	{
		path := "ingest1.sst"
		f, err := memFS.Create(path)
		require.NoError(t, err)
		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		require.NoError(t, w.Set([]byte("cc"), []byte("foo")))
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{path}))
	}
	{
		path := "ingest2.sst"
		f, err := memFS.Create(path)
		require.NoError(t, err)
		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		require.NoError(t, w.Set([]byte("bb"), []byte("foo2")))
		require.NoError(t, w.Set([]byte("cc"), []byte("foo2")))
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{path}))
	}
	{
		path := "ingest3.sst"
		f, err := memFS.Create(path)
		require.NoError(t, err)
		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		require.NoError(t, w.Set([]byte("bb"), []byte("foo3")))
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{path}))
	}
	{
		path := "ingest4.sst"
		f, err := memFS.Create(path)
		require.NoError(t, err)
		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		require.NoError(t, w.Set([]byte("bb"), []byte("foo4")))
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{path}))
	}

	// We now have a base compaction blocked. Block a memtable flush to cause
	// memtables to queue up.
	//
	// Memtable (stuck):
	//
	// b-----------------g
	//
	// Relevant L0 sstables
	//
	// bb
	// bb
	// bb-----cc
	//        cc
	blockedCompactionsMu.Lock()
	nextBlockedCompaction = 2
	blockedCompactionsMu.Unlock()
	t.Log("blocking a flush")
	require.NoError(t, d.Set([]byte("b"), []byte("foo2"), nil))
	require.NoError(t, d.Set([]byte("g"), []byte("bar2"), nil))
	_, _ = d.AsyncFlush()
	select {
	case sem := <-nextSem:
		flushSem = sem
	case <-time.After(channelTimeout):
		t.Fatal("did not get blocked on a flush")
	}
	// Add one memtable to flush queue, and finish it off.
	//
	// Memtables (stuck):
	//
	// b-----------------g (waiting to flush)
	// b-----------------g (flushing, blocked)
	//
	// Relevant L0 sstables
	//
	// bb
	// bb
	// bb-----cc
	//        cc
	require.NoError(t, d.Set([]byte("b"), []byte("foo3"), nil))
	require.NoError(t, d.Set([]byte("g"), []byte("bar3"), nil))
	// note: this flush will wait for the earlier, blocked flush, but it closes
	// off the memtable which is what we want.
	_, _ = d.AsyncFlush()

	// Open a new mutable memtable. This gets us an earlier earliestUnflushedSeqNum
	// than the ingest below it.
	require.NoError(t, d.Set([]byte("c"), []byte("somethingbigishappening"), nil))
	// Block an intra-L0 compaction, as one might happen around this time.
	blockedCompactionsMu.Lock()
	nextBlockedCompaction = 1
	blockedCompactionsMu.Unlock()
	t.Log("blocking an intra-L0 compaction")
	// Ingest a file containing a cc-e rangedel.
	//
	// Memtables:
	//
	// c (mutable)
	// b-----------------g (waiting to flush)
	// b-----------------g (flushing, blocked)
	//
	// Relevant L0 sstables
	//
	// bb
	// bb     cc-----e (just ingested)
	// bb-----cc
	//        cc
	{
		path := "ingest5.sst"
		f, err := memFS.Create(path)
		require.NoError(t, err)
		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		require.NoError(t, w.DeleteRange([]byte("cc"), []byte("e")))
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{path}))
	}
	t.Log("main ingest complete")
	printLSM()
	t.Logf("%s", d.Metrics().String())

	require.NoError(t, d.Set([]byte("d"), []byte("ThisShouldNotBeDeleted"), nil))

	// Do another ingest with a seqnum newer than d. The purpose of this is to
	// increase the LargestSeqNum of the intra-L0 compaction output *beyond*
	// the flush that contains d=ThisShouldNotBeDeleted, therefore causing
	// that point key to be deleted (in the buggy code).
	//
	// Memtables:
	//
	// c-----d (mutable)
	// b-----------------g (waiting to flush)
	// b-----------------g (flushing, blocked)
	//
	// Relevant L0 sstables
	//
	// bb     cc
	// bb     cc-----e (just ingested)
	// bb-----cc
	//        cc
	{
		path := "ingest6.sst"
		f, err := memFS.Create(path)
		require.NoError(t, err)
		w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{
			TableFormat: d.FormatMajorVersion().MaxTableFormat(),
		})
		require.NoError(t, w.Set([]byte("cc"), []byte("doesntmatter")))
		require.NoError(t, w.Close())
		require.NoError(t, d.Ingest([]string{path}))
	}

	// Unblock earlier flushes. We will first finish flushing the blocked
	// memtable, and end up in this state:
	//
	// Memtables:
	//
	// c-----d (mutable)
	// b-----------------g (waiting to flush)
	//
	// Relevant L0 sstables
	//
	// b-------------------g (irrelevant, just flushed)
	// bb     cc (has LargestSeqNum > earliestUnflushedSeqNum)
	// bb     cc-----e (has a rangedel)
	// bb-----cc
	//        cc
	//
	// Note that while b----g is relatively old (and so has a low LargestSeqNum),
	// it bridges a bunch of intervals. Had we regenerated sublevels from scratch,
	// it'd have gone below the cc-e sstable. But due to #101896, we just slapped
	// it on top. Now, as long as our seed interval is the one at cc and our seed
	// file is the just-flushed L0 sstable, we will go down and include anything
	// in that interval even if it has a LargestSeqNum > earliestUnflushedSeqNum.
	//
	// All asterisked L0 sstables should now get picked in an intra-L0 compaction
	// right after the flush finishes, that we then block:
	//
	// b-------------------g*
	// bb*    cc*
	// bb*    cc-----e*
	// bb-----cc*
	//        cc*
	t.Log("unblocking flush")
	flushSem <- struct{}{}
	printLSM()

	select {
	case sem := <-nextSem:
		intraL0Sem = sem
	case <-time.After(channelTimeout):
		t.Fatal("did not get blocked on an intra L0 compaction")
	}

	// Ensure all memtables are flushed. This will mean d=ThisShouldNotBeDeleted
	// will land in L0 and since that was the last key written to a memtable,
	// and the ingestion at cc came after it, the output of the intra-L0
	// compaction will elevate the cc-e rangedel above it and delete it
	// (if #101896 is not fixed).
	ch, _ := d.AsyncFlush()
	<-ch

	// Unblock earlier intra-L0 compaction.
	t.Log("unblocking intraL0")
	intraL0Sem <- struct{}{}
	printLSM()

	// Try reading d a couple times.
	for i := 0; i < 2; i++ {
		val, closer, err := d.Get([]byte("d"))
		require.NoError(t, err)
		require.Equal(t, []byte("ThisShouldNotBeDeleted"), val)
		if closer != nil {
			closer.Close()
		}
		time.Sleep(100 * time.Millisecond)
	}

	// Unblock everything.
	baseCompactionSem <- struct{}{}
}
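// Illustrative sketch (not part of the original test suite): the
// EventListener technique used by TestMemtableIngestInversion, in isolation.
// The listener remembers the job ID of the first compaction to begin, then
// stalls that job's TableCreated event on a channel until the caller signals
// the semaphore it receives. All names here are placeholders.
func makeBlockingCompactionListener(t *testing.T) (EventListener, chan chan struct{}) {
	release := make(chan chan struct{}, 1)
	var mu sync.Mutex
	var blockedJobID int
	var el EventListener
	el.EnsureDefaults(testLogger{t: t})
	el.CompactionBegin = func(info CompactionInfo) {
		// CompactionBegin is called while holding db.mu, so only record the
		// job ID here; the actual blocking happens in TableCreated.
		mu.Lock()
		defer mu.Unlock()
		if blockedJobID == 0 {
			blockedJobID = info.JobID
		}
	}
	el.TableCreated = func(info TableCreateInfo) {
		mu.Lock()
		match := info.JobID == blockedJobID
		mu.Unlock()
		if !match {
			return
		}
		// Hand the caller a semaphore and stall until it is signalled.
		sem := make(chan struct{})
		release <- sem
		<-sem
	}
	return el, release
}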
func BenchmarkDelete(b *testing.B) {
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	const keyCount = 10000
	var keys [keyCount][]byte
	for i := 0; i < keyCount; i++ {
		keys[i] = []byte(strconv.Itoa(rng.Int()))
	}
	val := bytes.Repeat([]byte("x"), 10)

	benchmark := func(b *testing.B, useSingleDelete bool) {
		d, err := Open(
			"",
			&Options{
				FS: vfs.NewMem(),
			})
		if err != nil {
			b.Fatal(err)
		}
		defer func() {
			if err := d.Close(); err != nil {
				b.Fatal(err)
			}
		}()

		b.StartTimer()
		for _, key := range keys {
			_ = d.Set(key, val, nil)
			if useSingleDelete {
				_ = d.SingleDelete(key, nil)
			} else {
				_ = d.Delete(key, nil)
			}
		}
		// Manually flush, as it is during flushing/compaction that SingleDelete
		// performance shows up. With SingleDelete, we can elide all of the
		// SingleDelete and Set records.
		if err := d.Flush(); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()
	}

	b.Run("delete", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			benchmark(b, false)
		}
	})

	b.Run("single-delete", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			benchmark(b, true)
		}
	})
}

func BenchmarkNewIterReadAmp(b *testing.B) {
	for _, readAmp := range []int{10, 100, 1000} {
		b.Run(strconv.Itoa(readAmp), func(b *testing.B) {
			opts := &Options{
				FS:                    vfs.NewMem(),
				L0StopWritesThreshold: 1000,
			}
			opts.DisableAutomaticCompactions = true

			d, err := Open("", opts)
			require.NoError(b, err)

			for i := 0; i < readAmp; i++ {
				require.NoError(b, d.Set([]byte("a"), []byte("b"), NoSync))
				require.NoError(b, d.Flush())
			}

			require.Equal(b, d.Metrics().ReadAmp(), readAmp)

			b.StopTimer()
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				b.StartTimer()
				iter, _ := d.NewIter(nil)
				b.StopTimer()
				require.NoError(b, iter.Close())
			}

			require.NoError(b, d.Close())
		})
	}
}

func verifyGet(t *testing.T, r Reader, key, expected []byte) {
	val, closer, err := r.Get(key)
	require.NoError(t, err)
	if !bytes.Equal(expected, val) {
		t.Fatalf("expected %s, but got %s", expected, val)
	}
	closer.Close()
}

func verifyGetNotFound(t *testing.T, r Reader, key []byte) {
	val, _, err := r.Get(key)
	if err != base.ErrNotFound {
		t.Fatalf("expected nil, but got %s", val)
	}
}

func BenchmarkRotateMemtables(b *testing.B) {
	o := &Options{FS: vfs.NewMem(), MemTableSize: 64 << 20 /* 64 MB */}
	d, err := Open("", o)
	require.NoError(b, err)

	// We want to jump to full-sized memtables.
	d.mu.Lock()
	d.mu.mem.nextSize = o.MemTableSize
	d.mu.Unlock()
	require.NoError(b, d.Flush())

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := d.Flush(); err != nil {
			b.Fatal(err)
		}
	}
}