github.com/cockroachdb/pebble@v1.1.2/db_test.go (about) 1 // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "path/filepath" 13 "runtime" 14 "sort" 15 "strconv" 16 "strings" 17 "sync" 18 "sync/atomic" 19 "testing" 20 "time" 21 22 "github.com/cockroachdb/errors" 23 "github.com/cockroachdb/fifo" 24 "github.com/cockroachdb/pebble/internal/base" 25 "github.com/cockroachdb/pebble/internal/cache" 26 "github.com/cockroachdb/pebble/internal/invariants" 27 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 28 "github.com/cockroachdb/pebble/sstable" 29 "github.com/cockroachdb/pebble/vfs" 30 "github.com/stretchr/testify/require" 31 "golang.org/x/exp/rand" 32 ) 33 34 // try repeatedly calls f, sleeping between calls with exponential back-off, 35 // until f returns a nil error or the total sleep time is greater than or equal 36 // to maxTotalSleep. It always calls f at least once. 37 func try(initialSleep, maxTotalSleep time.Duration, f func() error) error { 38 totalSleep := time.Duration(0) 39 for d := initialSleep; ; d *= 2 { 40 time.Sleep(d) 41 totalSleep += d 42 if err := f(); err == nil || totalSleep >= maxTotalSleep { 43 return err 44 } 45 } 46 } 47 48 func TestTry(t *testing.T) { 49 c := make(chan struct{}) 50 go func() { 51 time.Sleep(1 * time.Millisecond) 52 close(c) 53 }() 54 55 attemptsMu := sync.Mutex{} 56 attempts := 0 57 58 err := try(100*time.Microsecond, 20*time.Second, func() error { 59 attemptsMu.Lock() 60 attempts++ 61 attemptsMu.Unlock() 62 63 select { 64 default: 65 return errors.New("timed out") 66 case <-c: 67 return nil 68 } 69 }) 70 require.NoError(t, err) 71 72 attemptsMu.Lock() 73 a := attempts 74 attemptsMu.Unlock() 75 76 if a == 0 { 77 t.Fatalf("attempts: got 0, want > 0") 78 } 79 } 80 81 func TestBasicReads(t *testing.T) { 82 testCases := []struct { 83 dirname string 84 wantMap map[string]string 85 }{ 86 { 87 "db-stage-1", 88 map[string]string{ 89 "aaa": "", 90 "bar": "", 91 "baz": "", 92 "foo": "", 93 "quux": "", 94 "zzz": "", 95 }, 96 }, 97 { 98 "db-stage-2", 99 map[string]string{ 100 "aaa": "", 101 "bar": "", 102 "baz": "three", 103 "foo": "four", 104 "quux": "", 105 "zzz": "", 106 }, 107 }, 108 { 109 "db-stage-3", 110 map[string]string{ 111 "aaa": "", 112 "bar": "", 113 "baz": "three", 114 "foo": "four", 115 "quux": "", 116 "zzz": "", 117 }, 118 }, 119 { 120 "db-stage-4", 121 map[string]string{ 122 "aaa": "", 123 "bar": "", 124 "baz": "", 125 "foo": "five", 126 "quux": "six", 127 "zzz": "", 128 }, 129 }, 130 } 131 for _, tc := range testCases { 132 t.Run(tc.dirname, func(t *testing.T) { 133 fs := vfs.NewMem() 134 _, err := vfs.Clone(vfs.Default, fs, filepath.Join("testdata", tc.dirname), tc.dirname) 135 if err != nil { 136 t.Fatalf("%s: cloneFileSystem failed: %v", tc.dirname, err) 137 } 138 d, err := Open(tc.dirname, testingRandomized(t, &Options{ 139 FS: fs, 140 })) 141 if err != nil { 142 t.Fatalf("%s: Open failed: %v", tc.dirname, err) 143 } 144 for key, want := range tc.wantMap { 145 got, closer, err := d.Get([]byte(key)) 146 if err != nil && err != ErrNotFound { 147 t.Fatalf("%s: Get(%q) failed: %v", tc.dirname, key, err) 148 } 149 if string(got) != string(want) { 150 t.Fatalf("%s: Get(%q): got %q, want %q", tc.dirname, key, got, want) 151 } 152 if closer != nil { 153 closer.Close() 154 } 155 } 156 err = d.Close() 157 if err != nil { 158 t.Fatalf("%s: 
Close failed: %v", tc.dirname, err) 159 } 160 }) 161 } 162 } 163 164 func TestBasicWrites(t *testing.T) { 165 d, err := Open("", testingRandomized(t, &Options{ 166 FS: vfs.NewMem(), 167 })) 168 require.NoError(t, err) 169 170 names := []string{ 171 "Alatar", 172 "Gandalf", 173 "Pallando", 174 "Radagast", 175 "Saruman", 176 "Joe", 177 } 178 wantMap := map[string]string{} 179 180 inBatch, batch, pending := false, &Batch{}, [][]string(nil) 181 set0 := func(k, v string) error { 182 return d.Set([]byte(k), []byte(v), nil) 183 } 184 del0 := func(k string) error { 185 return d.Delete([]byte(k), nil) 186 } 187 set1 := func(k, v string) error { 188 batch.Set([]byte(k), []byte(v), nil) 189 return nil 190 } 191 del1 := func(k string) error { 192 batch.Delete([]byte(k), nil) 193 return nil 194 } 195 set, del := set0, del0 196 197 testCases := []string{ 198 "set Gandalf Grey", 199 "set Saruman White", 200 "set Radagast Brown", 201 "delete Saruman", 202 "set Gandalf White", 203 "batch", 204 " set Alatar AliceBlue", 205 "apply", 206 "delete Pallando", 207 "set Alatar AntiqueWhite", 208 "set Pallando PapayaWhip", 209 "batch", 210 "apply", 211 "set Pallando PaleVioletRed", 212 "batch", 213 " delete Alatar", 214 " set Gandalf GhostWhite", 215 " set Saruman Seashell", 216 " delete Saruman", 217 " set Saruman SeaGreen", 218 " set Radagast RosyBrown", 219 " delete Pallando", 220 "apply", 221 "delete Radagast", 222 "delete Radagast", 223 "delete Radagast", 224 "set Gandalf Goldenrod", 225 "set Pallando PeachPuff", 226 "batch", 227 " delete Joe", 228 " delete Saruman", 229 " delete Radagast", 230 " delete Pallando", 231 " delete Gandalf", 232 " delete Alatar", 233 "apply", 234 "set Joe Plumber", 235 } 236 for i, tc := range testCases { 237 s := strings.Split(strings.TrimSpace(tc), " ") 238 switch s[0] { 239 case "set": 240 if err := set(s[1], s[2]); err != nil { 241 t.Fatalf("#%d %s: %v", i, tc, err) 242 } 243 if inBatch { 244 pending = append(pending, s) 245 } else { 246 wantMap[s[1]] = s[2] 247 } 248 case "delete": 249 if err := del(s[1]); err != nil { 250 t.Fatalf("#%d %s: %v", i, tc, err) 251 } 252 if inBatch { 253 pending = append(pending, s) 254 } else { 255 delete(wantMap, s[1]) 256 } 257 case "batch": 258 inBatch, batch, set, del = true, &Batch{}, set1, del1 259 case "apply": 260 if err := d.Apply(batch, nil); err != nil { 261 t.Fatalf("#%d %s: %v", i, tc, err) 262 } 263 for _, p := range pending { 264 switch p[0] { 265 case "set": 266 wantMap[p[1]] = p[2] 267 case "delete": 268 delete(wantMap, p[1]) 269 } 270 } 271 inBatch, pending, set, del = false, nil, set0, del0 272 default: 273 t.Fatalf("#%d %s: bad test case: %q", i, tc, s) 274 } 275 276 fail := false 277 for _, name := range names { 278 g, closer, err := d.Get([]byte(name)) 279 if err != nil && err != ErrNotFound { 280 t.Errorf("#%d %s: Get(%q): %v", i, tc, name, err) 281 fail = true 282 } 283 got, gOK := string(g), err == nil 284 want, wOK := wantMap[name] 285 if got != want || gOK != wOK { 286 t.Errorf("#%d %s: Get(%q): got %q, %t, want %q, %t", 287 i, tc, name, got, gOK, want, wOK) 288 fail = true 289 } 290 if closer != nil { 291 closer.Close() 292 } 293 } 294 if fail { 295 return 296 } 297 } 298 299 require.NoError(t, d.Close()) 300 } 301 302 func TestRandomWrites(t *testing.T) { 303 d, err := Open("", testingRandomized(t, &Options{ 304 FS: vfs.NewMem(), 305 MemTableSize: 8 * 1024, 306 })) 307 require.NoError(t, err) 308 309 keys := [64][]byte{} 310 wants := [64]int{} 311 for k := range keys { 312 keys[k] = []byte(strconv.Itoa(k)) 313 
wants[k] = -1 314 } 315 xxx := bytes.Repeat([]byte("x"), 512) 316 317 rng := rand.New(rand.NewSource(123)) 318 const N = 1000 319 for i := 0; i < N; i++ { 320 k := rng.Intn(len(keys)) 321 if rng.Intn(20) != 0 { 322 wants[k] = rng.Intn(len(xxx) + 1) 323 if err := d.Set(keys[k], xxx[:wants[k]], nil); err != nil { 324 t.Fatalf("i=%d: Set: %v", i, err) 325 } 326 } else { 327 wants[k] = -1 328 if err := d.Delete(keys[k], nil); err != nil { 329 t.Fatalf("i=%d: Delete: %v", i, err) 330 } 331 } 332 333 if i != N-1 || rng.Intn(50) != 0 { 334 continue 335 } 336 for k := range keys { 337 got := -1 338 if v, closer, err := d.Get(keys[k]); err != nil { 339 if err != ErrNotFound { 340 t.Fatalf("Get: %v", err) 341 } 342 } else { 343 got = len(v) 344 closer.Close() 345 } 346 if got != wants[k] { 347 t.Errorf("i=%d, k=%d: got %d, want %d", i, k, got, wants[k]) 348 } 349 } 350 } 351 352 require.NoError(t, d.Close()) 353 } 354 355 func TestLargeBatch(t *testing.T) { 356 d, err := Open("", testingRandomized(t, &Options{ 357 FS: vfs.NewMem(), 358 MemTableSize: 1400, 359 MemTableStopWritesThreshold: 100, 360 })) 361 require.NoError(t, err) 362 363 verifyLSM := func(expected string) func() error { 364 return func() error { 365 d.mu.Lock() 366 s := d.mu.versions.currentVersion().String() 367 d.mu.Unlock() 368 if expected != s { 369 if testing.Verbose() { 370 fmt.Println(strings.TrimSpace(s)) 371 } 372 return errors.Errorf("expected %s, but found %s", expected, s) 373 } 374 return nil 375 } 376 } 377 378 logNum := func() base.DiskFileNum { 379 d.mu.Lock() 380 defer d.mu.Unlock() 381 return d.mu.log.queue[len(d.mu.log.queue)-1].fileNum 382 } 383 fileSize := func(fileNum base.DiskFileNum) int64 { 384 info, err := d.opts.FS.Stat(base.MakeFilepath(d.opts.FS, "", fileTypeLog, fileNum)) 385 require.NoError(t, err) 386 return info.Size() 387 } 388 memTableCreationSeqNum := func() uint64 { 389 d.mu.Lock() 390 defer d.mu.Unlock() 391 return d.mu.mem.mutable.logSeqNum 392 } 393 394 startLogNum := logNum() 395 startLogStartSize := fileSize(startLogNum) 396 startSeqNum := d.mu.versions.logSeqNum.Load() 397 398 // Write a key with a value larger than the memtable size. 399 require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("a"), 512), nil)) 400 401 // Verify that the large batch was written to the WAL that existed before it 402 // was committed. We verify that WAL rotation occurred, where the large batch 403 // was written to, and that the new WAL is empty. 404 endLogNum := logNum() 405 if startLogNum == endLogNum { 406 t.Fatal("expected WAL rotation") 407 } 408 startLogEndSize := fileSize(startLogNum) 409 if startLogEndSize == startLogStartSize { 410 t.Fatalf("expected large batch to be written to %s.log, but file size unchanged at %d", 411 startLogNum, startLogEndSize) 412 } 413 endLogSize := fileSize(endLogNum) 414 if endLogSize != 0 { 415 t.Fatalf("expected %s.log to be empty, but found %d", endLogNum, endLogSize) 416 } 417 if creationSeqNum := memTableCreationSeqNum(); creationSeqNum <= startSeqNum { 418 t.Fatalf("expected memTable.logSeqNum=%d > largeBatch.seqNum=%d", creationSeqNum, startSeqNum) 419 } 420 421 // Verify this results in one L0 table being created. 422 require.NoError(t, try(100*time.Microsecond, 20*time.Second, 423 verifyLSM("0.0:\n 000005:[a#10,SET-a#10,SET]\n"))) 424 425 require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 512), nil)) 426 427 // Verify this results in a second L0 table being created. 
428 require.NoError(t, try(100*time.Microsecond, 20*time.Second, 429 verifyLSM("0.0:\n 000005:[a#10,SET-a#10,SET]\n 000007:[b#11,SET-b#11,SET]\n"))) 430 431 // Allocate a bunch of batches to exhaust the batchPool. None of these 432 // batches should have a non-zero count. 433 for i := 0; i < 10; i++ { 434 b := d.NewBatch() 435 require.EqualValues(t, 0, b.Count()) 436 } 437 438 require.NoError(t, d.Close()) 439 } 440 441 func TestGetNoCache(t *testing.T) { 442 cache := NewCache(0) 443 defer cache.Unref() 444 445 d, err := Open("", testingRandomized(t, &Options{ 446 Cache: cache, 447 FS: vfs.NewMem(), 448 })) 449 require.NoError(t, err) 450 451 require.NoError(t, d.Set([]byte("a"), []byte("aa"), nil)) 452 require.NoError(t, d.Flush()) 453 verifyGet(t, d, []byte("a"), []byte("aa")) 454 455 require.NoError(t, d.Close()) 456 } 457 458 func TestGetMerge(t *testing.T) { 459 d, err := Open("", testingRandomized(t, &Options{ 460 FS: vfs.NewMem(), 461 })) 462 require.NoError(t, err) 463 464 key := []byte("a") 465 verify := func(expected string) { 466 val, closer, err := d.Get(key) 467 require.NoError(t, err) 468 469 if expected != string(val) { 470 t.Fatalf("expected %s, but got %s", expected, val) 471 } 472 closer.Close() 473 } 474 475 const val = "1" 476 for i := 1; i <= 3; i++ { 477 require.NoError(t, d.Merge(key, []byte(val), nil)) 478 479 expected := strings.Repeat(val, i) 480 verify(expected) 481 482 require.NoError(t, d.Flush()) 483 verify(expected) 484 } 485 486 require.NoError(t, d.Close()) 487 } 488 489 func TestMergeOrderSameAfterFlush(t *testing.T) { 490 // Ensure compaction iterator (used by flush) and user iterator process merge 491 // operands in the same order 492 d, err := Open("", testingRandomized(t, &Options{ 493 FS: vfs.NewMem(), 494 })) 495 require.NoError(t, err) 496 497 key := []byte("a") 498 verify := func(expected string) { 499 iter, _ := d.NewIter(nil) 500 if !iter.SeekGE([]byte("a")) { 501 t.Fatal("expected one value, but got empty iterator") 502 } 503 if expected != string(iter.Value()) { 504 t.Fatalf("expected %s, but got %s", expected, string(iter.Value())) 505 } 506 if !iter.SeekLT([]byte("b")) { 507 t.Fatal("expected one value, but got empty iterator") 508 } 509 if expected != string(iter.Value()) { 510 t.Fatalf("expected %s, but got %s", expected, string(iter.Value())) 511 } 512 require.NoError(t, iter.Close()) 513 } 514 515 require.NoError(t, d.Merge(key, []byte("0"), nil)) 516 require.NoError(t, d.Merge(key, []byte("1"), nil)) 517 518 verify("01") 519 require.NoError(t, d.Flush()) 520 verify("01") 521 522 require.NoError(t, d.Close()) 523 } 524 525 type closableMerger struct { 526 lastBuf []byte 527 closed bool 528 } 529 530 func (m *closableMerger) MergeNewer(value []byte) error { 531 m.lastBuf = append(m.lastBuf[:0], value...) 532 return nil 533 } 534 535 func (m *closableMerger) MergeOlder(value []byte) error { 536 m.lastBuf = append(m.lastBuf[:0], value...) 
537 return nil 538 } 539 540 func (m *closableMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { 541 return m.lastBuf, m, nil 542 } 543 544 func (m *closableMerger) Close() error { 545 m.closed = true 546 return nil 547 } 548 549 func TestMergerClosing(t *testing.T) { 550 m := &closableMerger{} 551 552 d, err := Open("", testingRandomized(t, &Options{ 553 FS: vfs.NewMem(), 554 Merger: &Merger{ 555 Merge: func(key, value []byte) (base.ValueMerger, error) { 556 return m, m.MergeNewer(value) 557 }, 558 }, 559 })) 560 require.NoError(t, err) 561 562 defer func() { 563 require.NoError(t, d.Close()) 564 }() 565 566 err = d.Merge([]byte("a"), []byte("b"), nil) 567 require.NoError(t, err) 568 require.False(t, m.closed) 569 570 val, closer, err := d.Get([]byte("a")) 571 require.NoError(t, err) 572 require.Equal(t, []byte("b"), val) 573 require.NotNil(t, closer) 574 require.False(t, m.closed) 575 _ = closer.Close() 576 require.True(t, m.closed) 577 } 578 579 func TestLogData(t *testing.T) { 580 d, err := Open("", testingRandomized(t, &Options{ 581 FS: vfs.NewMem(), 582 })) 583 require.NoError(t, err) 584 585 defer func() { 586 require.NoError(t, d.Close()) 587 }() 588 589 require.NoError(t, d.LogData([]byte("foo"), Sync)) 590 require.NoError(t, d.LogData([]byte("bar"), Sync)) 591 // TODO(itsbilal): Confirm that we wrote some bytes to the WAL. 592 // For now, LogData proceeding ahead without a panic is good enough. 593 } 594 595 func TestSingleDeleteGet(t *testing.T) { 596 d, err := Open("", testingRandomized(t, &Options{ 597 FS: vfs.NewMem(), 598 })) 599 require.NoError(t, err) 600 defer func() { 601 require.NoError(t, d.Close()) 602 }() 603 604 key := []byte("key") 605 val := []byte("val") 606 607 require.NoError(t, d.Set(key, val, nil)) 608 verifyGet(t, d, key, val) 609 610 key2 := []byte("key2") 611 val2 := []byte("val2") 612 613 require.NoError(t, d.Set(key2, val2, nil)) 614 verifyGet(t, d, key2, val2) 615 616 require.NoError(t, d.SingleDelete(key2, nil)) 617 verifyGetNotFound(t, d, key2) 618 } 619 620 func TestSingleDeleteFlush(t *testing.T) { 621 d, err := Open("", testingRandomized(t, &Options{ 622 FS: vfs.NewMem(), 623 })) 624 require.NoError(t, err) 625 defer func() { 626 require.NoError(t, d.Close()) 627 }() 628 629 key := []byte("key") 630 valFirst := []byte("first") 631 valSecond := []byte("second") 632 key2 := []byte("key2") 633 val2 := []byte("val2") 634 635 require.NoError(t, d.Set(key, valFirst, nil)) 636 require.NoError(t, d.Set(key2, val2, nil)) 637 require.NoError(t, d.Flush()) 638 639 require.NoError(t, d.SingleDelete(key, nil)) 640 require.NoError(t, d.Set(key, valSecond, nil)) 641 require.NoError(t, d.Delete(key2, nil)) 642 require.NoError(t, d.Set(key2, val2, nil)) 643 require.NoError(t, d.Flush()) 644 645 require.NoError(t, d.SingleDelete(key, nil)) 646 require.NoError(t, d.Delete(key2, nil)) 647 require.NoError(t, d.Flush()) 648 649 verifyGetNotFound(t, d, key) 650 verifyGetNotFound(t, d, key2) 651 } 652 653 func TestUnremovableSingleDelete(t *testing.T) { 654 d, err := Open("", testingRandomized(t, &Options{ 655 FS: vfs.NewMem(), 656 L0CompactionThreshold: 8, 657 })) 658 require.NoError(t, err) 659 defer func() { 660 require.NoError(t, d.Close()) 661 }() 662 663 key := []byte("key") 664 valFirst := []byte("valFirst") 665 valSecond := []byte("valSecond") 666 667 require.NoError(t, d.Set(key, valFirst, nil)) 668 ss := d.NewSnapshot() 669 defer ss.Close() 670 require.NoError(t, d.SingleDelete(key, nil)) 671 require.NoError(t, d.Set(key, valSecond, nil)) 672 
require.NoError(t, d.Flush()) 673 674 verifyGet(t, ss, key, valFirst) 675 verifyGet(t, d, key, valSecond) 676 677 require.NoError(t, d.SingleDelete(key, nil)) 678 679 verifyGet(t, ss, key, valFirst) 680 verifyGetNotFound(t, d, key) 681 682 require.NoError(t, d.Flush()) 683 684 verifyGet(t, ss, key, valFirst) 685 verifyGetNotFound(t, d, key) 686 } 687 688 func TestIterLeak(t *testing.T) { 689 for _, leak := range []bool{true, false} { 690 t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) { 691 for _, flush := range []bool{true, false} { 692 t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) { 693 d, err := Open("", testingRandomized(t, &Options{ 694 FS: vfs.NewMem(), 695 })) 696 require.NoError(t, err) 697 698 require.NoError(t, d.Set([]byte("a"), []byte("a"), nil)) 699 if flush { 700 require.NoError(t, d.Flush()) 701 } 702 iter, _ := d.NewIter(nil) 703 iter.First() 704 if !leak { 705 require.NoError(t, iter.Close()) 706 require.NoError(t, d.Close()) 707 } else { 708 defer iter.Close() 709 if err := d.Close(); err == nil { 710 t.Fatalf("expected failure, but found success") 711 } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { 712 t.Fatalf("expected leaked iterators, but found %+v", err) 713 } else { 714 t.Log(err.Error()) 715 } 716 } 717 }) 718 } 719 }) 720 } 721 } 722 723 // Make sure that we detect an iter leak when only one DB closes 724 // while the second db still holds a reference to the TableCache. 725 func TestIterLeakSharedCache(t *testing.T) { 726 for _, leak := range []bool{true, false} { 727 t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) { 728 for _, flush := range []bool{true, false} { 729 t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) { 730 d1, err := Open("", &Options{ 731 FS: vfs.NewMem(), 732 }) 733 require.NoError(t, err) 734 735 d2, err := Open("", &Options{ 736 FS: vfs.NewMem(), 737 }) 738 require.NoError(t, err) 739 740 require.NoError(t, d1.Set([]byte("a"), []byte("a"), nil)) 741 if flush { 742 require.NoError(t, d1.Flush()) 743 } 744 745 require.NoError(t, d2.Set([]byte("a"), []byte("a"), nil)) 746 if flush { 747 require.NoError(t, d2.Flush()) 748 } 749 750 // Check if leak detection works with only one db closing. 
751 { 752 iter1, _ := d1.NewIter(nil) 753 iter1.First() 754 if !leak { 755 require.NoError(t, iter1.Close()) 756 require.NoError(t, d1.Close()) 757 } else { 758 defer iter1.Close() 759 if err := d1.Close(); err == nil { 760 t.Fatalf("expected failure, but found success") 761 } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { 762 t.Fatalf("expected leaked iterators, but found %+v", err) 763 } else { 764 t.Log(err.Error()) 765 } 766 } 767 } 768 769 { 770 iter2, _ := d2.NewIter(nil) 771 iter2.First() 772 if !leak { 773 require.NoError(t, iter2.Close()) 774 require.NoError(t, d2.Close()) 775 } else { 776 defer iter2.Close() 777 if err := d2.Close(); err == nil { 778 t.Fatalf("expected failure, but found success") 779 } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { 780 t.Fatalf("expected leaked iterators, but found %+v", err) 781 } else { 782 t.Log(err.Error()) 783 } 784 } 785 } 786 787 }) 788 } 789 }) 790 } 791 } 792 793 func TestMemTableReservation(t *testing.T) { 794 opts := &Options{ 795 Cache: NewCache(128 << 10 /* 128 KB */), 796 MemTableSize: initialMemTableSize, 797 FS: vfs.NewMem(), 798 } 799 defer opts.Cache.Unref() 800 opts.testingRandomized(t) 801 opts.EnsureDefaults() 802 // We're going to be looking at and asserting the global memtable reservation 803 // amount below so we don't want to race with any triggered stats collections. 804 opts.private.disableTableStats = true 805 806 // Add a block to the cache. Note that the memtable size is larger than the 807 // cache size, so opening the DB should cause this block to be evicted. 808 tmpID := opts.Cache.NewID() 809 helloWorld := []byte("hello world") 810 value := cache.Alloc(len(helloWorld)) 811 copy(value.Buf(), helloWorld) 812 opts.Cache.Set(tmpID, base.FileNum(0).DiskFileNum(), 0, value).Release() 813 814 d, err := Open("", opts) 815 require.NoError(t, err) 816 817 checkReserved := func(expected int64) { 818 t.Helper() 819 if reserved := d.memTableReserved.Load(); expected != reserved { 820 t.Fatalf("expected %d reserved, but found %d", expected, reserved) 821 } 822 } 823 824 checkReserved(int64(opts.MemTableSize)) 825 if refs := d.mu.mem.queue[len(d.mu.mem.queue)-1].readerRefs.Load(); refs != 2 { 826 t.Fatalf("expected 2 refs, but found %d", refs) 827 } 828 // Verify the memtable reservation has caused our test block to be evicted. 829 if h := opts.Cache.Get(tmpID, base.FileNum(0).DiskFileNum(), 0); h.Get() != nil { 830 t.Fatalf("expected failure, but found success: %s", h.Get()) 831 } 832 833 // Flush the memtable. The memtable reservation should double because old 834 // memtable will be recycled, saved for the next memtable allocation. 835 require.NoError(t, d.Flush()) 836 checkReserved(int64(2 * opts.MemTableSize)) 837 // Flush again. The memtable reservation should be unchanged because at most 838 // 1 memtable may be preserved for recycling. 839 840 // Flush in the presence of an active iterator. The iterator will hold a 841 // reference to a readState which will in turn hold a reader reference to the 842 // memtable. 843 iter, _ := d.NewIter(nil) 844 require.NoError(t, d.Flush()) 845 // The flush moved the recycled memtable into position as an active mutable 846 // memtable. There are now two allocated memtables: 1 mutable and 1 pinned 847 // by the iterator's read state. 848 checkReserved(2 * int64(opts.MemTableSize)) 849 850 // Flushing again should increase the reservation total to 3x: 1 active 851 // mutable, 1 for recycling, 1 pinned by iterator's read state. 
852 require.NoError(t, d.Flush()) 853 checkReserved(3 * int64(opts.MemTableSize)) 854 855 // Closing the iterator will release the iterator's read state, and the old 856 // memtable will be moved into position as the next memtable to recycle. 857 // There was already a memtable ready to be recycled, so that memtable will 858 // be freed and the overall reservation total is reduced to 2x. 859 require.NoError(t, iter.Close()) 860 checkReserved(2 * int64(opts.MemTableSize)) 861 862 require.NoError(t, d.Close()) 863 } 864 865 func TestMemTableReservationLeak(t *testing.T) { 866 d, err := Open("", &Options{FS: vfs.NewMem()}) 867 require.NoError(t, err) 868 869 d.mu.Lock() 870 last := d.mu.mem.queue[len(d.mu.mem.queue)-1] 871 last.readerRef() 872 defer func() { 873 last.readerUnref(true) 874 }() 875 d.mu.Unlock() 876 if err := d.Close(); err == nil { 877 t.Fatalf("expected failure, but found success") 878 } else if !strings.HasPrefix(err.Error(), "leaked memtable reservation:") { 879 t.Fatalf("expected leaked memtable reservation, but found %+v", err) 880 } else { 881 t.Log(err.Error()) 882 } 883 } 884 885 func TestCacheEvict(t *testing.T) { 886 cache := NewCache(10 << 20) 887 defer cache.Unref() 888 889 d, err := Open("", &Options{ 890 Cache: cache, 891 FS: vfs.NewMem(), 892 }) 893 require.NoError(t, err) 894 895 for i := 0; i < 1000; i++ { 896 key := []byte(fmt.Sprintf("%04d", i)) 897 require.NoError(t, d.Set(key, key, nil)) 898 } 899 900 require.NoError(t, d.Flush()) 901 iter, _ := d.NewIter(nil) 902 for iter.First(); iter.Valid(); iter.Next() { 903 } 904 require.NoError(t, iter.Close()) 905 906 if size := cache.Size(); size == 0 { 907 t.Fatalf("expected non-zero cache size") 908 } 909 910 for i := 0; i < 1000; i++ { 911 key := []byte(fmt.Sprintf("%04d", i)) 912 require.NoError(t, d.Delete(key, nil)) 913 } 914 915 require.NoError(t, d.Compact([]byte("0"), []byte("1"), false)) 916 917 require.NoError(t, d.Close()) 918 919 if size := cache.Size(); size != 0 { 920 t.Fatalf("expected empty cache, but found %d", size) 921 } 922 } 923 924 func TestFlushEmpty(t *testing.T) { 925 d, err := Open("", testingRandomized(t, &Options{ 926 FS: vfs.NewMem(), 927 })) 928 require.NoError(t, err) 929 930 // Flushing an empty memtable should not fail. 931 require.NoError(t, d.Flush()) 932 require.NoError(t, d.Close()) 933 } 934 935 func TestRollManifest(t *testing.T) { 936 toPreserve := rand.Int31n(5) + 1 937 opts := &Options{ 938 MaxManifestFileSize: 1, 939 L0CompactionThreshold: 10, 940 L0StopWritesThreshold: 1000, 941 FS: vfs.NewMem(), 942 NumPrevManifest: int(toPreserve), 943 } 944 opts.DisableAutomaticCompactions = true 945 opts.testingRandomized(t) 946 d, err := Open("", opts) 947 require.NoError(t, err) 948 949 manifestFileNumber := func() FileNum { 950 d.mu.Lock() 951 defer d.mu.Unlock() 952 return d.mu.versions.manifestFileNum 953 } 954 sizeRolloverState := func() (int64, int64) { 955 d.mu.Lock() 956 defer d.mu.Unlock() 957 return d.mu.versions.rotationHelper.DebugInfo() 958 } 959 960 current := func() string { 961 desc, err := Peek(d.dirname, d.opts.FS) 962 require.NoError(t, err) 963 return desc.ManifestFilename 964 } 965 966 lastManifestNum := manifestFileNumber() 967 manifestNums := []base.FileNum{lastManifestNum} 968 for i := 0; i < 5; i++ { 969 // MaxManifestFileSize is 1, but the rollover logic also counts edits 970 // since the last snapshot to decide on rollover, so do as many flushes as 971 // it demands. 
972 lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState() 973 var expectedLastSnapshotCount, expectedEditsSinceSnapshotCount int64 974 switch i { 975 case 0: 976 // DB is empty. 977 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 0 978 case 1: 979 // First edit that caused rollover is not in the snapshot. 980 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 1 981 case 2: 982 // One flush is in the snapshot. One flush in the edit. 983 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 1, 1 984 case 3: 985 // Two flushes in the snapshot. One flush in the edit. Will need to do 986 // two more flushes, the first of which will be in the next snapshot. 987 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 2, 1 988 case 4: 989 // Four flushes in the snapshot. One flush in the edit. Will need to do 990 // four more flushes, three of which will be in the snapshot. 991 expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 4, 1 992 } 993 require.Equal(t, expectedLastSnapshotCount, lastSnapshotCount) 994 require.Equal(t, expectedEditsSinceSnapshotCount, editsSinceSnapshotCount) 995 // Number of flushes to do to trigger the rollover. 996 steps := int(lastSnapshotCount - editsSinceSnapshotCount + 1) 997 // Steps can be <= 0, but we need to do at least one edit to trigger the 998 // rollover logic. 999 if steps <= 0 { 1000 steps = 1 1001 } 1002 for j := 0; j < steps; j++ { 1003 require.NoError(t, d.Set([]byte("a"), nil, nil)) 1004 require.NoError(t, d.Flush()) 1005 } 1006 d.TestOnlyWaitForCleaning() 1007 num := manifestFileNumber() 1008 if lastManifestNum == num { 1009 t.Fatalf("manifest failed to roll %d: %d == %d", i, lastManifestNum, num) 1010 } 1011 1012 manifestNums = append(manifestNums, num) 1013 lastManifestNum = num 1014 1015 expectedCurrent := fmt.Sprintf("MANIFEST-%s", lastManifestNum) 1016 if v := current(); expectedCurrent != v { 1017 t.Fatalf("expected %s, but found %s", expectedCurrent, v) 1018 } 1019 } 1020 lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState() 1021 require.EqualValues(t, 8, lastSnapshotCount) 1022 require.EqualValues(t, 1, editsSinceSnapshotCount) 1023 1024 files, err := d.opts.FS.List("") 1025 require.NoError(t, err) 1026 1027 var manifests []string 1028 for _, filename := range files { 1029 fileType, _, ok := base.ParseFilename(d.opts.FS, filename) 1030 if !ok { 1031 continue 1032 } 1033 if fileType == fileTypeManifest { 1034 manifests = append(manifests, filename) 1035 } 1036 } 1037 1038 sort.Slice(manifests, func(i, j int) bool { 1039 return manifests[i] < manifests[j] 1040 }) 1041 1042 var expected []string 1043 for i := len(manifestNums) - int(toPreserve) - 1; i < len(manifestNums); i++ { 1044 expected = append( 1045 expected, 1046 fmt.Sprintf("MANIFEST-%s", manifestNums[i]), 1047 ) 1048 } 1049 require.EqualValues(t, expected, manifests) 1050 1051 // Test the logic that uses the future snapshot size to rollover. 1052 // Reminder: we have a snapshot with 8 files and the manifest has 1 edit 1053 // (flush) with 1 file. 1054 // Add 8 more files with a different key. 1055 lastManifestNum = manifestFileNumber() 1056 for j := 0; j < 8; j++ { 1057 require.NoError(t, d.Set([]byte("c"), nil, nil)) 1058 require.NoError(t, d.Flush()) 1059 } 1060 lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() 1061 // Need 16 more files in edits to trigger a rollover. 
1062 require.EqualValues(t, 16, lastSnapshotCount) 1063 require.EqualValues(t, 1, editsSinceSnapshotCount) 1064 require.NotEqual(t, manifestFileNumber(), lastManifestNum) 1065 lastManifestNum = manifestFileNumber() 1066 // Do a compaction that moves 8 of the files from L0 to 1 file in L6. This 1067 // adds 9 files in edits. We still need 6 more files in edits based on the 1068 // last snapshot. But the current version has only 9 L0 files and 1 L6 file, 1069 // for a total of 10 files. So 1 flush should push us over that threshold. 1070 d.Compact([]byte("c"), []byte("d"), false) 1071 lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() 1072 require.EqualValues(t, 16, lastSnapshotCount) 1073 require.EqualValues(t, 10, editsSinceSnapshotCount) 1074 require.Equal(t, manifestFileNumber(), lastManifestNum) 1075 require.NoError(t, d.Set([]byte("c"), nil, nil)) 1076 require.NoError(t, d.Flush()) 1077 lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() 1078 require.EqualValues(t, 10, lastSnapshotCount) 1079 require.EqualValues(t, 1, editsSinceSnapshotCount) 1080 require.NotEqual(t, manifestFileNumber(), lastManifestNum) 1081 1082 require.NoError(t, d.Close()) 1083 } 1084 1085 func TestDBClosed(t *testing.T) { 1086 d, err := Open("", &Options{ 1087 FS: vfs.NewMem(), 1088 }) 1089 require.NoError(t, err) 1090 require.NoError(t, d.Close()) 1091 1092 catch := func(f func()) (err error) { 1093 defer func() { 1094 if r := recover(); r != nil { 1095 err = r.(error) 1096 } 1097 }() 1098 f() 1099 return nil 1100 } 1101 1102 require.True(t, errors.Is(catch(func() { _ = d.Close() }), ErrClosed)) 1103 1104 require.True(t, errors.Is(catch(func() { _ = d.Compact(nil, nil, false) }), ErrClosed)) 1105 require.True(t, errors.Is(catch(func() { _ = d.Flush() }), ErrClosed)) 1106 require.True(t, errors.Is(catch(func() { _, _ = d.AsyncFlush() }), ErrClosed)) 1107 1108 require.True(t, errors.Is(catch(func() { _, _, _ = d.Get(nil) }), ErrClosed)) 1109 require.True(t, errors.Is(catch(func() { _ = d.Delete(nil, nil) }), ErrClosed)) 1110 require.True(t, errors.Is(catch(func() { _ = d.DeleteRange(nil, nil, nil) }), ErrClosed)) 1111 require.True(t, errors.Is(catch(func() { _ = d.Ingest(nil) }), ErrClosed)) 1112 require.True(t, errors.Is(catch(func() { _ = d.LogData(nil, nil) }), ErrClosed)) 1113 require.True(t, errors.Is(catch(func() { _ = d.Merge(nil, nil, nil) }), ErrClosed)) 1114 require.True(t, errors.Is(catch(func() { _ = d.RatchetFormatMajorVersion(internalFormatNewest) }), ErrClosed)) 1115 require.True(t, errors.Is(catch(func() { _ = d.Set(nil, nil, nil) }), ErrClosed)) 1116 1117 require.True(t, errors.Is(catch(func() { _ = d.NewSnapshot() }), ErrClosed)) 1118 1119 b := d.NewIndexedBatch() 1120 require.True(t, errors.Is(catch(func() { _ = b.Commit(nil) }), ErrClosed)) 1121 require.True(t, errors.Is(catch(func() { _ = d.Apply(b, nil) }), ErrClosed)) 1122 require.True(t, errors.Is(catch(func() { _, _ = b.NewIter(nil) }), ErrClosed)) 1123 } 1124 1125 func TestDBConcurrentCommitCompactFlush(t *testing.T) { 1126 d, err := Open("", testingRandomized(t, &Options{ 1127 FS: vfs.NewMem(), 1128 })) 1129 require.NoError(t, err) 1130 1131 // Concurrently commit, compact, and flush in order to stress the locking around 1132 // those operations. 
1133 const n = 1000 1134 var wg sync.WaitGroup 1135 wg.Add(n) 1136 for i := 0; i < n; i++ { 1137 go func(i int) { 1138 defer wg.Done() 1139 _ = d.Set([]byte(fmt.Sprint(i)), nil, nil) 1140 var err error 1141 switch i % 3 { 1142 case 0: 1143 err = d.Compact(nil, []byte("\xff"), false) 1144 case 1: 1145 err = d.Flush() 1146 case 2: 1147 _, err = d.AsyncFlush() 1148 } 1149 require.NoError(t, err) 1150 }(i) 1151 } 1152 wg.Wait() 1153 1154 require.NoError(t, d.Close()) 1155 } 1156 1157 func TestDBConcurrentCompactClose(t *testing.T) { 1158 // Test closing while a compaction is ongoing. This ensures compaction code 1159 // detects the close and finishes cleanly. 1160 mem := vfs.NewMem() 1161 for i := 0; i < 100; i++ { 1162 opts := &Options{ 1163 FS: mem, 1164 MaxConcurrentCompactions: func() int { 1165 return 2 1166 }, 1167 } 1168 d, err := Open("", testingRandomized(t, opts)) 1169 require.NoError(t, err) 1170 1171 // Ingest a series of files containing a single key each. As the outer 1172 // loop progresses, these ingestions will build up compaction debt 1173 // causing compactions to be running concurrently with the close below. 1174 for j := 0; j < 10; j++ { 1175 path := fmt.Sprintf("ext%d", j) 1176 f, err := mem.Create(path) 1177 require.NoError(t, err) 1178 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1179 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1180 }) 1181 require.NoError(t, w.Set([]byte(fmt.Sprint(j)), nil)) 1182 require.NoError(t, w.Close()) 1183 require.NoError(t, d.Ingest([]string{path})) 1184 } 1185 1186 require.NoError(t, d.Close()) 1187 } 1188 } 1189 1190 func TestDBApplyBatchNilDB(t *testing.T) { 1191 d, err := Open("", &Options{FS: vfs.NewMem()}) 1192 require.NoError(t, err) 1193 1194 b1 := &Batch{} 1195 b1.Set([]byte("test"), nil, nil) 1196 1197 b2 := &Batch{} 1198 b2.Apply(b1, nil) 1199 if b2.memTableSize != 0 { 1200 t.Fatalf("expected memTableSize to not be set") 1201 } 1202 require.NoError(t, d.Apply(b2, nil)) 1203 if b1.memTableSize != b2.memTableSize { 1204 t.Fatalf("expected memTableSize %d, but found %d", b1.memTableSize, b2.memTableSize) 1205 } 1206 1207 require.NoError(t, d.Close()) 1208 } 1209 1210 func TestDBApplyBatchMismatch(t *testing.T) { 1211 srcDB, err := Open("", &Options{FS: vfs.NewMem()}) 1212 require.NoError(t, err) 1213 1214 applyDB, err := Open("", &Options{FS: vfs.NewMem()}) 1215 require.NoError(t, err) 1216 1217 err = func() (err error) { 1218 defer func() { 1219 if v := recover(); v != nil { 1220 err = errors.Errorf("%v", v) 1221 } 1222 }() 1223 1224 b := srcDB.NewBatch() 1225 b.Set([]byte("test"), nil, nil) 1226 return applyDB.Apply(b, nil) 1227 }() 1228 if err == nil || !strings.Contains(err.Error(), "pebble: batch db mismatch:") { 1229 t.Fatalf("expected error, but found %v", err) 1230 } 1231 1232 require.NoError(t, srcDB.Close()) 1233 require.NoError(t, applyDB.Close()) 1234 } 1235 1236 func TestCloseCleanerRace(t *testing.T) { 1237 mem := vfs.NewMem() 1238 for i := 0; i < 20; i++ { 1239 db, err := Open("", testingRandomized(t, &Options{FS: mem})) 1240 require.NoError(t, err) 1241 require.NoError(t, db.Set([]byte("a"), []byte("something"), Sync)) 1242 require.NoError(t, db.Flush()) 1243 // Ref the sstables so cannot be deleted. 1244 it, _ := db.NewIter(nil) 1245 require.NotNil(t, it) 1246 require.NoError(t, db.DeleteRange([]byte("a"), []byte("b"), Sync)) 1247 require.NoError(t, db.Compact([]byte("a"), []byte("b"), false)) 1248 // Only the iterator is keeping the sstables alive. 
1249 files, err := mem.List("/") 1250 require.NoError(t, err) 1251 var found bool 1252 for _, f := range files { 1253 if strings.HasSuffix(f, ".sst") { 1254 found = true 1255 break 1256 } 1257 } 1258 require.True(t, found) 1259 // Close the iterator and the db in succession so file cleaning races with DB.Close() -- 1260 // latter should wait for file cleaning to finish. 1261 require.NoError(t, it.Close()) 1262 require.NoError(t, db.Close()) 1263 files, err = mem.List("/") 1264 require.NoError(t, err) 1265 for _, f := range files { 1266 if strings.HasSuffix(f, ".sst") { 1267 t.Fatalf("found sst: %s", f) 1268 } 1269 } 1270 } 1271 } 1272 1273 func TestSSTablesWithApproximateSpanBytes(t *testing.T) { 1274 d, err := Open("", &Options{ 1275 FS: vfs.NewMem(), 1276 }) 1277 require.NoError(t, err) 1278 defer func() { 1279 if d != nil { 1280 require.NoError(t, d.Close()) 1281 } 1282 }() 1283 1284 // Create two sstables. 1285 // sstable is contained within keyspan (fileNum = 5). 1286 require.NoError(t, d.Set([]byte("c"), nil, nil)) 1287 require.NoError(t, d.Set([]byte("d"), nil, nil)) 1288 require.NoError(t, d.Flush()) 1289 1290 // sstable partially overlaps keyspan (fileNum = 7). 1291 require.NoError(t, d.Set([]byte("d"), nil, nil)) 1292 require.NoError(t, d.Set([]byte("g"), nil, nil)) 1293 require.NoError(t, d.Flush()) 1294 1295 // cannot use WithApproximateSpanBytes without WithProperties. 1296 _, err = d.SSTables(WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes()) 1297 require.Error(t, err) 1298 1299 // cannot use WithApproximateSpanBytes without WithKeyRangeFilter. 1300 _, err = d.SSTables(WithProperties(), WithApproximateSpanBytes()) 1301 require.Error(t, err) 1302 1303 tableInfos, err := d.SSTables(WithProperties(), WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes()) 1304 require.NoError(t, err) 1305 1306 for _, levelTables := range tableInfos { 1307 for _, table := range levelTables { 1308 approximateSpanBytes, err := strconv.ParseInt(table.Properties.UserProperties["approximate-span-bytes"], 10, 64) 1309 require.NoError(t, err) 1310 if table.FileNum == 5 { 1311 require.Equal(t, uint64(approximateSpanBytes), table.Size) 1312 } 1313 if table.FileNum == 7 { 1314 require.Less(t, uint64(approximateSpanBytes), table.Size) 1315 } 1316 } 1317 } 1318 } 1319 1320 func TestFilterSSTablesWithOption(t *testing.T) { 1321 d, err := Open("", &Options{ 1322 FS: vfs.NewMem(), 1323 }) 1324 require.NoError(t, err) 1325 defer func() { 1326 if d != nil { 1327 require.NoError(t, d.Close()) 1328 } 1329 }() 1330 1331 // Create two sstables. 
1332 require.NoError(t, d.Set([]byte("/Table/5"), nil, nil)) 1333 require.NoError(t, d.Flush()) 1334 require.NoError(t, d.Set([]byte("/Table/10"), nil, nil)) 1335 require.NoError(t, d.Flush()) 1336 1337 tableInfos, err := d.SSTables(WithKeyRangeFilter([]byte("/Table/5"), []byte("/Table/6"))) 1338 require.NoError(t, err) 1339 1340 totalTables := 0 1341 for _, levelTables := range tableInfos { 1342 totalTables += len(levelTables) 1343 } 1344 1345 // with filter second sstable should not be returned 1346 require.EqualValues(t, 1, totalTables) 1347 1348 tableInfos, err = d.SSTables() 1349 require.NoError(t, err) 1350 1351 totalTables = 0 1352 for _, levelTables := range tableInfos { 1353 totalTables += len(levelTables) 1354 } 1355 1356 // without filter 1357 require.EqualValues(t, 2, totalTables) 1358 } 1359 1360 func TestSSTables(t *testing.T) { 1361 d, err := Open("", &Options{ 1362 FS: vfs.NewMem(), 1363 }) 1364 require.NoError(t, err) 1365 defer func() { 1366 if d != nil { 1367 require.NoError(t, d.Close()) 1368 } 1369 }() 1370 1371 // Create two sstables. 1372 require.NoError(t, d.Set([]byte("hello"), nil, nil)) 1373 require.NoError(t, d.Flush()) 1374 require.NoError(t, d.Set([]byte("world"), nil, nil)) 1375 require.NoError(t, d.Flush()) 1376 1377 // by default returned table infos should not contain Properties 1378 tableInfos, err := d.SSTables() 1379 require.NoError(t, err) 1380 for _, levelTables := range tableInfos { 1381 for _, info := range levelTables { 1382 require.Nil(t, info.Properties) 1383 } 1384 } 1385 1386 // with opt `WithProperties()` the `Properties` in table info should not be nil 1387 tableInfos, err = d.SSTables(WithProperties()) 1388 require.NoError(t, err) 1389 for _, levelTables := range tableInfos { 1390 for _, info := range levelTables { 1391 require.NotNil(t, info.Properties) 1392 } 1393 } 1394 } 1395 1396 type testTracer struct { 1397 enabledOnlyForNonBackgroundContext bool 1398 buf strings.Builder 1399 } 1400 1401 func (t *testTracer) Infof(format string, args ...interface{}) {} 1402 func (t *testTracer) Fatalf(format string, args ...interface{}) {} 1403 1404 func (t *testTracer) Eventf(ctx context.Context, format string, args ...interface{}) { 1405 if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() { 1406 return 1407 } 1408 fmt.Fprintf(&t.buf, format, args...) 1409 fmt.Fprint(&t.buf, "\n") 1410 } 1411 1412 func (t *testTracer) IsTracingEnabled(ctx context.Context) bool { 1413 if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() { 1414 return false 1415 } 1416 return true 1417 } 1418 1419 func TestTracing(t *testing.T) { 1420 if !invariants.Enabled { 1421 // The test relies on timing behavior injected when invariants.Enabled. 1422 return 1423 } 1424 var tracer testTracer 1425 c := NewCache(0) 1426 defer c.Unref() 1427 d, err := Open("", &Options{ 1428 FS: vfs.NewMem(), 1429 Cache: c, 1430 LoggerAndTracer: &tracer, 1431 }) 1432 require.NoError(t, err) 1433 defer func() { 1434 require.NoError(t, d.Close()) 1435 }() 1436 1437 // Create a sstable. 
1438 require.NoError(t, d.Set([]byte("hello"), nil, nil)) 1439 require.NoError(t, d.Flush()) 1440 _, closer, err := d.Get([]byte("hello")) 1441 require.NoError(t, err) 1442 closer.Close() 1443 readerInitTraceString := "reading 37 bytes took 5ms\nreading 628 bytes took 5ms\n" 1444 iterTraceString := "reading 27 bytes took 5ms\nreading 29 bytes took 5ms\n" 1445 require.Equal(t, readerInitTraceString+iterTraceString, tracer.buf.String()) 1446 1447 // Get again, but since it currently uses context.Background(), no trace 1448 // output is produced. 1449 tracer.buf.Reset() 1450 tracer.enabledOnlyForNonBackgroundContext = true 1451 _, closer, err = d.Get([]byte("hello")) 1452 require.NoError(t, err) 1453 closer.Close() 1454 require.Equal(t, "", tracer.buf.String()) 1455 1456 ctx, cancel := context.WithCancel(context.Background()) 1457 defer cancel() 1458 iter, _ := d.NewIterWithContext(ctx, nil) 1459 iter.SeekGE([]byte("hello")) 1460 iter.Close() 1461 require.Equal(t, iterTraceString, tracer.buf.String()) 1462 1463 tracer.buf.Reset() 1464 snap := d.NewSnapshot() 1465 iter, _ = snap.NewIterWithContext(ctx, nil) 1466 iter.SeekGE([]byte("hello")) 1467 iter.Close() 1468 require.Equal(t, iterTraceString, tracer.buf.String()) 1469 snap.Close() 1470 1471 tracer.buf.Reset() 1472 b := d.NewIndexedBatch() 1473 iter = b.NewIterWithContext(ctx, nil) 1474 iter.SeekGE([]byte("hello")) 1475 iter.Close() 1476 require.Equal(t, iterTraceString, tracer.buf.String()) 1477 b.Close() 1478 } 1479 1480 func TestMemtableIngestInversion(t *testing.T) { 1481 memFS := vfs.NewMem() 1482 opts := &Options{ 1483 FS: memFS, 1484 MemTableSize: 256 << 10, // 4KB 1485 MemTableStopWritesThreshold: 1000, 1486 L0StopWritesThreshold: 1000, 1487 L0CompactionThreshold: 2, 1488 MaxConcurrentCompactions: func() int { 1489 return 1000 1490 }, 1491 } 1492 1493 const channelTimeout = 5 * time.Second 1494 1495 // We induce delay in compactions by passing in an EventListener that stalls on 1496 // the first TableCreated event for a compaction job we want to block. 1497 // FlushBegin and CompactionBegin has info on compaction start/output levels 1498 // which is what we need to identify what compactions to block. However 1499 // FlushBegin and CompactionBegin are called while holding db.mu, so we cannot 1500 // block those events forever. Instead, we grab the job ID from those events 1501 // and store it. Then during TableCreated, we check if we're creating an output 1502 // for a job we have identified earlier as one to block, and then hold on a 1503 // semaphore there until there's a signal from the test code to resume with the 1504 // compaction. 1505 // 1506 // If nextBlockedCompaction is non-zero, we must block the next compaction 1507 // out of the nextBlockedCompaction - 3 start level. 1 means block the next 1508 // intra-L0 compaction and 2 means block the next flush (as flushes have 1509 // a -1 start level). 1510 var nextBlockedCompaction, blockedJobID int 1511 var blockedCompactionsMu sync.Mutex // protects the above two variables. 
1512 nextSem := make(chan chan struct{}, 1) 1513 var el EventListener 1514 el.EnsureDefaults(testLogger{t: t}) 1515 el.FlushBegin = func(info FlushInfo) { 1516 blockedCompactionsMu.Lock() 1517 defer blockedCompactionsMu.Unlock() 1518 if nextBlockedCompaction == 2 { 1519 nextBlockedCompaction = 0 1520 blockedJobID = info.JobID 1521 } 1522 } 1523 el.CompactionBegin = func(info CompactionInfo) { 1524 // 0 = block nothing, 1 = block intra-L0 compaction, 2 = block flush, 1525 // 3 = block L0 -> LBase compaction, 4 = block compaction out of L1, and so on. 1526 blockedCompactionsMu.Lock() 1527 defer blockedCompactionsMu.Unlock() 1528 blockValue := info.Input[0].Level + 3 1529 if info.Input[0].Level == 0 && info.Output.Level == 0 { 1530 // Intra-L0 compaction, denoted by a blockValue of 1. 1531 blockValue = 1 1532 } 1533 if nextBlockedCompaction == blockValue { 1534 nextBlockedCompaction = 0 1535 blockedJobID = info.JobID 1536 } 1537 } 1538 el.TableCreated = func(info TableCreateInfo) { 1539 blockedCompactionsMu.Lock() 1540 if info.JobID != blockedJobID { 1541 blockedCompactionsMu.Unlock() 1542 return 1543 } 1544 blockedJobID = 0 1545 blockedCompactionsMu.Unlock() 1546 sem := make(chan struct{}) 1547 nextSem <- sem 1548 <-sem 1549 } 1550 tel := TeeEventListener(MakeLoggingEventListener(testLogger{t: t}), el) 1551 opts.EventListener = &tel 1552 opts.Experimental.L0CompactionConcurrency = 1 1553 d, err := Open("", opts) 1554 require.NoError(t, err) 1555 defer func() { 1556 if d != nil { 1557 require.NoError(t, d.Close()) 1558 } 1559 }() 1560 1561 printLSM := func() { 1562 d.mu.Lock() 1563 s := d.mu.versions.currentVersion().String() 1564 d.mu.Unlock() 1565 t.Logf("%s", s) 1566 } 1567 1568 // Create some sstables. These should go into L6. These are irrelevant for 1569 // the rest of the test. 1570 require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil)) 1571 require.NoError(t, d.Flush()) 1572 require.NoError(t, d.Set([]byte("d"), []byte("bar"), nil)) 1573 require.NoError(t, d.Flush()) 1574 require.NoError(t, d.Compact([]byte("a"), []byte("z"), true)) 1575 1576 var baseCompactionSem, flushSem, intraL0Sem chan struct{} 1577 // Block an L0 -> LBase compaction. This is necessary to induce intra-L0 1578 // compactions later on. 1579 blockedCompactionsMu.Lock() 1580 nextBlockedCompaction = 3 1581 blockedCompactionsMu.Unlock() 1582 timeoutSem := time.After(channelTimeout) 1583 t.Log("blocking an L0 -> LBase compaction") 1584 // Write sstables to L0 until we're blocked on an L0 -> LBase compaction. 1585 breakLoop := false 1586 for !breakLoop { 1587 select { 1588 case sem := <-nextSem: 1589 baseCompactionSem = sem 1590 breakLoop = true 1591 case <-timeoutSem: 1592 t.Fatal("did not get blocked on an LBase compaction") 1593 default: 1594 require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil)) 1595 require.NoError(t, d.Set([]byte("g"), []byte("bar"), nil)) 1596 require.NoError(t, d.Flush()) 1597 time.Sleep(100 * time.Millisecond) 1598 } 1599 } 1600 printLSM() 1601 1602 // Do 4 ingests, one with the key cc, one with bb and cc, and two with just bb. 1603 // The purpose of the sstable containing cc is to inflate the L0 sublevel 1604 // count of the interval at cc, as that's where we want the intra-L0 compaction 1605 // to be seeded. However, we also need a file left of that interval to have 1606 // the same (or higher) sublevel to trigger the bug in 1607 // cockroachdb/cockroach#101896. That's why we ingest a file after it to 1608 // "bridge" the bb/cc intervals, and then ingest a file at bb.
These go 1609 // into sublevels like this: 1610 // 1611 // bb 1612 // bb 1613 // bb-----cc 1614 // cc 1615 // 1616 // Eventually, we'll drop an ingested file containing a range del starting at 1617 // cc around here: 1618 // 1619 // bb 1620 // bb cc---... 1621 // bb-----cc 1622 // cc 1623 { 1624 path := "ingest1.sst" 1625 f, err := memFS.Create(path) 1626 require.NoError(t, err) 1627 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1628 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1629 }) 1630 require.NoError(t, w.Set([]byte("cc"), []byte("foo"))) 1631 require.NoError(t, w.Close()) 1632 require.NoError(t, d.Ingest([]string{path})) 1633 } 1634 { 1635 path := "ingest2.sst" 1636 f, err := memFS.Create(path) 1637 require.NoError(t, err) 1638 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1639 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1640 }) 1641 require.NoError(t, w.Set([]byte("bb"), []byte("foo2"))) 1642 require.NoError(t, w.Set([]byte("cc"), []byte("foo2"))) 1643 require.NoError(t, w.Close()) 1644 require.NoError(t, d.Ingest([]string{path})) 1645 } 1646 { 1647 path := "ingest3.sst" 1648 f, err := memFS.Create(path) 1649 require.NoError(t, err) 1650 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1651 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1652 }) 1653 require.NoError(t, w.Set([]byte("bb"), []byte("foo3"))) 1654 require.NoError(t, w.Close()) 1655 require.NoError(t, d.Ingest([]string{path})) 1656 } 1657 { 1658 path := "ingest4.sst" 1659 f, err := memFS.Create(path) 1660 require.NoError(t, err) 1661 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1662 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1663 }) 1664 require.NoError(t, w.Set([]byte("bb"), []byte("foo4"))) 1665 require.NoError(t, w.Close()) 1666 require.NoError(t, d.Ingest([]string{path})) 1667 } 1668 1669 // We now have a base compaction blocked. Block a memtable flush to cause 1670 // memtables to queue up. 1671 // 1672 // Memtable (stuck): 1673 // 1674 // b-----------------g 1675 // 1676 // Relevant L0 sstables 1677 // 1678 // bb 1679 // bb 1680 // bb-----cc 1681 // cc 1682 blockedCompactionsMu.Lock() 1683 nextBlockedCompaction = 2 1684 blockedCompactionsMu.Unlock() 1685 t.Log("blocking a flush") 1686 require.NoError(t, d.Set([]byte("b"), []byte("foo2"), nil)) 1687 require.NoError(t, d.Set([]byte("g"), []byte("bar2"), nil)) 1688 _, _ = d.AsyncFlush() 1689 select { 1690 case sem := <-nextSem: 1691 flushSem = sem 1692 case <-time.After(channelTimeout): 1693 t.Fatal("did not get blocked on a flush") 1694 } 1695 // Add one memtable to the flush queue, and finish it off. 1696 // 1697 // Memtables (stuck): 1698 // 1699 // b-----------------g (waiting to flush) 1700 // b-----------------g (flushing, blocked) 1701 // 1702 // Relevant L0 sstables 1703 // 1704 // bb 1705 // bb 1706 // bb-----cc 1707 // cc 1708 require.NoError(t, d.Set([]byte("b"), []byte("foo3"), nil)) 1709 require.NoError(t, d.Set([]byte("g"), []byte("bar3"), nil)) 1710 // note: this flush will wait for the earlier, blocked flush, but it closes 1711 // off the memtable, which is what we want. 1712 _, _ = d.AsyncFlush() 1713 1714 // Open a new mutable memtable. This gets us an earlier earliestUnflushedSeqNum 1715 // than the ingest below it.
1716 require.NoError(t, d.Set([]byte("c"), []byte("somethingbigishappening"), nil)) 1717 // Block an intra-L0 compaction, as one might happen around this time. 1718 blockedCompactionsMu.Lock() 1719 nextBlockedCompaction = 1 1720 blockedCompactionsMu.Unlock() 1721 t.Log("blocking an intra-L0 compaction") 1722 // Ingest a file containing a cc-e rangedel. 1723 // 1724 // Memtables: 1725 // 1726 // c (mutable) 1727 // b-----------------g (waiting to flush) 1728 // b-----------------g (flushing, blocked) 1729 // 1730 // Relevant L0 ssstables 1731 // 1732 // bb 1733 // bb cc-----e (just ingested) 1734 // bb-----cc 1735 // cc 1736 { 1737 path := "ingest5.sst" 1738 f, err := memFS.Create(path) 1739 require.NoError(t, err) 1740 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1741 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1742 }) 1743 require.NoError(t, w.DeleteRange([]byte("cc"), []byte("e"))) 1744 require.NoError(t, w.Close()) 1745 require.NoError(t, d.Ingest([]string{path})) 1746 } 1747 t.Log("main ingest complete") 1748 printLSM() 1749 t.Logf("%s", d.Metrics().String()) 1750 1751 require.NoError(t, d.Set([]byte("d"), []byte("ThisShouldNotBeDeleted"), nil)) 1752 1753 // Do another ingest with a seqnum newer than d. The purpose of this is to 1754 // increase the LargestSeqNum of the intra-L0 compaction output *beyond* 1755 // the flush that contains d=ThisShouldNotBeDeleted, therefore causing 1756 // that point key to be deleted (in the buggy code). 1757 // 1758 // Memtables: 1759 // 1760 // c-----d (mutable) 1761 // b-----------------g (waiting to flush) 1762 // b-----------------g (flushing, blocked) 1763 // 1764 // Relevant L0 ssstables 1765 // 1766 // bb cc 1767 // bb cc-----e (just ingested) 1768 // bb-----cc 1769 // cc 1770 { 1771 path := "ingest6.sst" 1772 f, err := memFS.Create(path) 1773 require.NoError(t, err) 1774 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 1775 TableFormat: d.FormatMajorVersion().MaxTableFormat(), 1776 }) 1777 require.NoError(t, w.Set([]byte("cc"), []byte("doesntmatter"))) 1778 require.NoError(t, w.Close()) 1779 require.NoError(t, d.Ingest([]string{path})) 1780 } 1781 1782 // Unblock earlier flushes. We will first finish flushing the blocked 1783 // memtable, and end up in this state: 1784 // 1785 // Memtables: 1786 // 1787 // c-----d (mutable) 1788 // b-----------------g (waiting to flush) 1789 // 1790 // Relevant L0 ssstables 1791 // 1792 // b-------------------g (irrelevant, just flushed) 1793 // bb cc (has LargestSeqNum > earliestUnflushedSeqNum) 1794 // bb cc-----e (has a rangedel) 1795 // bb-----cc 1796 // cc 1797 // 1798 // Note that while b----g is relatively old (and so has a low LargestSeqNum), 1799 // it bridges a bunch of intervals. Had we regenerated sublevels from scratch, 1800 // it'd have gone below the cc-e sstable. But due to #101896, we just slapped 1801 // it on top. Now, as long as our seed interval is the one at cc and our seed 1802 // file is the just-flushed L0 sstable, we will go down and include anything 1803 // in that interval even if it has a LargestSeqNum > earliestUnflushedSeqNum. 
1804 // 1805 // All asterisked L0 sstables should now get picked in an intra-L0 compaction 1806 // right after the flush finishes, that we then block: 1807 // 1808 // b-------------------g* 1809 // bb* cc* 1810 // bb* cc-----e* 1811 // bb-----cc* 1812 // cc* 1813 t.Log("unblocking flush") 1814 flushSem <- struct{}{} 1815 printLSM() 1816 1817 select { 1818 case sem := <-nextSem: 1819 intraL0Sem = sem 1820 case <-time.After(channelTimeout): 1821 t.Fatal("did not get blocked on an intra L0 compaction") 1822 } 1823 1824 // Ensure all memtables are flushed. This will mean d=ThisShouldNotBeDeleted 1825 // will land in L0 and since that was the last key written to a memtable, 1826 // and the ingestion at cc came after it, the output of the intra-L0 1827 // compaction will elevate the cc-e rangedel above it and delete it 1828 // (if #101896 is not fixed). 1829 ch, _ := d.AsyncFlush() 1830 <-ch 1831 1832 // Unblock earlier intra-L0 compaction. 1833 t.Log("unblocking intraL0") 1834 intraL0Sem <- struct{}{} 1835 printLSM() 1836 1837 // Try reading d a couple times. 1838 for i := 0; i < 2; i++ { 1839 val, closer, err := d.Get([]byte("d")) 1840 require.NoError(t, err) 1841 require.Equal(t, []byte("ThisShouldNotBeDeleted"), val) 1842 if closer != nil { 1843 closer.Close() 1844 } 1845 time.Sleep(100 * time.Millisecond) 1846 } 1847 1848 // Unblock everything. 1849 baseCompactionSem <- struct{}{} 1850 } 1851 1852 func BenchmarkDelete(b *testing.B) { 1853 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 1854 const keyCount = 10000 1855 var keys [keyCount][]byte 1856 for i := 0; i < keyCount; i++ { 1857 keys[i] = []byte(strconv.Itoa(rng.Int())) 1858 } 1859 val := bytes.Repeat([]byte("x"), 10) 1860 1861 benchmark := func(b *testing.B, useSingleDelete bool) { 1862 d, err := Open( 1863 "", 1864 &Options{ 1865 FS: vfs.NewMem(), 1866 }) 1867 if err != nil { 1868 b.Fatal(err) 1869 } 1870 defer func() { 1871 if err := d.Close(); err != nil { 1872 b.Fatal(err) 1873 } 1874 }() 1875 1876 b.StartTimer() 1877 for _, key := range keys { 1878 _ = d.Set(key, val, nil) 1879 if useSingleDelete { 1880 _ = d.SingleDelete(key, nil) 1881 } else { 1882 _ = d.Delete(key, nil) 1883 } 1884 } 1885 // Manually flush as it is flushing/compaction where SingleDelete 1886 // performance shows up. With SingleDelete, we can elide all of the 1887 // SingleDelete and Set records. 
1888 if err := d.Flush(); err != nil { 1889 b.Fatal(err) 1890 } 1891 b.StopTimer() 1892 } 1893 1894 b.Run("delete", func(b *testing.B) { 1895 for i := 0; i < b.N; i++ { 1896 benchmark(b, false) 1897 } 1898 }) 1899 1900 b.Run("single-delete", func(b *testing.B) { 1901 for i := 0; i < b.N; i++ { 1902 benchmark(b, true) 1903 } 1904 }) 1905 } 1906 1907 func BenchmarkNewIterReadAmp(b *testing.B) { 1908 for _, readAmp := range []int{10, 100, 1000} { 1909 b.Run(strconv.Itoa(readAmp), func(b *testing.B) { 1910 opts := &Options{ 1911 FS: vfs.NewMem(), 1912 L0StopWritesThreshold: 1000, 1913 } 1914 opts.DisableAutomaticCompactions = true 1915 1916 d, err := Open("", opts) 1917 require.NoError(b, err) 1918 1919 for i := 0; i < readAmp; i++ { 1920 require.NoError(b, d.Set([]byte("a"), []byte("b"), NoSync)) 1921 require.NoError(b, d.Flush()) 1922 } 1923 1924 require.Equal(b, d.Metrics().ReadAmp(), readAmp) 1925 1926 b.StopTimer() 1927 b.ResetTimer() 1928 for i := 0; i < b.N; i++ { 1929 b.StartTimer() 1930 iter, _ := d.NewIter(nil) 1931 b.StopTimer() 1932 require.NoError(b, iter.Close()) 1933 } 1934 1935 require.NoError(b, d.Close()) 1936 }) 1937 } 1938 } 1939 1940 func verifyGet(t *testing.T, r Reader, key, expected []byte) { 1941 val, closer, err := r.Get(key) 1942 require.NoError(t, err) 1943 if !bytes.Equal(expected, val) { 1944 t.Fatalf("expected %s, but got %s", expected, val) 1945 } 1946 closer.Close() 1947 } 1948 1949 func verifyGetNotFound(t *testing.T, r Reader, key []byte) { 1950 val, _, err := r.Get(key) 1951 if err != base.ErrNotFound { 1952 t.Fatalf("expected nil, but got %s", val) 1953 } 1954 } 1955 1956 func BenchmarkRotateMemtables(b *testing.B) { 1957 o := &Options{FS: vfs.NewMem(), MemTableSize: 64 << 20 /* 64 MB */} 1958 d, err := Open("", o) 1959 require.NoError(b, err) 1960 1961 // We want to jump to full-sized memtables. 1962 d.mu.Lock() 1963 d.mu.mem.nextSize = o.MemTableSize 1964 d.mu.Unlock() 1965 require.NoError(b, d.Flush()) 1966 1967 b.ResetTimer() 1968 for i := 0; i < b.N; i++ { 1969 if err := d.Flush(); err != nil { 1970 b.Fatal(err) 1971 } 1972 } 1973 } 1974 1975 type readTrackFS struct { 1976 vfs.FS 1977 1978 currReadCount atomic.Int32 1979 maxReadCount atomic.Int32 1980 } 1981 1982 type readTrackFile struct { 1983 vfs.File 1984 fs *readTrackFS 1985 } 1986 1987 func (fs *readTrackFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) { 1988 file, err := fs.FS.Open(name, opts...) 1989 if err != nil || !strings.HasSuffix(name, ".sst") { 1990 return file, err 1991 } 1992 return &readTrackFile{ 1993 File: file, 1994 fs: fs, 1995 }, nil 1996 } 1997 1998 func (f *readTrackFile) ReadAt(p []byte, off int64) (n int, err error) { 1999 val := f.fs.currReadCount.Add(1) 2000 defer f.fs.currReadCount.Add(-1) 2001 for maxVal := f.fs.maxReadCount.Load(); val > maxVal; maxVal = f.fs.maxReadCount.Load() { 2002 if f.fs.maxReadCount.CompareAndSwap(maxVal, val) { 2003 break 2004 } 2005 } 2006 return f.File.ReadAt(p, off) 2007 } 2008 2009 func TestLoadBlockSema(t *testing.T) { 2010 fs := &readTrackFS{FS: vfs.NewMem()} 2011 sema := fifo.NewSemaphore(100) 2012 db, err := Open("", testingRandomized(t, &Options{ 2013 Cache: cache.New(1), 2014 FS: fs, 2015 LoadBlockSema: sema, 2016 })) 2017 require.NoError(t, err) 2018 2019 key := func(i, j int) []byte { 2020 return []byte(fmt.Sprintf("%02d/%02d", i, j)) 2021 } 2022 2023 // Create 20 regions and compact them separately, so we end up with 20 2024 // disjoint tables. 
2025 const numRegions = 20 2026 const numKeys = 20 2027 for i := 0; i < numRegions; i++ { 2028 for j := 0; j < numKeys; j++ { 2029 require.NoError(t, db.Set(key(i, j), []byte("value"), nil)) 2030 } 2031 require.NoError(t, db.Compact(key(i, 0), key(i, numKeys-1), false)) 2032 } 2033 2034 // Read all regions to warm up the table cache. 2035 for i := 0; i < numRegions; i++ { 2036 val, closer, err := db.Get(key(i, 1)) 2037 require.NoError(t, err) 2038 require.Equal(t, []byte("value"), val) 2039 if closer != nil { 2040 closer.Close() 2041 } 2042 } 2043 2044 for _, n := range []int64{1, 2, 4} { 2045 t.Run(fmt.Sprintf("%d", n), func(t *testing.T) { 2046 sema.UpdateCapacity(n) 2047 fs.maxReadCount.Store(0) 2048 var wg sync.WaitGroup 2049 // Spin up workers that perform random reads. 2050 const numWorkers = 20 2051 for i := 0; i < numWorkers; i++ { 2052 wg.Add(1) 2053 go func() { 2054 defer wg.Done() 2055 const numQueries = 100 2056 for i := 0; i < numQueries; i++ { 2057 val, closer, err := db.Get(key(rand.Intn(numRegions), rand.Intn(numKeys))) 2058 require.NoError(t, err) 2059 require.Equal(t, []byte("value"), val) 2060 if closer != nil { 2061 closer.Close() 2062 } 2063 runtime.Gosched() 2064 } 2065 }() 2066 } 2067 wg.Wait() 2068 // Verify the maximum read count did not exceed the limit. 2069 maxReadCount := fs.maxReadCount.Load() 2070 require.Greater(t, maxReadCount, int32(0)) 2071 require.LessOrEqual(t, maxReadCount, int32(n)) 2072 }) 2073 } 2074 }
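// tryBackoffSketch is a minimal sketch, not part of the upstream db_test.go, that
// illustrates the exponential back-off documented on the try helper near the top of
// this file: the sleep doubles on every attempt, f is always called at least once,
// and try returns f's last error once the cumulative sleep reaches maxTotalSleep.
// The function name is hypothetical and only serves this illustration.
func tryBackoffSketch() {
	attempts := 0
	// With initialSleep=1ms and maxTotalSleep=10ms the sleeps are
	// 1ms+2ms+4ms+8ms = 15ms >= 10ms, so a persistently failing f is
	// invoked exactly four times before try gives up.
	err := try(time.Millisecond, 10*time.Millisecond, func() error {
		attempts++
		return errors.New("still failing") // never succeeds in this sketch
	})
	fmt.Printf("attempts=%d err=%v\n", attempts, err) // attempts=4
}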