github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/table_cache_test.go

// Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"bufio"
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"path"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/keyspan"
	"github.com/cockroachdb/pebble/internal/manifest"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/cockroachdb/pebble/objstorage"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

type tableCacheTestFile struct {
	vfs.File
	fs   *tableCacheTestFS
	name string
}

func (f *tableCacheTestFile) Close() error {
	f.fs.mu.Lock()
	if f.fs.closeCounts != nil {
		f.fs.closeCounts[f.name]++
	}
	f.fs.mu.Unlock()
	return f.File.Close()
}

type tableCacheTestFS struct {
	vfs.FS

	mu               sync.Mutex
	openCounts       map[string]int
	closeCounts      map[string]int
	openErrorEnabled bool
}

func (fs *tableCacheTestFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) {
	fs.mu.Lock()
	if fs.openErrorEnabled {
		fs.mu.Unlock()
		return nil, errors.New("injected error")
	}
	if fs.openCounts != nil {
		fs.openCounts[name]++
	}
	fs.mu.Unlock()
	f, err := fs.FS.Open(name, opts...)
	if len(opts) < 1 || opts[0] != vfs.RandomReadsOption {
		return nil, errors.Errorf("sstable file %s not opened with random reads option", name)
	}
	if err != nil {
		return nil, err
	}
	return &tableCacheTestFile{f, fs, name}, nil
}

func (fs *tableCacheTestFS) validate(
	t *testing.T, c *tableCacheContainer, f func(i, gotO, gotC int) error,
) {
	if err := fs.validateOpenTables(f); err != nil {
		t.Error(err)
		return
	}
	c.close()
	if err := fs.validateNoneStillOpen(); err != nil {
		t.Error(err)
		return
	}
}

func (fs *tableCacheTestFS) setOpenError(enabled bool) {
	fs.mu.Lock()
	defer fs.mu.Unlock()
	fs.openErrorEnabled = enabled
}

// validateOpenTables validates that no tables in the cache are open twice, and
// the number still open is no greater than tableCacheTestCacheSize.
func (fs *tableCacheTestFS) validateOpenTables(f func(i, gotO, gotC int) error) error {
	// try backs off to let any clean-up goroutines do their work.
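	// (try is a test helper defined elsewhere in this package; judging by its
	// use here, it repeatedly invokes the closure with backoff, starting at
	// the first duration and giving up once the second, total budget elapses.)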
	return try(100*time.Microsecond, 20*time.Second, func() error {
		fs.mu.Lock()
		defer fs.mu.Unlock()

		numStillOpen := 0
		for i := 0; i < tableCacheTestNumTables; i++ {
			filename := base.MakeFilepath(fs, "", fileTypeTable, base.FileNum(uint64(i)).DiskFileNum())
			gotO, gotC := fs.openCounts[filename], fs.closeCounts[filename]
			if gotO > gotC {
				numStillOpen++
			}
			if gotC != gotO && gotC != gotO-1 {
				return errors.Errorf("i=%d: table closed too many or too few times: opened %d times, closed %d times",
					i, gotO, gotC)
			}
			if f != nil {
				if err := f(i, gotO, gotC); err != nil {
					return err
				}
			}
		}
		if numStillOpen > tableCacheTestCacheSize {
			return errors.Errorf("numStillOpen is %d, want <= %d", numStillOpen, tableCacheTestCacheSize)
		}
		return nil
	})
}

// validateNoneStillOpen validates that no tables in the cache are open.
func (fs *tableCacheTestFS) validateNoneStillOpen() error {
	// try backs off to let any clean-up goroutines do their work.
	return try(100*time.Microsecond, 20*time.Second, func() error {
		fs.mu.Lock()
		defer fs.mu.Unlock()

		for i := 0; i < tableCacheTestNumTables; i++ {
			filename := base.MakeFilepath(fs, "", fileTypeTable, base.FileNum(uint64(i)).DiskFileNum())
			gotO, gotC := fs.openCounts[filename], fs.closeCounts[filename]
			if gotO != gotC {
				return errors.Errorf("i=%d: opened %d times, closed %d times", i, gotO, gotC)
			}
		}
		return nil
	})
}

const (
	tableCacheTestNumTables = 300
	tableCacheTestCacheSize = 100
)

// newTableCacheTest returns a shareable table cache to be used for tests.
// It is the caller's responsibility to unref the table cache.
func newTableCacheTest(size int64, tableCacheSize int, numShards int) *TableCache {
	cache := NewCache(size)
	defer cache.Unref()
	return NewTableCache(cache, numShards, tableCacheSize)
}

func newTableCacheContainerTest(
	tc *TableCache, dirname string,
) (*tableCacheContainer, *tableCacheTestFS, error) {
	xxx := bytes.Repeat([]byte("x"), tableCacheTestNumTables)
	fs := &tableCacheTestFS{
		FS: vfs.NewMem(),
	}
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(fs, dirname))
	if err != nil {
		return nil, nil, err
	}
	defer objProvider.Close()

	for i := 0; i < tableCacheTestNumTables; i++ {
		w, _, err := objProvider.Create(context.Background(), fileTypeTable, base.FileNum(uint64(i)).DiskFileNum(), objstorage.CreateOptions{})
		if err != nil {
			return nil, nil, errors.Wrap(err, "fs.Create")
		}
		tw := sstable.NewWriter(w, sstable.WriterOptions{TableFormat: sstable.TableFormatPebblev2})
		ik := base.ParseInternalKey(fmt.Sprintf("k.SET.%d", i))
		if err := tw.Add(ik, xxx[:i]); err != nil {
			return nil, nil, errors.Wrap(err, "tw.Set")
		}
		if err := tw.RangeKeySet([]byte("k"), []byte("l"), nil, xxx[:i]); err != nil {
			return nil, nil, errors.Wrap(err, "tw.Set")
		}
		if err := tw.Close(); err != nil {
			return nil, nil, errors.Wrap(err, "tw.Close")
		}
	}

	fs.mu.Lock()
	fs.openCounts = map[string]int{}
	fs.closeCounts = map[string]int{}
	fs.mu.Unlock()

	opts := &Options{}
	opts.EnsureDefaults()
	if tc == nil {
		opts.Cache = NewCache(8 << 20) // 8 MB
		defer opts.Cache.Unref()
	} else {
		opts.Cache = tc.cache
	}

	c := newTableCacheContainer(tc, opts.Cache.NewID(), objProvider, opts, tableCacheTestCacheSize)
	return c, fs, nil
}

// Test basic reference counting for the table cache.
func TestTableCacheRefs(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 2)

	v := tc.refs.Load()
	if v != 1 {
		require.Equal(t, 1, v)
	}

	tc.Ref()
	v = tc.refs.Load()
	if v != 2 {
		require.Equal(t, 2, v)
	}

	tc.Unref()
	v = tc.refs.Load()
	if v != 1 {
		require.Equal(t, 1, v)
	}

	tc.Unref()
	v = tc.refs.Load()
	if v != 0 {
		require.Equal(t, 0, v)
	}

	defer func() {
		if r := recover(); r != nil {
			if fmt.Sprint(r) != "pebble: inconsistent reference count: -1" {
				t.Fatalf("unexpected panic message")
			}
		} else if r == nil {
			t.Fatalf("expected panic")
		}
	}()
	tc.Unref()
}

// Basic test to determine if reads through the table cache are wired correctly.
func TestVirtualReadsWiring(t *testing.T) {
	var d *DB
	var err error
	d, err = Open("",
		&Options{
			FS:                 vfs.NewMem(),
			FormatMajorVersion: internalFormatNewest,
			Comparer:           testkeys.Comparer,
			// Compactions which conflict with virtual sstable creation can be
			// picked by Pebble. We disable that.
			DisableAutomaticCompactions: true,
		})
	require.NoError(t, err)
	defer d.Close()

	b := newBatch(d)
	// Some combination of sets, range deletes, and range key sets/unsets, so
	// all of the table cache iterator functions are utilized.
	require.NoError(t, b.Set([]byte{'a'}, []byte{'a'}, nil))
	require.NoError(t, b.Set([]byte{'d'}, []byte{'d'}, nil))
	require.NoError(t, b.DeleteRange([]byte{'c'}, []byte{'e'}, nil))
	require.NoError(t, b.Set([]byte{'f'}, []byte{'f'}, nil))
	require.NoError(t, b.RangeKeySet([]byte{'f'}, []byte{'k'}, nil, []byte{'c'}, nil))
	require.NoError(t, b.RangeKeyUnset([]byte{'j'}, []byte{'k'}, nil, nil))
	require.NoError(t, b.Set([]byte{'z'}, []byte{'z'}, nil))
	require.NoError(t, d.Apply(b, nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Compact([]byte{'a'}, []byte{'b'}, false))
	require.Equal(t, 1, int(d.Metrics().Levels[6].NumFiles))

	d.mu.Lock()

	// Virtualize the single sstable in the lsm.
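	// This is done by hand below: build two virtual FileMetadata entries that
	// share the physical file's FileBacking, then apply a version edit that
	// deletes the physical file from L6 and installs the two virtual files in
	// its place.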

	currVersion := d.mu.versions.currentVersion()
	l6 := currVersion.Levels[6]
	l6FileIter := l6.Iter()
	parentFile := l6FileIter.First()
	f1 := d.mu.versions.nextFileNum
	f2 := f1 + 1
	d.mu.versions.nextFileNum += 2

	v1 := &manifest.FileMetadata{
		FileBacking:    parentFile.FileBacking,
		FileNum:        f1,
		CreationTime:   time.Now().Unix(),
		Size:           parentFile.Size / 2,
		SmallestSeqNum: parentFile.SmallestSeqNum,
		LargestSeqNum:  parentFile.LargestSeqNum,
		Smallest:       base.MakeInternalKey([]byte{'a'}, parentFile.Smallest.SeqNum(), InternalKeyKindSet),
		Largest:        base.MakeInternalKey([]byte{'a'}, parentFile.Smallest.SeqNum(), InternalKeyKindSet),
		HasPointKeys:   true,
		Virtual:        true,
	}
	v1.Stats.NumEntries = 1

	v2 := &manifest.FileMetadata{
		FileBacking:    parentFile.FileBacking,
		FileNum:        f2,
		CreationTime:   time.Now().Unix(),
		Size:           parentFile.Size / 2,
		SmallestSeqNum: parentFile.SmallestSeqNum,
		LargestSeqNum:  parentFile.LargestSeqNum,
		Smallest:       base.MakeInternalKey([]byte{'d'}, parentFile.Smallest.SeqNum()+1, InternalKeyKindSet),
		Largest:        base.MakeInternalKey([]byte{'z'}, parentFile.Largest.SeqNum(), InternalKeyKindSet),
		HasPointKeys:   true,
		Virtual:        true,
	}
	v2.Stats.NumEntries = 6

	v1.LargestPointKey = v1.Largest
	v1.SmallestPointKey = v1.Smallest

	v2.LargestPointKey = v2.Largest
	v2.SmallestPointKey = v2.Smallest

	v1.ValidateVirtual(parentFile)
	d.checkVirtualBounds(v1)
	v2.ValidateVirtual(parentFile)
	d.checkVirtualBounds(v2)

	// Write the version edit.
	fileMetrics := func(ve *versionEdit) map[int]*LevelMetrics {
		metrics := newFileMetrics(ve.NewFiles)
		for de, f := range ve.DeletedFiles {
			lm := metrics[de.Level]
			if lm == nil {
				lm = &LevelMetrics{}
				metrics[de.Level] = lm
			}
			metrics[de.Level].NumFiles--
			metrics[de.Level].Size -= int64(f.Size)
		}
		return metrics
	}

	applyVE := func(ve *versionEdit) error {
		d.mu.versions.logLock()
		jobID := d.mu.nextJobID
		d.mu.nextJobID++

		err := d.mu.versions.logAndApply(jobID, ve, fileMetrics(ve), false, func() []compactionInfo {
			return d.getInProgressCompactionInfoLocked(nil)
		})
		d.updateReadStateLocked(nil)
		return err
	}

	ve := manifest.VersionEdit{}
	d1 := manifest.DeletedFileEntry{Level: 6, FileNum: parentFile.FileNum}
	n1 := manifest.NewFileEntry{Level: 6, Meta: v1}
	n2 := manifest.NewFileEntry{Level: 6, Meta: v2}

	ve.DeletedFiles = make(map[manifest.DeletedFileEntry]*manifest.FileMetadata)
	ve.DeletedFiles[d1] = parentFile
	ve.NewFiles = append(ve.NewFiles, n1)
	ve.NewFiles = append(ve.NewFiles, n2)
	ve.CreatedBackingTables = append(ve.CreatedBackingTables, parentFile.FileBacking)

	require.NoError(t, applyVE(&ve))

	currVersion = d.mu.versions.currentVersion()
	l6 = currVersion.Levels[6]
	l6FileIter = l6.Iter()
	for f := l6FileIter.First(); f != nil; f = l6FileIter.Next() {
		require.Equal(t, true, f.Virtual)
	}
	d.mu.Unlock()

	// Confirm that there were only 2 virtual sstables in L6.
	require.Equal(t, 2, int(d.Metrics().Levels[6].NumFiles))

	// These reads will go through the table cache.
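	// Of the keys written above, only 'a', 'f' and 'z' should remain visible
	// as point keys: 'd' falls inside the [c, e) range deletion.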
	iter, _ := d.NewIter(nil)
	expected := []byte{'a', 'f', 'z'}
	for i, x := 0, iter.First(); x; i, x = i+1, iter.Next() {
		require.Equal(t, []byte{expected[i]}, iter.Value())
	}
	iter.Close()
}

// The table cache shouldn't be usable after all the dbs close.
func TestSharedTableCacheUseAfterAllFree(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
	tc.Unref()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	require.NoError(t, db1.Close())
	require.NoError(t, db2.Close())

	v := tc.refs.Load()
	if v != 0 {
		t.Fatalf("expected reference count %d, got %d", 0, v)
	}

	defer func() {
		// The cache ref gets incremented before the panic, so we should
		// decrement it to prevent the finalizer from detecting a leak.
		tc.cache.Unref()

		if r := recover(); r != nil {
			if fmt.Sprint(r) != "pebble: inconsistent reference count: 1" {
				t.Fatalf("unexpected panic message")
			}
		} else if r == nil {
			t.Fatalf("expected panic")
		}
	}()

	db3, _ := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	_ = db3
}

// Test whether a shared table cache is usable by a db, after
// one of the dbs releases its reference.
func TestSharedTableCacheUseAfterOneFree(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
	tc.Unref()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, db2.Close())
	}()

	// Make db1 release a reference to the cache. It should
	// still be usable by db2.
	require.NoError(t, db1.Close())
	v := tc.refs.Load()
	if v != 1 {
		t.Fatalf("expected reference count %d, got %d", 1, v)
	}

	// Check if db2 is still usable.
	start := []byte("a")
	end := []byte("d")
	require.NoError(t, db2.Set(start, nil, nil))
	require.NoError(t, db2.Flush())
	require.NoError(t, db2.DeleteRange(start, end, nil))
	require.NoError(t, db2.Compact(start, end, false))
}

// A basic test which makes sure that a shared table cache is usable
// by more than one database at once.
func TestSharedTableCacheUsable(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
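	// From here on, the open DBs' references are what keep the shared table
	// cache alive.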
	tc.Unref()

	defer func() {
		require.NoError(t, db1.Close())
	}()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, db2.Close())
	}()

	start := []byte("a")
	end := []byte("z")
	require.NoError(t, db1.Set(start, nil, nil))
	require.NoError(t, db1.Flush())
	require.NoError(t, db1.DeleteRange(start, end, nil))
	require.NoError(t, db1.Compact(start, end, false))

	start = []byte("x")
	end = []byte("y")
	require.NoError(t, db2.Set(start, nil, nil))
	require.NoError(t, db2.Flush())
	require.NoError(t, db2.Set(start, []byte{'a'}, nil))
	require.NoError(t, db2.Flush())
	require.NoError(t, db2.DeleteRange(start, end, nil))
	require.NoError(t, db2.Compact(start, end, false))
}

func TestSharedTableConcurrent(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
	tc.Unref()

	defer func() {
		require.NoError(t, db1.Close())
	}()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, db2.Close())
	}()

	var wg sync.WaitGroup
	wg.Add(2)

	// Now that both dbs have a reference to the table cache,
	// we'll run goroutines which will use the DBs concurrently.
	concFunc := func(db *DB) {
		for i := 0; i < 1000; i++ {
			start := []byte("a")
			end := []byte("z")
			require.NoError(t, db.Set(start, nil, nil))
			require.NoError(t, db.Flush())
			require.NoError(t, db.DeleteRange(start, end, nil))
			require.NoError(t, db.Compact(start, end, false))
		}
		wg.Done()
	}

	go concFunc(db1)
	go concFunc(db2)

	wg.Wait()
}

func testTableCacheRandomAccess(t *testing.T, concurrent bool) {
	const N = 2000
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	rngMu := sync.Mutex{}
	rng := rand.New(rand.NewSource(1))

	errc := make(chan error, N)
	for i := 0; i < N; i++ {
		go func(i int) {
			rngMu.Lock()
			fileNum, sleepTime := rng.Intn(tableCacheTestNumTables), rng.Intn(1000)
			rngMu.Unlock()
			m := &fileMetadata{FileNum: FileNum(fileNum)}
			m.InitPhysicalBacking()
			m.Ref()
			defer m.Unref()
			iter, _, err := c.newIters(context.Background(), m, nil, internalIterOpts{})
			if err != nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: find: %v", i, fileNum, err)
				return
			}
			key, value := iter.SeekGE([]byte("k"), base.SeekGEFlagsNone)
			if concurrent {
				time.Sleep(time.Duration(sleepTime) * time.Microsecond)
			}
			if key == nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: valid.0: got false, want true", i, fileNum)
				return
			}
			v, _, err := value.Value(nil)
			if err != nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: err extracting value: %v", i, fileNum, err)
				return
			}
			if got := len(v); got != fileNum {
				errc <- errors.Errorf("i=%d, fileNum=%d: value: got %d bytes, want %d", i, fileNum, got, fileNum)
				return
			}
			if key, _ := iter.Next(); key != nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: next.1: got true, want false", i, fileNum)
				return
			}
			if err := iter.Close(); err != nil {
				errc <- errors.Wrapf(err, "close error i=%d, fileNum=%d", i, fileNum)
				return
			}
			errc <- nil
		}(i)
		if !concurrent {
			require.NoError(t, <-errc)
		}
	}
	if concurrent {
		for i := 0; i < N; i++ {
			require.NoError(t, <-errc)
		}
	}
	fs.validate(t, c, nil)
}

func TestTableCacheRandomAccessSequential(t *testing.T) { testTableCacheRandomAccess(t, false) }
func TestTableCacheRandomAccessConcurrent(t *testing.T) { testTableCacheRandomAccess(t, true) }

func testTableCacheFrequentlyUsedInternal(t *testing.T, rangeIter bool) {
	const (
		N       = 1000
		pinned0 = 7
		pinned1 = 11
	)
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	for i := 0; i < N; i++ {
		for _, j := range [...]int{pinned0, i % tableCacheTestNumTables, pinned1} {
			var iter io.Closer
			var err error
			m := &fileMetadata{FileNum: FileNum(j)}
			m.InitPhysicalBacking()
			m.Ref()
			if rangeIter {
				iter, err = c.newRangeKeyIter(m, keyspan.SpanIterOptions{})
			} else {
				iter, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{})
			}
			if err != nil {
				t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
			}
			if err := iter.Close(); err != nil {
				t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
			}
		}
	}

	fs.validate(t, c, func(i, gotO, gotC int) error {
		if i == pinned0 || i == pinned1 {
			if gotO != 1 || gotC != 0 {
				return errors.Errorf("i=%d: pinned table: got %d, %d, want %d, %d", i, gotO, gotC, 1, 0)
			}
		}
		return nil
	})
}

func TestTableCacheFrequentlyUsed(t *testing.T) {
	for i, iterType := range []string{"point", "range"} {
		t.Run(fmt.Sprintf("iter=%s", iterType), func(t *testing.T) {
			testTableCacheFrequentlyUsedInternal(t, i == 1)
		})
	}
}

func TestSharedTableCacheFrequentlyUsed(t *testing.T) {
	const (
		N       = 1000
		pinned0 = 7
		pinned1 = 11
	)
	tc := newTableCacheTest(8<<20, 2*tableCacheTestCacheSize, 16)
	c1, fs1, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c2, fs2, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	tc.Unref()

	for i := 0; i < N; i++ {
		for _, j := range [...]int{pinned0, i % tableCacheTestNumTables, pinned1} {
			m := &fileMetadata{FileNum: FileNum(j)}
			m.InitPhysicalBacking()
			m.Ref()
			iter1, _, err := c1.newIters(context.Background(), m, nil, internalIterOpts{})
			if err != nil {
				t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
			}
			iter2, _, err := c2.newIters(context.Background(), m, nil, internalIterOpts{})
			if err != nil {
				t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
			}

			if err := iter1.Close(); err != nil {
				t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
			}
			if err := iter2.Close(); err != nil {
				t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
			}
		}
	}

	fs1.validate(t, c1, func(i, gotO, gotC int) error {
		if i == pinned0 || i == pinned1 {
			if gotO != 1 || gotC != 0 {
				return errors.Errorf("i=%d: pinned table: got %d, %d, want %d, %d", i, gotO, gotC, 1, 0)
			}
		}
		return nil
	})

	fs2.validate(t, c2, func(i, gotO, gotC int) error {
		if i == pinned0 || i == pinned1 {
			if gotO != 1 || gotC != 0 {
				return errors.Errorf("i=%d: pinned table: got %d, %d, want %d, %d", i, gotO, gotC, 1, 0)
			}
		}
		return nil
	})
}

func testTableCacheEvictionsInternal(t *testing.T, rangeIter bool) {
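	// Open iterators over randomly chosen tables while repeatedly evicting
	// tables in the [lo, hi) file number range, then check that the evicted
	// range had to be re-opened noticeably more often than the rest.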
	const (
		N      = 1000
		lo, hi = 10, 20
	)
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	rng := rand.New(rand.NewSource(2))
	for i := 0; i < N; i++ {
		j := rng.Intn(tableCacheTestNumTables)
		var iter io.Closer
		var err error
		m := &fileMetadata{FileNum: FileNum(j)}
		m.InitPhysicalBacking()
		m.Ref()
		if rangeIter {
			iter, err = c.newRangeKeyIter(m, keyspan.SpanIterOptions{})
		} else {
			iter, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{})
		}
		if err != nil {
			t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
		}
		if err := iter.Close(); err != nil {
			t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
		}

		c.evict(base.FileNum(lo + rng.Uint64n(hi-lo)).DiskFileNum())
	}

	sumEvicted, nEvicted := 0, 0
	sumSafe, nSafe := 0, 0
	fs.validate(t, c, func(i, gotO, gotC int) error {
		if lo <= i && i < hi {
			sumEvicted += gotO
			nEvicted++
		} else {
			sumSafe += gotO
			nSafe++
		}
		return nil
	})
	fEvicted := float64(sumEvicted) / float64(nEvicted)
	fSafe := float64(sumSafe) / float64(nSafe)
	// The magic 1.25 number isn't derived from formal modeling. It's just a guess. For
	// (lo, hi, tableCacheTestCacheSize, tableCacheTestNumTables) = (10, 20, 100, 300),
	// the ratio seems to converge on roughly 1.5 for large N, compared to 1.0 if we do
	// not evict any cache entries.
	if ratio := fEvicted / fSafe; ratio < 1.25 {
		t.Errorf("evicted tables were opened %.3f times on average, safe tables %.3f, ratio %.3f < 1.250",
			fEvicted, fSafe, ratio)
	}
}

func TestTableCacheEvictions(t *testing.T) {
	for i, iterType := range []string{"point", "range"} {
		t.Run(fmt.Sprintf("iter=%s", iterType), func(t *testing.T) {
			testTableCacheEvictionsInternal(t, i == 1)
		})
	}
}

func TestSharedTableCacheEvictions(t *testing.T) {
	const (
		N      = 1000
		lo, hi = 10, 20
	)
	tc := newTableCacheTest(8<<20, 2*tableCacheTestCacheSize, 16)
	c1, fs1, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c2, fs2, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	tc.Unref()

	rng := rand.New(rand.NewSource(2))
	for i := 0; i < N; i++ {
		j := rng.Intn(tableCacheTestNumTables)
		m := &fileMetadata{FileNum: FileNum(j)}
		m.InitPhysicalBacking()
		m.Ref()
		iter1, _, err := c1.newIters(context.Background(), m, nil, internalIterOpts{})
		if err != nil {
			t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
		}

		iter2, _, err := c2.newIters(context.Background(), m, nil, internalIterOpts{})
		if err != nil {
			t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
		}

		if err := iter1.Close(); err != nil {
			t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
		}

		if err := iter2.Close(); err != nil {
			t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
		}

		c1.evict(base.FileNum(lo + rng.Uint64n(hi-lo)).DiskFileNum())
		c2.evict(base.FileNum(lo + rng.Uint64n(hi-lo)).DiskFileNum())
	}

	check := func(fs *tableCacheTestFS, c *tableCacheContainer) (float64, float64, float64) {
		sumEvicted, nEvicted := 0, 0
		sumSafe, nSafe := 0, 0
		fs.validate(t, c, func(i, gotO, gotC int) error {
			if lo <= i && i < hi {
				sumEvicted += gotO
				nEvicted++
			} else {
				sumSafe += gotO
				nSafe++
			}
			return nil
		})
		fEvicted := float64(sumEvicted) / float64(nEvicted)
		fSafe := float64(sumSafe) / float64(nSafe)

		return fEvicted, fSafe, fEvicted / fSafe
	}

	// The magic 1.25 number isn't derived from formal modeling. It's just a guess. For
	// (lo, hi, tableCacheTestCacheSize, tableCacheTestNumTables) = (10, 20, 100, 300),
	// the ratio seems to converge on roughly 1.5 for large N, compared to 1.0 if we do
	// not evict any cache entries.
	if fEvicted, fSafe, ratio := check(fs1, c1); ratio < 1.25 {
		t.Errorf(
			"evicted tables were opened %.3f times on average, safe tables %.3f, ratio %.3f < 1.250",
			fEvicted, fSafe, ratio,
		)
	}

	if fEvicted, fSafe, ratio := check(fs2, c2); ratio < 1.25 {
		t.Errorf(
			"evicted tables were opened %.3f times on average, safe tables %.3f, ratio %.3f < 1.250",
			fEvicted, fSafe, ratio,
		)
	}
}

func TestTableCacheIterLeak(t *testing.T) {
	c, _, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	m := &fileMetadata{FileNum: 0}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	iter, _, err := c.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(t, err)

	if err := c.close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
		t.Fatalf("expected leaked iterators, but found %+v", err)
	} else {
		t.Log(err.Error())
	}
	require.NoError(t, iter.Close())
}

func TestSharedTableCacheIterLeak(t *testing.T) {
	tc := newTableCacheTest(8<<20, 2*tableCacheTestCacheSize, 16)
	c1, _, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c2, _, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c3, _, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	tc.Unref()

	m := &fileMetadata{FileNum: 0}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	iter, _, err := c1.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(t, err)

	if err := c1.close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
		t.Fatalf("expected leaked iterators, but found %+v", err)
	} else {
		t.Log(err.Error())
	}

	// Closing c2 shouldn't error out since c2 isn't leaking any iterators.
	require.NoError(t, c2.close())

	// Closing c3 should error out since c3 holds the last reference to
	// the TableCache, and when the TableCache closes, it will detect
	// that there was a leaked iterator.
	if err := c3.close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
		t.Fatalf("expected leaked iterators, but found %+v", err)
	} else {
		t.Log(err.Error())
	}

	require.NoError(t, iter.Close())
}

func TestTableCacheRetryAfterFailure(t *testing.T) {
	// Test a retry can succeed after a failure, i.e., errors are not cached.
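	// tableCacheTestFS.setOpenError toggles an injected Open failure; once it
	// is cleared, opening the same table again should succeed.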
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	fs.setOpenError(true /* enabled */)
	m := &fileMetadata{FileNum: 0}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	if _, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{}); err == nil {
		t.Fatalf("expected failure, but found success")
	}
	require.Equal(t, "pebble: backing file 000000 error: injected error", err.Error())
	fs.setOpenError(false /* enabled */)
	var iter internalIterator
	iter, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(t, err)
	require.NoError(t, iter.Close())
	fs.validate(t, c, nil)
}

// memFile is a file-like struct that buffers all data written to it in memory.
// Implements the objstorage.Writable interface.
type memFile struct {
	buf bytes.Buffer
}

var _ objstorage.Writable = (*memFile)(nil)

// Finish is part of the objstorage.Writable interface.
func (*memFile) Finish() error {
	return nil
}

// Abort is part of the objstorage.Writable interface.
func (*memFile) Abort() {}

// Write is part of the objstorage.Writable interface.
func (f *memFile) Write(p []byte) error {
	_, err := f.buf.Write(p)
	return err
}

func TestTableCacheErrorBadMagicNumber(t *testing.T) {
	var file memFile
	tw := sstable.NewWriter(&file, sstable.WriterOptions{TableFormat: sstable.TableFormatPebblev2})
	tw.Set([]byte("a"), nil)
	require.NoError(t, tw.Close())
	buf := file.buf.Bytes()
	// Bad magic number.
	buf[len(buf)-1] = 0
	fs := &tableCacheTestFS{
		FS: vfs.NewMem(),
	}
	const testFileNum = 3
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(fs, ""))
	require.NoError(t, err)
	w, _, err := objProvider.Create(context.Background(), fileTypeTable,
		base.FileNum(testFileNum).DiskFileNum(), objstorage.CreateOptions{})
	w.Write(buf)
	require.NoError(t, w.Finish())
	opts := &Options{}
	opts.EnsureDefaults()
	opts.Cache = NewCache(8 << 20) // 8 MB
	defer opts.Cache.Unref()
	c := newTableCacheContainer(nil, opts.Cache.NewID(), objProvider, opts, tableCacheTestCacheSize)
	require.NoError(t, err)
	defer c.close()

	m := &fileMetadata{FileNum: testFileNum}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	if _, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{}); err == nil {
		t.Fatalf("expected failure, but found success")
	}
	require.Equal(t,
		"pebble: backing file 000003 error: pebble/table: invalid table (bad magic number: 0xf09faab3f09faa00)",
		err.Error())
}

func TestTableCacheEvictClose(t *testing.T) {
	errs := make(chan error, 10)
	db, err := Open("test",
		&Options{
			FS: vfs.NewMem(),
			EventListener: &EventListener{
				TableDeleted: func(info TableDeleteInfo) {
					errs <- info.Err
				},
			},
		})
	require.NoError(t, err)

	start := []byte("a")
	end := []byte("z")
	require.NoError(t, db.Set(start, nil, nil))
	require.NoError(t, db.Flush())
	require.NoError(t, db.DeleteRange(start, end, nil))
	require.NoError(t, db.Compact(start, end, false))
	require.NoError(t, db.Close())
	close(errs)

	for err := range errs {
		require.NoError(t, err)
	}
}

func TestTableCacheClockPro(t *testing.T) {
	// Test data was generated from the Python code. See also
	// internal/cache/clockpro_test.go:TestCache.
	f, err := os.Open("internal/cache/testdata/cache")
	require.NoError(t, err)

	mem := vfs.NewMem()
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, ""))
	require.NoError(t, err)
	defer objProvider.Close()

	makeTable := func(dfn base.DiskFileNum) {
		require.NoError(t, err)
		f, _, err := objProvider.Create(context.Background(), fileTypeTable, dfn, objstorage.CreateOptions{})
		require.NoError(t, err)
		w := sstable.NewWriter(f, sstable.WriterOptions{})
		require.NoError(t, w.Set([]byte("a"), nil))
		require.NoError(t, w.Close())
	}

	opts := &Options{
		Cache: NewCache(8 << 20), // 8 MB
	}
	opts.EnsureDefaults()
	defer opts.Cache.Unref()

	cache := &tableCacheShard{}
	// NB: The table cache size of 200 is required for the expected test values.
	cache.init(200)
	dbOpts := &tableCacheOpts{}
	dbOpts.loggerAndTracer = &base.LoggerWithNoopTracer{Logger: opts.Logger}
	dbOpts.cacheID = 0
	dbOpts.objProvider = objProvider
	dbOpts.opts = opts.MakeReaderOptions()

	scanner := bufio.NewScanner(f)
	tables := make(map[int]bool)
	line := 1

	for scanner.Scan() {
		fields := bytes.Fields(scanner.Bytes())

		key, err := strconv.Atoi(string(fields[0]))
		require.NoError(t, err)

		// Ensure that underlying sstables exist on disk, creating each table the
		// first time it is seen.
		if !tables[key] {
			makeTable(base.FileNum(uint64(key)).DiskFileNum())
			tables[key] = true
		}

		oldHits := cache.hits.Load()
		m := &fileMetadata{FileNum: FileNum(key)}
		m.InitPhysicalBacking()
		m.Ref()
		v := cache.findNode(m, dbOpts)
		cache.unrefValue(v)

		hit := cache.hits.Load() != oldHits
		wantHit := fields[1][0] == 'h'
		if hit != wantHit {
			t.Errorf("%d: cache hit mismatch: got %v, want %v\n", line, hit, wantHit)
		}
		line++
		m.Unref()
	}
}

func BenchmarkNewItersAlloc(b *testing.B) {
	opts := &Options{
		FS:                 vfs.NewMem(),
		FormatMajorVersion: internalFormatNewest,
	}
	d, err := Open("", opts)
	require.NoError(b, err)
	defer func() { require.NoError(b, d.Close()) }()

	require.NoError(b, d.Set([]byte{'a'}, []byte{'a'}, nil))
	require.NoError(b, d.Flush())
	require.NoError(b, d.Compact([]byte{'a'}, []byte{'z'}, false))

	d.mu.Lock()
	currVersion := d.mu.versions.currentVersion()
	it := currVersion.Levels[6].Iter()
	m := it.First()
	require.NotNil(b, m)
	d.mu.Unlock()

	// Open once so that the Reader is cached.
	iter, _, err := d.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(b, iter.Close())
	require.NoError(b, err)

	for i := 0; i < b.N; i++ {
		b.StartTimer()
		iter, _, err := d.newIters(context.Background(), m, nil, internalIterOpts{})
		b.StopTimer()
		require.NoError(b, err)
		require.NoError(b, iter.Close())
	}
}

// TestTableCacheNoSuchFileError verifies that when the table cache hits a "no
// such file" error, it generates a useful fatal message.
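// The message is expected to include a summary of the data directory's
// contents (file counts by type) to help diagnose the missing file.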
func TestTableCacheNoSuchFileError(t *testing.T) {
	const dirname = "test"
	mem := vfs.NewMem()
	logger := &catchFatalLogger{}

	d, err := Open(dirname, &Options{
		FS:     mem,
		Logger: logger,
	})
	require.NoError(t, err)
	defer func() { _ = d.Close() }()
	require.NoError(t, d.Set([]byte("a"), []byte("val_a"), nil))
	require.NoError(t, d.Set([]byte("b"), []byte("val_b"), nil))
	require.NoError(t, d.Flush())
	ls, err := mem.List(dirname)
	require.NoError(t, err)

	// Find the sst file.
	var sst string
	for _, file := range ls {
		if strings.HasSuffix(file, ".sst") {
			if sst != "" {
				t.Fatalf("multiple SSTs found: %s, %s", sst, file)
			}
			sst = file
		}
	}
	if sst == "" {
		t.Fatalf("no SST found after flush")
	}
	require.NoError(t, mem.Remove(path.Join(dirname, sst)))

	_, _, _ = d.Get([]byte("a"))
	require.NotZero(t, len(logger.fatalMsgs), "no fatal message emitted")
	require.Equal(t, 1, len(logger.fatalMsgs), "expected one fatal message; got: %v", logger.fatalMsgs)
	require.Contains(t, logger.fatalMsgs[0], "directory contains 6 files, 0 unknown, 0 tables, 2 logs, 1 manifests")
}

func BenchmarkTableCacheHotPath(b *testing.B) {
	mem := vfs.NewMem()
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, ""))
	require.NoError(b, err)
	defer objProvider.Close()

	makeTable := func(dfn base.DiskFileNum) {
		require.NoError(b, err)
		f, _, err := objProvider.Create(context.Background(), fileTypeTable, dfn, objstorage.CreateOptions{})
		require.NoError(b, err)
		w := sstable.NewWriter(f, sstable.WriterOptions{})
		require.NoError(b, w.Set([]byte("a"), nil))
		require.NoError(b, w.Close())
	}

	opts := &Options{
		Cache: NewCache(8 << 20), // 8 MB
	}
	opts.EnsureDefaults()
	defer opts.Cache.Unref()

	cache := &tableCacheShard{}
	cache.init(2)
	dbOpts := &tableCacheOpts{}
	dbOpts.loggerAndTracer = &base.LoggerWithNoopTracer{Logger: opts.Logger}
	dbOpts.cacheID = 0
	dbOpts.objProvider = objProvider
	dbOpts.opts = opts.MakeReaderOptions()

	makeTable(base.FileNum(1).DiskFileNum())

	m := &fileMetadata{FileNum: 1}
	m.InitPhysicalBacking()
	m.Ref()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		v := cache.findNode(m, dbOpts)
		cache.unrefValue(v)
	}
}

type catchFatalLogger struct {
	fatalMsgs []string
}

var _ Logger = (*catchFatalLogger)(nil)

func (tl *catchFatalLogger) Infof(format string, args ...interface{}) {}

func (tl *catchFatalLogger) Fatalf(format string, args ...interface{}) {
	tl.fatalMsgs = append(tl.fatalMsgs, fmt.Sprintf(format, args...))
}
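
// The sketch below is not part of the upstream test suite. It condenses the
// table cache sharing pattern exercised by the TestSharedTableCache* tests
// above into a minimal example, using only identifiers that already appear in
// this file; the function is illustrative and is never invoked.
func sharedTableCacheUsageSketch(t *testing.T) {
	// A shareable table cache: 8 MB block cache, room for 10 tables, 1 shard.
	tc := newTableCacheTest(8<<20, 10, 1)

	// Each DB handed the cache via Options takes its own reference.
	db1, err := Open("sketch1", &Options{FS: vfs.NewMem(), Cache: tc.cache, TableCache: tc})
	require.NoError(t, err)
	db2, err := Open("sketch2", &Options{FS: vfs.NewMem(), Cache: tc.cache, TableCache: tc})
	require.NoError(t, err)

	// Drop the creator's reference; the cache stays alive until the last DB
	// holding it closes.
	tc.Unref()

	require.NoError(t, db1.Close())
	require.NoError(t, db2.Close())
}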