github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/table_cache_test.go

// Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package pebble

import (
	"bufio"
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"path"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/keyspan"
	"github.com/cockroachdb/pebble/internal/manifest"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/cockroachdb/pebble/objstorage"
	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
	"github.com/cockroachdb/pebble/sstable"
	"github.com/cockroachdb/pebble/vfs"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

type tableCacheTestFile struct {
	vfs.File
	fs   *tableCacheTestFS
	name string
}

func (f *tableCacheTestFile) Close() error {
	f.fs.mu.Lock()
	if f.fs.closeCounts != nil {
		f.fs.closeCounts[f.name]++
	}
	f.fs.mu.Unlock()
	return f.File.Close()
}

type tableCacheTestFS struct {
	vfs.FS

	mu               sync.Mutex
	openCounts       map[string]int
	closeCounts      map[string]int
	openErrorEnabled bool
}

func (fs *tableCacheTestFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) {
	fs.mu.Lock()
	if fs.openErrorEnabled {
		fs.mu.Unlock()
		return nil, errors.New("injected error")
	}
	if fs.openCounts != nil {
		fs.openCounts[name]++
	}
	fs.mu.Unlock()
	f, err := fs.FS.Open(name, opts...)
	if len(opts) < 1 || opts[0] != vfs.RandomReadsOption {
		return nil, errors.Errorf("sstable file %s not opened with random reads option", name)
	}
	if err != nil {
		return nil, err
	}
	return &tableCacheTestFile{f, fs, name}, nil
}

func (fs *tableCacheTestFS) validate(
	t *testing.T, c *tableCacheContainer, f func(i, gotO, gotC int) error,
) {
	if err := fs.validateOpenTables(f); err != nil {
		t.Error(err)
		return
	}
	c.close()
	if err := fs.validateNoneStillOpen(); err != nil {
		t.Error(err)
		return
	}
}

func (fs *tableCacheTestFS) setOpenError(enabled bool) {
	fs.mu.Lock()
	defer fs.mu.Unlock()
	fs.openErrorEnabled = enabled
}

// validateOpenTables validates that no tables in the cache are open twice, and
// the number still open is no greater than tableCacheTestCacheSize.
func (fs *tableCacheTestFS) validateOpenTables(f func(i, gotO, gotC int) error) error {
	// try backs off to let any clean-up goroutines do their work.
	return try(100*time.Microsecond, 20*time.Second, func() error {
		fs.mu.Lock()
		defer fs.mu.Unlock()

		numStillOpen := 0
		for i := 0; i < tableCacheTestNumTables; i++ {
			filename := base.MakeFilepath(fs, "", fileTypeTable, base.FileNum(uint64(i)).DiskFileNum())
			gotO, gotC := fs.openCounts[filename], fs.closeCounts[filename]
			if gotO > gotC {
				numStillOpen++
			}
			if gotC != gotO && gotC != gotO-1 {
				return errors.Errorf("i=%d: table closed too many or too few times: opened %d times, closed %d times",
					i, gotO, gotC)
			}
			if f != nil {
				if err := f(i, gotO, gotC); err != nil {
					return err
				}
			}
		}
		if numStillOpen > tableCacheTestCacheSize {
			return errors.Errorf("numStillOpen is %d, want <= %d", numStillOpen, tableCacheTestCacheSize)
		}
		return nil
	})
}

// validateNoneStillOpen validates that no tables in the cache are open.
func (fs *tableCacheTestFS) validateNoneStillOpen() error {
	// try backs off to let any clean-up goroutines do their work.
	return try(100*time.Microsecond, 20*time.Second, func() error {
		fs.mu.Lock()
		defer fs.mu.Unlock()

		for i := 0; i < tableCacheTestNumTables; i++ {
			filename := base.MakeFilepath(fs, "", fileTypeTable, base.FileNum(uint64(i)).DiskFileNum())
			gotO, gotC := fs.openCounts[filename], fs.closeCounts[filename]
			if gotO != gotC {
				return errors.Errorf("i=%d: opened %d times, closed %d times", i, gotO, gotC)
			}
		}
		return nil
	})
}

const (
	tableCacheTestNumTables = 300
	tableCacheTestCacheSize = 100
)

// newTableCacheTest returns a shareable table cache to be used for tests.
// It is the caller's responsibility to unref the table cache.
func newTableCacheTest(size int64, tableCacheSize int, numShards int) *TableCache {
	cache := NewCache(size)
	defer cache.Unref()
	return NewTableCache(cache, numShards, tableCacheSize)
}

func newTableCacheContainerTest(
	tc *TableCache, dirname string,
) (*tableCacheContainer, *tableCacheTestFS, error) {
	xxx := bytes.Repeat([]byte("x"), tableCacheTestNumTables)
	fs := &tableCacheTestFS{
		FS: vfs.NewMem(),
	}
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(fs, dirname))
	if err != nil {
		return nil, nil, err
	}
	defer objProvider.Close()

	for i := 0; i < tableCacheTestNumTables; i++ {
		w, _, err := objProvider.Create(context.Background(), fileTypeTable, base.FileNum(uint64(i)).DiskFileNum(), objstorage.CreateOptions{})
		if err != nil {
			return nil, nil, errors.Wrap(err, "fs.Create")
		}
		tw := sstable.NewWriter(w, sstable.WriterOptions{TableFormat: sstable.TableFormatPebblev2})
		ik := base.ParseInternalKey(fmt.Sprintf("k.SET.%d", i))
		if err := tw.Add(ik, xxx[:i]); err != nil {
			return nil, nil, errors.Wrap(err, "tw.Set")
		}
		if err := tw.RangeKeySet([]byte("k"), []byte("l"), nil, xxx[:i]); err != nil {
			return nil, nil, errors.Wrap(err, "tw.Set")
		}
		if err := tw.Close(); err != nil {
			return nil, nil, errors.Wrap(err, "tw.Close")
		}
	}

	fs.mu.Lock()
	fs.openCounts = map[string]int{}
	fs.closeCounts = map[string]int{}
	fs.mu.Unlock()

	opts := &Options{}
	opts.EnsureDefaults()
	if tc == nil {
		opts.Cache = NewCache(8 << 20) // 8 MB
		defer opts.Cache.Unref()
	} else {
		opts.Cache = tc.cache
	}

	c := newTableCacheContainer(tc, opts.Cache.NewID(), objProvider, opts, tableCacheTestCacheSize,
		&sstable.CategoryStatsCollector{})
	return c, fs, nil
}
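
// The two helpers above are combined by the tests below roughly as follows.
// This is an illustrative sketch only, mirroring calls that already appear in
// this file; it is not itself a test:
//
//	tc := newTableCacheTest(8<<20, 2*tableCacheTestCacheSize, 16)
//	c, fs, err := newTableCacheContainerTest(tc, "")
//	tc.Unref() // the container holds its own reference
//	// ... open iterators via c.newIters / c.newRangeKeyIter and close them ...
//	fs.validate(t, c, nil) // checks open/close counts and closes c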

// Test basic reference counting for the table cache.
func TestTableCacheRefs(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 2)

	v := tc.refs.Load()
	if v != 1 {
		require.Equal(t, 1, v)
	}

	tc.Ref()
	v = tc.refs.Load()
	if v != 2 {
		require.Equal(t, 2, v)
	}

	tc.Unref()
	v = tc.refs.Load()
	if v != 1 {
		require.Equal(t, 1, v)
	}

	tc.Unref()
	v = tc.refs.Load()
	if v != 0 {
		require.Equal(t, 0, v)
	}

	defer func() {
		if r := recover(); r != nil {
			if fmt.Sprint(r) != "pebble: inconsistent reference count: -1" {
				t.Fatalf("unexpected panic message")
			}
		} else if r == nil {
			t.Fatalf("expected panic")
		}
	}()
	tc.Unref()
}

// Basic test to determine if reads through the table cache are wired correctly.
func TestVirtualReadsWiring(t *testing.T) {
	var d *DB
	var err error
	d, err = Open("",
		&Options{
			FS:                 vfs.NewMem(),
			FormatMajorVersion: internalFormatNewest,
			Comparer:           testkeys.Comparer,
			// Compactions which conflict with virtual sstable creation can be
			// picked by Pebble. We disable that.
			DisableAutomaticCompactions: true,
		})
	require.NoError(t, err)
	defer d.Close()

	b := newBatch(d)
	// Some combination of sets, range deletes, and range key sets/unsets, so
	// all of the table cache iterator functions are utilized.
	require.NoError(t, b.Set([]byte{'a'}, []byte{'a'}, nil))
	require.NoError(t, b.Set([]byte{'d'}, []byte{'d'}, nil))
	require.NoError(t, b.DeleteRange([]byte{'c'}, []byte{'e'}, nil))
	require.NoError(t, b.Set([]byte{'f'}, []byte{'f'}, nil))
	require.NoError(t, b.RangeKeySet([]byte{'f'}, []byte{'k'}, nil, []byte{'c'}, nil))
	require.NoError(t, b.RangeKeyUnset([]byte{'j'}, []byte{'k'}, nil, nil))
	require.NoError(t, b.Set([]byte{'z'}, []byte{'z'}, nil))
	require.NoError(t, d.Apply(b, nil))
	require.NoError(t, d.Flush())
	require.NoError(t, d.Compact([]byte{'a'}, []byte{'b'}, false))
	require.Equal(t, 1, int(d.Metrics().Levels[6].NumFiles))

	d.mu.Lock()

	// Virtualize the single sstable in the lsm.

	currVersion := d.mu.versions.currentVersion()
	l6 := currVersion.Levels[6]
	l6FileIter := l6.Iter()
	parentFile := l6FileIter.First()
	f1 := FileNum(d.mu.versions.nextFileNum)
	f2 := f1 + 1
	d.mu.versions.nextFileNum += 2

	v1 := &manifest.FileMetadata{
		FileBacking:    parentFile.FileBacking,
		FileNum:        f1,
		CreationTime:   time.Now().Unix(),
		Size:           parentFile.Size / 2,
		SmallestSeqNum: parentFile.SmallestSeqNum,
		LargestSeqNum:  parentFile.LargestSeqNum,
		Smallest:       base.MakeInternalKey([]byte{'a'}, parentFile.Smallest.SeqNum(), InternalKeyKindSet),
		Largest:        base.MakeInternalKey([]byte{'a'}, parentFile.Smallest.SeqNum(), InternalKeyKindSet),
		HasPointKeys:   true,
		Virtual:        true,
	}
	v1.Stats.NumEntries = 1

	v2 := &manifest.FileMetadata{
		FileBacking:    parentFile.FileBacking,
		FileNum:        f2,
		CreationTime:   time.Now().Unix(),
		Size:           parentFile.Size / 2,
		SmallestSeqNum: parentFile.SmallestSeqNum,
		LargestSeqNum:  parentFile.LargestSeqNum,
		Smallest:       base.MakeInternalKey([]byte{'d'}, parentFile.Smallest.SeqNum()+1, InternalKeyKindSet),
		Largest:        base.MakeInternalKey([]byte{'z'}, parentFile.Largest.SeqNum(), InternalKeyKindSet),
		HasPointKeys:   true,
		Virtual:        true,
	}
	v2.Stats.NumEntries = 6

	v1.LargestPointKey = v1.Largest
	v1.SmallestPointKey = v1.Smallest

	v2.LargestPointKey = v2.Largest
	v2.SmallestPointKey = v2.Smallest

	v1.ValidateVirtual(parentFile)
	d.checkVirtualBounds(v1)
	v2.ValidateVirtual(parentFile)
	d.checkVirtualBounds(v2)

	// Write the version edit.
	fileMetrics := func(ve *versionEdit) map[int]*LevelMetrics {
		metrics := newFileMetrics(ve.NewFiles)
		for de, f := range ve.DeletedFiles {
			lm := metrics[de.Level]
			if lm == nil {
				lm = &LevelMetrics{}
				metrics[de.Level] = lm
			}
			metrics[de.Level].NumFiles--
			metrics[de.Level].Size -= int64(f.Size)
		}
		return metrics
	}

	applyVE := func(ve *versionEdit) error {
		d.mu.versions.logLock()
		jobID := d.mu.nextJobID
		d.mu.nextJobID++

		err := d.mu.versions.logAndApply(jobID, ve, fileMetrics(ve), false, func() []compactionInfo {
			return d.getInProgressCompactionInfoLocked(nil)
		})
		d.updateReadStateLocked(nil)
		return err
	}

	ve := manifest.VersionEdit{}
	d1 := manifest.DeletedFileEntry{Level: 6, FileNum: parentFile.FileNum}
	n1 := manifest.NewFileEntry{Level: 6, Meta: v1}
	n2 := manifest.NewFileEntry{Level: 6, Meta: v2}

	ve.DeletedFiles = make(map[manifest.DeletedFileEntry]*manifest.FileMetadata)
	ve.DeletedFiles[d1] = parentFile
	ve.NewFiles = append(ve.NewFiles, n1)
	ve.NewFiles = append(ve.NewFiles, n2)
	ve.CreatedBackingTables = append(ve.CreatedBackingTables, parentFile.FileBacking)

	require.NoError(t, applyVE(&ve))

	currVersion = d.mu.versions.currentVersion()
	l6 = currVersion.Levels[6]
	l6FileIter = l6.Iter()
	for f := l6FileIter.First(); f != nil; f = l6FileIter.Next() {
		require.Equal(t, true, f.Virtual)
	}
	d.mu.Unlock()

	// Confirm that there were only 2 virtual sstables in L6.
	require.Equal(t, 2, int(d.Metrics().Levels[6].NumFiles))

	// These reads will go through the table cache.
	iter, _ := d.NewIter(nil)
	expected := []byte{'a', 'f', 'z'}
	for i, x := 0, iter.First(); x; i, x = i+1, iter.Next() {
		require.Equal(t, []byte{expected[i]}, iter.Value())
	}
	iter.Close()
}

// The table cache shouldn't be usable after all the dbs close.
func TestSharedTableCacheUseAfterAllFree(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
	tc.Unref()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	require.NoError(t, db1.Close())
	require.NoError(t, db2.Close())

	v := tc.refs.Load()
	if v != 0 {
		t.Fatalf("expected reference count %d, got %d", 0, v)
	}

	defer func() {
		// The cache ref gets incremented before the panic, so we should
		// decrement it to prevent the finalizer from detecting a leak.
		tc.cache.Unref()

		if r := recover(); r != nil {
			if fmt.Sprint(r) != "pebble: inconsistent reference count: 1" {
				t.Fatalf("unexpected panic message")
			}
		} else if r == nil {
			t.Fatalf("expected panic")
		}
	}()

	db3, _ := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	_ = db3
}

// Test whether a shared table cache is usable by a db, after
// one of the dbs releases its reference.
func TestSharedTableCacheUseAfterOneFree(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
	tc.Unref()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, db2.Close())
	}()

	// Make db1 release a reference to the cache. It should
	// still be usable by db2.
	require.NoError(t, db1.Close())
	v := tc.refs.Load()
	if v != 1 {
		t.Fatalf("expected reference count %d, got %d", 1, v)
	}

	// Check if db2 is still usable.
	start := []byte("a")
	end := []byte("d")
	require.NoError(t, db2.Set(start, nil, nil))
	require.NoError(t, db2.Flush())
	require.NoError(t, db2.DeleteRange(start, end, nil))
	require.NoError(t, db2.Compact(start, end, false))
}

// A basic test which makes sure that a shared table cache is usable
// by more than one database at once.
func TestSharedTableCacheUsable(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
	tc.Unref()

	defer func() {
		require.NoError(t, db1.Close())
	}()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, db2.Close())
	}()

	start := []byte("a")
	end := []byte("z")
	require.NoError(t, db1.Set(start, nil, nil))
	require.NoError(t, db1.Flush())
	require.NoError(t, db1.DeleteRange(start, end, nil))
	require.NoError(t, db1.Compact(start, end, false))

	start = []byte("x")
	end = []byte("y")
	require.NoError(t, db2.Set(start, nil, nil))
	require.NoError(t, db2.Flush())
	require.NoError(t, db2.Set(start, []byte{'a'}, nil))
	require.NoError(t, db2.Flush())
	require.NoError(t, db2.DeleteRange(start, end, nil))
	require.NoError(t, db2.Compact(start, end, false))
}

func TestSharedTableConcurrent(t *testing.T) {
	tc := newTableCacheTest(8<<20, 10, 1)
	db1, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)

	// Release our reference, now that the db has a reference.
	tc.Unref()

	defer func() {
		require.NoError(t, db1.Close())
	}()

	db2, err := Open("test",
		&Options{
			FS:         vfs.NewMem(),
			Cache:      tc.cache,
			TableCache: tc,
		})
	require.NoError(t, err)
	defer func() {
		require.NoError(t, db2.Close())
	}()

	var wg sync.WaitGroup
	wg.Add(2)

	// Now that both dbs have a reference to the table cache,
	// we'll run goroutines which will use the DBs concurrently.
	concFunc := func(db *DB) {
		for i := 0; i < 1000; i++ {
			start := []byte("a")
			end := []byte("z")
			require.NoError(t, db.Set(start, nil, nil))
			require.NoError(t, db.Flush())
			require.NoError(t, db.DeleteRange(start, end, nil))
			require.NoError(t, db.Compact(start, end, false))
		}
		wg.Done()
	}

	go concFunc(db1)
	go concFunc(db2)

	wg.Wait()
}

func testTableCacheRandomAccess(t *testing.T, concurrent bool) {
	const N = 2000
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	rngMu := sync.Mutex{}
	rng := rand.New(rand.NewSource(1))

	errc := make(chan error, N)
	for i := 0; i < N; i++ {
		go func(i int) {
			rngMu.Lock()
			fileNum, sleepTime := rng.Intn(tableCacheTestNumTables), rng.Intn(1000)
			rngMu.Unlock()
			m := &fileMetadata{FileNum: FileNum(fileNum)}
			m.InitPhysicalBacking()
			m.Ref()
			defer m.Unref()
			iter, _, err := c.newIters(context.Background(), m, nil, internalIterOpts{})
			if err != nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: find: %v", i, fileNum, err)
				return
			}
			key, value := iter.SeekGE([]byte("k"), base.SeekGEFlagsNone)
			if concurrent {
				time.Sleep(time.Duration(sleepTime) * time.Microsecond)
			}
			if key == nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: valid.0: got false, want true", i, fileNum)
				return
			}
			v, _, err := value.Value(nil)
			if err != nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: err extracting value: %v", i, fileNum, err)
			}
			if got := len(v); got != fileNum {
				errc <- errors.Errorf("i=%d, fileNum=%d: value: got %d bytes, want %d", i, fileNum, got, fileNum)
				return
			}
			if key, _ := iter.Next(); key != nil {
				errc <- errors.Errorf("i=%d, fileNum=%d: next.1: got true, want false", i, fileNum)
				return
			}
			if err := iter.Close(); err != nil {
				errc <- errors.Wrapf(err, "close error i=%d, fileNum=%d", i, fileNum)
				return
			}
			errc <- nil
		}(i)
		if !concurrent {
			require.NoError(t, <-errc)
		}
	}
	if concurrent {
		for i := 0; i < N; i++ {
			require.NoError(t, <-errc)
		}
	}
	fs.validate(t, c, nil)
}

func TestTableCacheRandomAccessSequential(t *testing.T) { testTableCacheRandomAccess(t, false) }
func TestTableCacheRandomAccessConcurrent(t *testing.T) { testTableCacheRandomAccess(t, true) }

func testTableCacheFrequentlyUsedInternal(t *testing.T, rangeIter bool) {
	const (
		N       = 1000
		pinned0 = 7
		pinned1 = 11
	)
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	for i := 0; i < N; i++ {
		for _, j := range [...]int{pinned0, i % tableCacheTestNumTables, pinned1} {
			var iter io.Closer
			var err error
			m := &fileMetadata{FileNum: FileNum(j)}
			m.InitPhysicalBacking()
			m.Ref()
			if rangeIter {
				iter, err = c.newRangeKeyIter(m, keyspan.SpanIterOptions{})
			} else {
				iter, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{})
			}
			if err != nil {
				t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
			}
			if err := iter.Close(); err != nil {
				t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
			}
		}
	}

	fs.validate(t, c, func(i, gotO, gotC int) error {
		if i == pinned0 || i == pinned1 {
			if gotO != 1 || gotC != 0 {
				return errors.Errorf("i=%d: pinned table: got %d, %d, want %d, %d", i, gotO, gotC, 1, 0)
			}
		}
		return nil
	})
}

func TestTableCacheFrequentlyUsed(t *testing.T) {
	for i, iterType := range []string{"point", "range"} {
		t.Run(fmt.Sprintf("iter=%s", iterType), func(t *testing.T) {
			testTableCacheFrequentlyUsedInternal(t, i == 1)
		})
	}
}

func TestSharedTableCacheFrequentlyUsed(t *testing.T) {
	const (
		N       = 1000
		pinned0 = 7
		pinned1 = 11
	)
	tc := newTableCacheTest(8<<20, 2*tableCacheTestCacheSize, 16)
	c1, fs1, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c2, fs2, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	tc.Unref()

	for i := 0; i < N; i++ {
		for _, j := range [...]int{pinned0, i % tableCacheTestNumTables, pinned1} {
			m := &fileMetadata{FileNum: FileNum(j)}
			m.InitPhysicalBacking()
			m.Ref()
			iter1, _, err := c1.newIters(context.Background(), m, nil, internalIterOpts{})
			if err != nil {
				t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
			}
			iter2, _, err := c2.newIters(context.Background(), m, nil, internalIterOpts{})
			if err != nil {
				t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
			}

			if err := iter1.Close(); err != nil {
				t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
			}
			if err := iter2.Close(); err != nil {
				t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
			}
		}
	}

	fs1.validate(t, c1, func(i, gotO, gotC int) error {
		if i == pinned0 || i == pinned1 {
			if gotO != 1 || gotC != 0 {
				return errors.Errorf("i=%d: pinned table: got %d, %d, want %d, %d", i, gotO, gotC, 1, 0)
			}
		}
		return nil
	})

	fs2.validate(t, c2, func(i, gotO, gotC int) error {
		if i == pinned0 || i == pinned1 {
			if gotO != 1 || gotC != 0 {
				return errors.Errorf("i=%d: pinned table: got %d, %d, want %d, %d", i, gotO, gotC, 1, 0)
			}
		}
		return nil
	})
}

func testTableCacheEvictionsInternal(t *testing.T, rangeIter bool) {
	const (
		N      = 1000
		lo, hi = 10, 20
	)
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	rng := rand.New(rand.NewSource(2))
	for i := 0; i < N; i++ {
		j := rng.Intn(tableCacheTestNumTables)
		var iter io.Closer
		var err error
		m := &fileMetadata{FileNum: FileNum(j)}
		m.InitPhysicalBacking()
		m.Ref()
		if rangeIter {
			iter, err = c.newRangeKeyIter(m, keyspan.SpanIterOptions{})
		} else {
			iter, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{})
		}
		if err != nil {
			t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
		}
		if err := iter.Close(); err != nil {
			t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
		}

		c.evict(base.FileNum(lo + rng.Uint64n(hi-lo)).DiskFileNum())
	}

	sumEvicted, nEvicted := 0, 0
	sumSafe, nSafe := 0, 0
	fs.validate(t, c, func(i, gotO, gotC int) error {
		if lo <= i && i < hi {
			sumEvicted += gotO
			nEvicted++
		} else {
			sumSafe += gotO
			nSafe++
		}
		return nil
	})
	fEvicted := float64(sumEvicted) / float64(nEvicted)
	fSafe := float64(sumSafe) / float64(nSafe)
	// The magic 1.25 number isn't derived from formal modeling. It's just a guess. For
	// (lo, hi, tableCacheTestCacheSize, tableCacheTestNumTables) = (10, 20, 100, 300),
	// the ratio seems to converge on roughly 1.5 for large N, compared to 1.0 if we do
	// not evict any cache entries.
	if ratio := fEvicted / fSafe; ratio < 1.25 {
		t.Errorf("evicted tables were opened %.3f times on average, safe tables %.3f, ratio %.3f < 1.250",
			fEvicted, fSafe, ratio)
	}
}

func TestTableCacheEvictions(t *testing.T) {
	for i, iterType := range []string{"point", "range"} {
		t.Run(fmt.Sprintf("iter=%s", iterType), func(t *testing.T) {
			testTableCacheEvictionsInternal(t, i == 1)
		})
	}
}

func TestSharedTableCacheEvictions(t *testing.T) {
	const (
		N      = 1000
		lo, hi = 10, 20
	)
	tc := newTableCacheTest(8<<20, 2*tableCacheTestCacheSize, 16)
	c1, fs1, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c2, fs2, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	tc.Unref()

	rng := rand.New(rand.NewSource(2))
	for i := 0; i < N; i++ {
		j := rng.Intn(tableCacheTestNumTables)
		m := &fileMetadata{FileNum: FileNum(j)}
		m.InitPhysicalBacking()
		m.Ref()
		iter1, _, err := c1.newIters(context.Background(), m, nil, internalIterOpts{})
		if err != nil {
			t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
		}

		iter2, _, err := c2.newIters(context.Background(), m, nil, internalIterOpts{})
		if err != nil {
			t.Fatalf("i=%d, j=%d: find: %v", i, j, err)
		}

		if err := iter1.Close(); err != nil {
			t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
		}

		if err := iter2.Close(); err != nil {
			t.Fatalf("i=%d, j=%d: close: %v", i, j, err)
		}

		c1.evict(base.FileNum(lo + rng.Uint64n(hi-lo)).DiskFileNum())
		c2.evict(base.FileNum(lo + rng.Uint64n(hi-lo)).DiskFileNum())
	}

	check := func(fs *tableCacheTestFS, c *tableCacheContainer) (float64, float64, float64) {
		sumEvicted, nEvicted := 0, 0
		sumSafe, nSafe := 0, 0
		fs.validate(t, c, func(i, gotO, gotC int) error {
			if lo <= i && i < hi {
				sumEvicted += gotO
				nEvicted++
			} else {
				sumSafe += gotO
				nSafe++
			}
			return nil
		})
		fEvicted := float64(sumEvicted) / float64(nEvicted)
		fSafe := float64(sumSafe) / float64(nSafe)

		return fEvicted, fSafe, fEvicted / fSafe
	}

	// The magic 1.25 number isn't derived from formal modeling. It's just a guess. For
	// (lo, hi, tableCacheTestCacheSize, tableCacheTestNumTables) = (10, 20, 100, 300),
	// the ratio seems to converge on roughly 1.5 for large N, compared to 1.0 if we do
	// not evict any cache entries.
	if fEvicted, fSafe, ratio := check(fs1, c1); ratio < 1.25 {
		t.Errorf(
			"evicted tables were opened %.3f times on average, safe tables %.3f, ratio %.3f < 1.250",
			fEvicted, fSafe, ratio,
		)
	}

	if fEvicted, fSafe, ratio := check(fs2, c2); ratio < 1.25 {
		t.Errorf(
			"evicted tables were opened %.3f times on average, safe tables %.3f, ratio %.3f < 1.250",
			fEvicted, fSafe, ratio,
		)
	}
}

func TestTableCacheIterLeak(t *testing.T) {
	c, _, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	m := &fileMetadata{FileNum: 0}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	iter, _, err := c.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(t, err)

	if err := c.close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
		t.Fatalf("expected leaked iterators, but found %+v", err)
	} else {
		t.Log(err.Error())
	}
	require.NoError(t, iter.Close())
}

func TestSharedTableCacheIterLeak(t *testing.T) {
	tc := newTableCacheTest(8<<20, 2*tableCacheTestCacheSize, 16)
	c1, _, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c2, _, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	c3, _, err := newTableCacheContainerTest(tc, "")
	require.NoError(t, err)
	tc.Unref()

	m := &fileMetadata{FileNum: 0}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	iter, _, err := c1.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(t, err)

	if err := c1.close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
		t.Fatalf("expected leaked iterators, but found %+v", err)
	} else {
		t.Log(err.Error())
	}

	// Closing c2 shouldn't error out since c2 isn't leaking any iterators.
	require.NoError(t, c2.close())

	// Closing c3 should error out since c3 holds the last reference to
	// the TableCache, and when the TableCache closes, it will detect
	// that there was a leaked iterator.
	if err := c3.close(); err == nil {
		t.Fatalf("expected failure, but found success")
	} else if !strings.HasPrefix(err.Error(), "leaked iterators:") {
		t.Fatalf("expected leaked iterators, but found %+v", err)
	} else {
		t.Log(err.Error())
	}

	require.NoError(t, iter.Close())
}

func TestTableCacheRetryAfterFailure(t *testing.T) {
	// Test a retry can succeed after a failure, i.e., errors are not cached.
	c, fs, err := newTableCacheContainerTest(nil, "")
	require.NoError(t, err)

	fs.setOpenError(true /* enabled */)
	m := &fileMetadata{FileNum: 0}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	if _, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{}); err == nil {
		t.Fatalf("expected failure, but found success")
	}
	require.Equal(t, "pebble: backing file 000000 error: injected error", err.Error())
	fs.setOpenError(false /* enabled */)
	var iter internalIterator
	iter, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(t, err)
	require.NoError(t, iter.Close())
	fs.validate(t, c, nil)
}

// memFile is a file-like struct that buffers all data written to it in memory.
// Implements the objstorage.Writable interface.
type memFile struct {
	buf bytes.Buffer
}

var _ objstorage.Writable = (*memFile)(nil)

// Finish is part of the objstorage.Writable interface.
func (*memFile) Finish() error {
	return nil
}

// Abort is part of the objstorage.Writable interface.
func (*memFile) Abort() {}

// Write is part of the objstorage.Writable interface.
func (f *memFile) Write(p []byte) error {
	_, err := f.buf.Write(p)
	return err
}

func TestTableCacheErrorBadMagicNumber(t *testing.T) {
	var file memFile
	tw := sstable.NewWriter(&file, sstable.WriterOptions{TableFormat: sstable.TableFormatPebblev2})
	tw.Set([]byte("a"), nil)
	require.NoError(t, tw.Close())
	buf := file.buf.Bytes()
	// Bad magic number.
	buf[len(buf)-1] = 0
	fs := &tableCacheTestFS{
		FS: vfs.NewMem(),
	}
	const testFileNum = 3
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(fs, ""))
	require.NoError(t, err)
	w, _, err := objProvider.Create(context.Background(), fileTypeTable,
		base.FileNum(testFileNum).DiskFileNum(), objstorage.CreateOptions{})
	w.Write(buf)
	require.NoError(t, w.Finish())
	opts := &Options{}
	opts.EnsureDefaults()
	opts.Cache = NewCache(8 << 20) // 8 MB
	defer opts.Cache.Unref()
	c := newTableCacheContainer(nil, opts.Cache.NewID(), objProvider, opts, tableCacheTestCacheSize,
		&sstable.CategoryStatsCollector{})
	require.NoError(t, err)
	defer c.close()

	m := &fileMetadata{FileNum: testFileNum}
	m.InitPhysicalBacking()
	m.Ref()
	defer m.Unref()
	if _, _, err = c.newIters(context.Background(), m, nil, internalIterOpts{}); err == nil {
		t.Fatalf("expected failure, but found success")
	}
	require.Equal(t,
		"pebble: backing file 000003 error: pebble/table: invalid table (bad magic number: 0xf09faab3f09faa00)",
		err.Error())
}

func TestTableCacheEvictClose(t *testing.T) {
	errs := make(chan error, 10)
	db, err := Open("test",
		&Options{
			FS: vfs.NewMem(),
			EventListener: &EventListener{
				TableDeleted: func(info TableDeleteInfo) {
					errs <- info.Err
				},
			},
		})
	require.NoError(t, err)

	start := []byte("a")
	end := []byte("z")
	require.NoError(t, db.Set(start, nil, nil))
	require.NoError(t, db.Flush())
	require.NoError(t, db.DeleteRange(start, end, nil))
	require.NoError(t, db.Compact(start, end, false))
	require.NoError(t, db.Close())
	close(errs)

	for err := range errs {
		require.NoError(t, err)
	}
}
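
// A note on the pattern used by TestTableCacheErrorBadMagicNumber above
// (descriptive only): memFile buffers an sstable written via sstable.NewWriter
// entirely in memory, which lets the test corrupt the trailing magic byte of
// the table before copying the bytes into the objstorage provider that backs
// the table cache.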

func TestTableCacheClockPro(t *testing.T) {
	// Test data was generated from the python code. See also
	// internal/cache/clockpro_test.go:TestCache.
	f, err := os.Open("internal/cache/testdata/cache")
	require.NoError(t, err)

	mem := vfs.NewMem()
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, ""))
	require.NoError(t, err)
	defer objProvider.Close()

	makeTable := func(dfn base.DiskFileNum) {
		require.NoError(t, err)
		f, _, err := objProvider.Create(context.Background(), fileTypeTable, dfn, objstorage.CreateOptions{})
		require.NoError(t, err)
		w := sstable.NewWriter(f, sstable.WriterOptions{})
		require.NoError(t, w.Set([]byte("a"), nil))
		require.NoError(t, w.Close())
	}

	opts := &Options{
		Cache: NewCache(8 << 20), // 8 MB
	}
	opts.EnsureDefaults()
	defer opts.Cache.Unref()

	cache := &tableCacheShard{}
	// NB: The table cache size of 200 is required for the expected test values.
	cache.init(200)
	dbOpts := &tableCacheOpts{}
	dbOpts.loggerAndTracer = &base.LoggerWithNoopTracer{Logger: opts.Logger}
	dbOpts.cacheID = 0
	dbOpts.objProvider = objProvider
	dbOpts.opts = opts.MakeReaderOptions()

	scanner := bufio.NewScanner(f)
	tables := make(map[int]bool)
	line := 1

	for scanner.Scan() {
		fields := bytes.Fields(scanner.Bytes())

		key, err := strconv.Atoi(string(fields[0]))
		require.NoError(t, err)

		// Ensure that underlying sstables exist on disk, creating each table the
		// first time it is seen.
		if !tables[key] {
			makeTable(base.FileNum(uint64(key)).DiskFileNum())
			tables[key] = true
		}

		oldHits := cache.hits.Load()
		m := &fileMetadata{FileNum: FileNum(key)}
		m.InitPhysicalBacking()
		m.Ref()
		v := cache.findNode(m, dbOpts)
		cache.unrefValue(v)

		hit := cache.hits.Load() != oldHits
		wantHit := fields[1][0] == 'h'
		if hit != wantHit {
			t.Errorf("%d: cache hit mismatch: got %v, want %v\n", line, hit, wantHit)
		}
		line++
		m.Unref()
	}
}

func BenchmarkNewItersAlloc(b *testing.B) {
	opts := &Options{
		FS:                 vfs.NewMem(),
		FormatMajorVersion: internalFormatNewest,
	}
	d, err := Open("", opts)
	require.NoError(b, err)
	defer func() { require.NoError(b, d.Close()) }()

	require.NoError(b, d.Set([]byte{'a'}, []byte{'a'}, nil))
	require.NoError(b, d.Flush())
	require.NoError(b, d.Compact([]byte{'a'}, []byte{'z'}, false))

	d.mu.Lock()
	currVersion := d.mu.versions.currentVersion()
	it := currVersion.Levels[6].Iter()
	m := it.First()
	require.NotNil(b, m)
	d.mu.Unlock()

	// Open once so that the Reader is cached.
	iter, _, err := d.newIters(context.Background(), m, nil, internalIterOpts{})
	require.NoError(b, iter.Close())
	require.NoError(b, err)

	for i := 0; i < b.N; i++ {
		b.StartTimer()
		iter, _, err := d.newIters(context.Background(), m, nil, internalIterOpts{})
		b.StopTimer()
		require.NoError(b, err)
		require.NoError(b, iter.Close())
	}
}

// TestTableCacheNoSuchFileError verifies that when the table cache hits a "no
// such file" error, it generates a useful fatal message.
func TestTableCacheNoSuchFileError(t *testing.T) {
	const dirname = "test"
	mem := vfs.NewMem()
	logger := &catchFatalLogger{}

	d, err := Open(dirname, &Options{
		FS:     mem,
		Logger: logger,
	})
	require.NoError(t, err)
	defer func() { _ = d.Close() }()
	require.NoError(t, d.Set([]byte("a"), []byte("val_a"), nil))
	require.NoError(t, d.Set([]byte("b"), []byte("val_b"), nil))
	require.NoError(t, d.Flush())
	ls, err := mem.List(dirname)
	require.NoError(t, err)

	// Find the sst file.
	var sst string
	for _, file := range ls {
		if strings.HasSuffix(file, ".sst") {
			if sst != "" {
				t.Fatalf("multiple SSTs found: %s, %s", sst, file)
			}
			sst = file
		}
	}
	if sst == "" {
		t.Fatalf("no SST found after flush")
	}
	require.NoError(t, mem.Remove(path.Join(dirname, sst)))

	_, _, _ = d.Get([]byte("a"))
	require.NotZero(t, len(logger.fatalMsgs), "no fatal message emitted")
	require.Equal(t, 1, len(logger.fatalMsgs), "expected one fatal message; got: %v", logger.fatalMsgs)
	require.Contains(t, logger.fatalMsgs[0], "directory contains 6 files, 0 unknown, 0 tables, 2 logs, 1 manifests")
}

func BenchmarkTableCacheHotPath(b *testing.B) {
	mem := vfs.NewMem()
	objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, ""))
	require.NoError(b, err)
	defer objProvider.Close()

	makeTable := func(dfn base.DiskFileNum) {
		require.NoError(b, err)
		f, _, err := objProvider.Create(context.Background(), fileTypeTable, dfn, objstorage.CreateOptions{})
		require.NoError(b, err)
		w := sstable.NewWriter(f, sstable.WriterOptions{})
		require.NoError(b, w.Set([]byte("a"), nil))
		require.NoError(b, w.Close())
	}

	opts := &Options{
		Cache: NewCache(8 << 20), // 8 MB
	}
	opts.EnsureDefaults()
	defer opts.Cache.Unref()

	cache := &tableCacheShard{}
	cache.init(2)
	dbOpts := &tableCacheOpts{}
	dbOpts.loggerAndTracer = &base.LoggerWithNoopTracer{Logger: opts.Logger}
	dbOpts.cacheID = 0
	dbOpts.objProvider = objProvider
	dbOpts.opts = opts.MakeReaderOptions()

	makeTable(1)

	m := &fileMetadata{FileNum: 1}
	m.InitPhysicalBacking()
	m.Ref()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		v := cache.findNode(m, dbOpts)
		cache.unrefValue(v)
	}
}

type catchFatalLogger struct {
	fatalMsgs []string
}

var _ Logger = (*catchFatalLogger)(nil)

func (tl *catchFatalLogger) Infof(format string, args ...interface{})  {}
func (tl *catchFatalLogger) Errorf(format string, args ...interface{}) {}

func (tl *catchFatalLogger) Fatalf(format string, args ...interface{}) {
	tl.fatalMsgs = append(tl.fatalMsgs, fmt.Sprintf(format, args...))
}