github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/open_test.go (about) 1 // Copyright 2012 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package bitalostable 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "os" 13 "path/filepath" 14 "reflect" 15 "runtime/debug" 16 "sort" 17 "strconv" 18 "strings" 19 "sync/atomic" 20 "syscall" 21 "testing" 22 23 "github.com/cockroachdb/errors/oserror" 24 "github.com/kr/pretty" 25 "github.com/stretchr/testify/require" 26 "github.com/zuoyebang/bitalostable/internal/base" 27 "github.com/zuoyebang/bitalostable/internal/cache" 28 "github.com/zuoyebang/bitalostable/internal/errorfs" 29 "github.com/zuoyebang/bitalostable/vfs" 30 "github.com/zuoyebang/bitalostable/vfs/atomicfs" 31 ) 32 33 func TestOpenSharedTableCache(t *testing.T) { 34 c := cache.New(cacheDefaultSize) 35 tc := NewTableCache(c, 16, 100) 36 defer tc.Unref() 37 defer c.Unref() 38 39 d0, err := Open("", testingRandomized(&Options{ 40 FS: vfs.NewMem(), 41 Cache: c, 42 TableCache: tc, 43 })) 44 if err != nil { 45 t.Errorf("d0 Open: %s", err.Error()) 46 } 47 defer d0.Close() 48 49 d1, err := Open("", testingRandomized(&Options{ 50 FS: vfs.NewMem(), 51 Cache: c, 52 TableCache: tc, 53 })) 54 if err != nil { 55 t.Errorf("d1 Open: %s", err.Error()) 56 } 57 defer d1.Close() 58 59 // Make sure that the Open function is using the passed in table cache 60 // when the TableCache option is set. 61 require.Equalf( 62 t, d0.tableCache.tableCache, d1.tableCache.tableCache, 63 "expected tableCache for both d0 and d1 to be the same", 64 ) 65 } 66 67 func TestErrorIfExists(t *testing.T) { 68 for _, b := range [...]bool{false, true} { 69 t.Run(fmt.Sprintf("%t", b), func(t *testing.T) { 70 mem := vfs.NewMem() 71 d0, err := Open("", testingRandomized(&Options{ 72 FS: mem, 73 })) 74 if err != nil { 75 t.Errorf("b=%v: d0 Open: %v", b, err) 76 return 77 } 78 if err := d0.Close(); err != nil { 79 t.Errorf("b=%v: d0 Close: %v", b, err) 80 return 81 } 82 83 opts := testingRandomized(&Options{ 84 FS: mem, 85 ErrorIfExists: b, 86 }) 87 defer ensureFilesClosed(t, opts)() 88 d1, err := Open("", opts) 89 if d1 != nil { 90 defer d1.Close() 91 } 92 if got := err != nil; got != b { 93 t.Errorf("b=%v: d1 Open: err is %v, got (err != nil) is %v, want %v", b, err, got, b) 94 return 95 } 96 }) 97 } 98 } 99 100 func TestErrorIfNotExists(t *testing.T) { 101 t.Run("does-not-exist", func(t *testing.T) { 102 opts := testingRandomized(&Options{ 103 FS: vfs.NewMem(), 104 ErrorIfNotExists: true, 105 }) 106 defer ensureFilesClosed(t, opts)() 107 108 _, err := Open("", opts) 109 if err == nil { 110 t.Fatalf("expected error, but found success") 111 } else if !strings.HasSuffix(err.Error(), oserror.ErrNotExist.Error()) { 112 t.Fatalf("expected not exists, but found %q", err) 113 } 114 }) 115 116 t.Run("does-exist", func(t *testing.T) { 117 opts := testingRandomized(&Options{ 118 FS: vfs.NewMem(), 119 ErrorIfNotExists: false, 120 }) 121 defer ensureFilesClosed(t, opts)() 122 123 // Create the DB and try again. 124 d, err := Open("", opts) 125 require.NoError(t, err) 126 require.NoError(t, d.Close()) 127 128 opts.ErrorIfNotExists = true 129 // The DB exists, so the setting of ErrorIfNotExists is a no-op. 130 d, err = Open("", opts) 131 require.NoError(t, err) 132 require.NoError(t, d.Close()) 133 }) 134 } 135 136 func TestNewDBFilenames(t *testing.T) { 137 versions := map[FormatMajorVersion][]string{ 138 FormatMostCompatible: { 139 "000002.log", 140 "CURRENT", 141 "LOCK", 142 "MANIFEST-000001", 143 "OPTIONS-000003", 144 }, 145 FormatNewest: { 146 "000002.log", 147 "CURRENT", 148 "LOCK", 149 "MANIFEST-000001", 150 "OPTIONS-000003", 151 "marker.format-version.000010.011", 152 "marker.manifest.000001.MANIFEST-000001", 153 }, 154 } 155 156 for formatVers, want := range versions { 157 t.Run(fmt.Sprintf("vers=%s", formatVers), func(t *testing.T) { 158 mem := vfs.NewMem() 159 fooBar := mem.PathJoin("foo", "bar") 160 d, err := Open(fooBar, &Options{ 161 FS: mem, 162 FormatMajorVersion: formatVers, 163 }) 164 if err != nil { 165 t.Fatalf("Open: %v", err) 166 } 167 if err := d.Close(); err != nil { 168 t.Fatalf("Close: %v", err) 169 } 170 got, err := mem.List(fooBar) 171 if err != nil { 172 t.Fatalf("List: %v", err) 173 } 174 sort.Strings(got) 175 if !reflect.DeepEqual(got, want) { 176 t.Errorf("\ngot %v\nwant %v", got, want) 177 } 178 }) 179 } 180 } 181 182 func testOpenCloseOpenClose(t *testing.T, fs vfs.FS, root string) { 183 opts := testingRandomized(&Options{FS: fs}) 184 185 for _, startFromEmpty := range []bool{false, true} { 186 for _, walDirname := range []string{"", "wal"} { 187 for _, length := range []int{-1, 0, 1, 1000, 10000, 100000} { 188 dirname := "sharedDatabase" + walDirname 189 if startFromEmpty { 190 dirname = "startFromEmpty" + walDirname + strconv.Itoa(length) 191 } 192 dirname = fs.PathJoin(root, dirname) 193 if walDirname == "" { 194 opts.WALDir = "" 195 } else { 196 opts.WALDir = fs.PathJoin(dirname, walDirname) 197 } 198 199 got, xxx := []byte(nil), "" 200 if length >= 0 { 201 xxx = strings.Repeat("x", length) 202 } 203 204 d0, err := Open(dirname, opts) 205 if err != nil { 206 t.Fatalf("sfe=%t, length=%d: Open #0: %v", 207 startFromEmpty, length, err) 208 continue 209 } 210 if length >= 0 { 211 err = d0.Set([]byte("key"), []byte(xxx), nil) 212 if err != nil { 213 t.Errorf("sfe=%t, length=%d: Set: %v", 214 startFromEmpty, length, err) 215 continue 216 } 217 } 218 err = d0.Close() 219 if err != nil { 220 t.Errorf("sfe=%t, length=%d: Close #0: %v", 221 startFromEmpty, length, err) 222 continue 223 } 224 225 d1, err := Open(dirname, opts) 226 if err != nil { 227 t.Errorf("sfe=%t, length=%d: Open #1: %v", 228 startFromEmpty, length, err) 229 continue 230 } 231 if length >= 0 { 232 var closer io.Closer 233 got, closer, err = d1.Get([]byte("key")) 234 if err != nil { 235 t.Errorf("sfe=%t, length=%d: Get: %v", 236 startFromEmpty, length, err) 237 continue 238 } 239 got = append([]byte(nil), got...) 240 closer.Close() 241 } 242 err = d1.Close() 243 if err != nil { 244 t.Errorf("sfe=%t, length=%d: Close #1: %v", 245 startFromEmpty, length, err) 246 continue 247 } 248 249 if length >= 0 && string(got) != xxx { 250 t.Errorf("sfe=%t, length=%d: got value differs from set value", 251 startFromEmpty, length) 252 continue 253 } 254 255 { 256 got, err := opts.FS.List(dirname) 257 if err != nil { 258 t.Fatalf("List: %v", err) 259 } 260 var optionsCount int 261 for _, s := range got { 262 if t, _, ok := base.ParseFilename(opts.FS, s); ok && t == fileTypeOptions { 263 optionsCount++ 264 } 265 } 266 if optionsCount != 1 { 267 t.Fatalf("expected 1 OPTIONS file, but found %d", optionsCount) 268 } 269 } 270 } 271 } 272 } 273 } 274 275 func TestOpenCloseOpenClose(t *testing.T) { 276 for _, fstype := range []string{"disk", "mem"} { 277 t.Run(fstype, func(t *testing.T) { 278 var fs vfs.FS 279 var dir string 280 switch fstype { 281 case "disk": 282 var err error 283 dir, err = ioutil.TempDir("", "open-close") 284 require.NoError(t, err) 285 defer func() { 286 _ = os.RemoveAll(dir) 287 }() 288 fs = vfs.Default 289 case "mem": 290 dir = "" 291 fs = vfs.NewMem() 292 } 293 testOpenCloseOpenClose(t, fs, dir) 294 }) 295 } 296 } 297 298 func TestOpenOptionsCheck(t *testing.T) { 299 mem := vfs.NewMem() 300 opts := &Options{FS: mem} 301 302 d, err := Open("", opts) 303 require.NoError(t, err) 304 require.NoError(t, d.Close()) 305 306 opts = &Options{ 307 Comparer: &Comparer{Name: "foo"}, 308 FS: mem, 309 } 310 _, err = Open("", opts) 311 require.Regexp(t, `comparer name from file.*!=.*`, err) 312 313 opts = &Options{ 314 Merger: &Merger{Name: "bar"}, 315 FS: mem, 316 } 317 _, err = Open("", opts) 318 require.Regexp(t, `merger name from file.*!=.*`, err) 319 } 320 321 func TestOpenCrashWritingOptions(t *testing.T) { 322 memFS := vfs.NewMem() 323 324 d, err := Open("", &Options{FS: memFS}) 325 require.NoError(t, err) 326 require.NoError(t, d.Close()) 327 328 // Open the database again, this time with a mocked filesystem that 329 // will only succeed in partially writing the OPTIONS file. 330 fs := optionsTornWriteFS{FS: memFS} 331 _, err = Open("", &Options{FS: fs}) 332 require.Error(t, err) 333 334 // Re-opening the database must succeed. 335 d, err = Open("", &Options{FS: memFS}) 336 require.NoError(t, err) 337 require.NoError(t, d.Close()) 338 } 339 340 type optionsTornWriteFS struct { 341 vfs.FS 342 } 343 344 func (fs optionsTornWriteFS) Create(name string) (vfs.File, error) { 345 file, err := fs.FS.Create(name) 346 if file != nil { 347 file = optionsTornWriteFile{File: file} 348 } 349 return file, err 350 } 351 352 type optionsTornWriteFile struct { 353 vfs.File 354 } 355 356 func (f optionsTornWriteFile) Write(b []byte) (int, error) { 357 // Look for the OPTIONS-XXXXXX file's `comparer=` field. 358 comparerKey := []byte("comparer=") 359 i := bytes.Index(b, comparerKey) 360 if i == -1 { 361 return f.File.Write(b) 362 } 363 // Write only the contents through `comparer=` and return an error. 364 n, _ := f.File.Write(b[:i+len(comparerKey)]) 365 return n, syscall.EIO 366 } 367 368 func TestOpenReadOnly(t *testing.T) { 369 mem := vfs.NewMem() 370 371 { 372 // Opening a non-existent DB in read-only mode should result in no mutable 373 // filesystem operations. 374 var buf syncedBuffer 375 _, err := Open("non-existent", testingRandomized(&Options{ 376 FS: loggingFS{mem, &buf}, 377 ReadOnly: true, 378 WALDir: "non-existent-waldir", 379 })) 380 if err == nil { 381 t.Fatalf("expected error, but found success") 382 } 383 const expected = `open-dir: non-existent` 384 if trimmed := strings.TrimSpace(buf.String()); expected != trimmed { 385 t.Fatalf("expected %q, but found %q", expected, trimmed) 386 } 387 } 388 389 { 390 // Opening a DB with a non-existent WAL dir in read-only mode should result 391 // in no mutable filesystem operations other than the LOCK. 392 var buf syncedBuffer 393 _, err := Open("", testingRandomized(&Options{ 394 FS: loggingFS{mem, &buf}, 395 ReadOnly: true, 396 WALDir: "non-existent-waldir", 397 })) 398 if err == nil { 399 t.Fatalf("expected error, but found success") 400 } 401 const expected = "open-dir: \nopen-dir: non-existent-waldir\nclose:" 402 if trimmed := strings.TrimSpace(buf.String()); expected != trimmed { 403 t.Fatalf("expected %q, but found %q", expected, trimmed) 404 } 405 } 406 407 var contents []string 408 { 409 // Create a new DB and populate it with a small amount of data. 410 d, err := Open("", testingRandomized(&Options{ 411 FS: mem, 412 })) 413 require.NoError(t, err) 414 require.NoError(t, d.Set([]byte("test"), nil, nil)) 415 require.NoError(t, d.Close()) 416 contents, err = mem.List("") 417 require.NoError(t, err) 418 sort.Strings(contents) 419 } 420 421 { 422 // Re-open the DB read-only. The directory contents should be unchanged. 423 d, err := Open("", testingRandomized(&Options{ 424 FS: mem, 425 ReadOnly: true, 426 })) 427 require.NoError(t, err) 428 429 // Verify various write operations fail in read-only mode. 430 require.EqualValues(t, ErrReadOnly, d.Compact(nil, []byte("\xff"), false)) 431 require.EqualValues(t, ErrReadOnly, d.Flush()) 432 require.EqualValues(t, ErrReadOnly, func() error { _, err := d.AsyncFlush(); return err }()) 433 434 require.EqualValues(t, ErrReadOnly, d.Delete(nil, nil)) 435 require.EqualValues(t, ErrReadOnly, d.DeleteRange(nil, nil, nil)) 436 require.EqualValues(t, ErrReadOnly, d.Ingest(nil)) 437 require.EqualValues(t, ErrReadOnly, d.LogData(nil, nil)) 438 require.EqualValues(t, ErrReadOnly, d.Merge(nil, nil, nil)) 439 require.EqualValues(t, ErrReadOnly, d.Set(nil, nil, nil)) 440 441 // Verify we can still read in read-only mode. 442 require.NoError(t, func() error { 443 _, closer, err := d.Get([]byte("test")) 444 if closer != nil { 445 closer.Close() 446 } 447 return err 448 }()) 449 450 checkIter := func(iter *Iterator) { 451 t.Helper() 452 453 var keys []string 454 for valid := iter.First(); valid; valid = iter.Next() { 455 keys = append(keys, string(iter.Key())) 456 } 457 require.NoError(t, iter.Close()) 458 expectedKeys := []string{"test"} 459 if diff := pretty.Diff(keys, expectedKeys); diff != nil { 460 t.Fatalf("%s\n%s", strings.Join(diff, "\n"), keys) 461 } 462 } 463 464 checkIter(d.NewIter(nil)) 465 466 b := d.NewIndexedBatch() 467 checkIter(b.NewIter(nil)) 468 require.EqualValues(t, ErrReadOnly, b.Commit(nil)) 469 require.EqualValues(t, ErrReadOnly, d.Apply(b, nil)) 470 471 s := d.NewSnapshot() 472 checkIter(s.NewIter(nil)) 473 require.NoError(t, s.Close()) 474 475 require.NoError(t, d.Close()) 476 477 newContents, err := mem.List("") 478 require.NoError(t, err) 479 480 sort.Strings(newContents) 481 if diff := pretty.Diff(contents, newContents); diff != nil { 482 t.Fatalf("%s", strings.Join(diff, "\n")) 483 } 484 } 485 } 486 487 func TestOpenWALReplay(t *testing.T) { 488 largeValue := []byte(strings.Repeat("a", 100<<10)) 489 hugeValue := []byte(strings.Repeat("b", 10<<20)) 490 checkIter := func(iter *Iterator) { 491 t.Helper() 492 493 var keys []string 494 for valid := iter.First(); valid; valid = iter.Next() { 495 keys = append(keys, string(iter.Key())) 496 } 497 require.NoError(t, iter.Close()) 498 expectedKeys := []string{"1", "2", "3", "4", "5"} 499 if diff := pretty.Diff(keys, expectedKeys); diff != nil { 500 t.Fatalf("%s\n%s", strings.Join(diff, "\n"), keys) 501 } 502 } 503 504 for _, readOnly := range []bool{false, true} { 505 t.Run(fmt.Sprintf("read-only=%t", readOnly), func(t *testing.T) { 506 // Create a new DB and populate it with some data. 507 const dir = "" 508 mem := vfs.NewMem() 509 d, err := Open(dir, testingRandomized(&Options{ 510 FS: mem, 511 MemTableSize: 32 << 20, 512 })) 513 require.NoError(t, err) 514 // All these values will fit in a single memtable, so on closing the db there 515 // will be no sst and all the data is in a single WAL. 516 require.NoError(t, d.Set([]byte("1"), largeValue, nil)) 517 require.NoError(t, d.Set([]byte("2"), largeValue, nil)) 518 require.NoError(t, d.Set([]byte("3"), largeValue, nil)) 519 require.NoError(t, d.Set([]byte("4"), hugeValue, nil)) 520 require.NoError(t, d.Set([]byte("5"), largeValue, nil)) 521 checkIter(d.NewIter(nil)) 522 require.NoError(t, d.Close()) 523 files, err := mem.List(dir) 524 require.NoError(t, err) 525 sort.Strings(files) 526 logCount, sstCount := 0, 0 527 for _, fname := range files { 528 if strings.HasSuffix(fname, ".sst") { 529 sstCount++ 530 } 531 if strings.HasSuffix(fname, ".log") { 532 logCount++ 533 } 534 } 535 require.Equal(t, 0, sstCount) 536 // The memtable size starts at 256KB and doubles up to 32MB so we expect 5 537 // logs (one for each doubling). 538 require.Equal(t, 7, logCount) 539 540 // Re-open the DB with a smaller memtable. Values for 1, 2 will fit in the first memtable; 541 // value for 3 will go in the next memtable; value for 4 will be in a flushable batch 542 // which will cause the previous memtable to be flushed; value for 5 will go in the next 543 // memtable 544 d, err = Open(dir, testingRandomized(&Options{ 545 FS: mem, 546 MemTableSize: 300 << 10, 547 ReadOnly: readOnly, 548 })) 549 require.NoError(t, err) 550 551 if readOnly { 552 m := d.Metrics() 553 require.Equal(t, int64(logCount), m.WAL.Files) 554 d.mu.Lock() 555 require.NotNil(t, d.mu.mem.mutable) 556 d.mu.Unlock() 557 } 558 checkIter(d.NewIter(nil)) 559 require.NoError(t, d.Close()) 560 }) 561 } 562 } 563 564 // Similar to TestOpenWALReplay, except we test replay behavior after a 565 // memtable has been flushed. We test all 3 reasons for flushing: forced, size, 566 // and large-batch. 567 func TestOpenWALReplay2(t *testing.T) { 568 for _, readOnly := range []bool{false, true} { 569 t.Run(fmt.Sprintf("read-only=%t", readOnly), func(t *testing.T) { 570 for _, reason := range []string{"forced", "size", "large-batch"} { 571 t.Run(reason, func(t *testing.T) { 572 mem := vfs.NewMem() 573 d, err := Open("", testingRandomized(&Options{ 574 FS: mem, 575 MemTableSize: 256 << 10, 576 })) 577 require.NoError(t, err) 578 579 switch reason { 580 case "forced": 581 require.NoError(t, d.Set([]byte("1"), nil, nil)) 582 require.NoError(t, d.Flush()) 583 require.NoError(t, d.Set([]byte("2"), nil, nil)) 584 case "size": 585 largeValue := []byte(strings.Repeat("a", 100<<10)) 586 require.NoError(t, d.Set([]byte("1"), largeValue, nil)) 587 require.NoError(t, d.Set([]byte("2"), largeValue, nil)) 588 require.NoError(t, d.Set([]byte("3"), largeValue, nil)) 589 case "large-batch": 590 largeValue := []byte(strings.Repeat("a", d.largeBatchThreshold)) 591 require.NoError(t, d.Set([]byte("1"), nil, nil)) 592 require.NoError(t, d.Set([]byte("2"), largeValue, nil)) 593 require.NoError(t, d.Set([]byte("3"), nil, nil)) 594 } 595 require.NoError(t, d.Close()) 596 597 files, err := mem.List("") 598 require.NoError(t, err) 599 sort.Strings(files) 600 sstCount := 0 601 for _, fname := range files { 602 if strings.HasSuffix(fname, ".sst") { 603 sstCount++ 604 } 605 } 606 require.Equal(t, 1, sstCount) 607 608 // Re-open the DB with a smaller memtable. Values for 1, 2 will fit in the first memtable; 609 // value for 3 will go in the next memtable; value for 4 will be in a flushable batch 610 // which will cause the previous memtable to be flushed; value for 5 will go in the next 611 // memtable 612 d, err = Open("", testingRandomized(&Options{ 613 FS: mem, 614 MemTableSize: 300 << 10, 615 ReadOnly: readOnly, 616 })) 617 require.NoError(t, err) 618 require.NoError(t, d.Close()) 619 }) 620 } 621 }) 622 } 623 } 624 625 // TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two 626 // WALs is corrupted with an sstable checksum error. Replay must stop at the 627 // first WAL because otherwise we may violate point-in-time recovery 628 // semantics. See #864. 629 func TestTwoWALReplayCorrupt(t *testing.T) { 630 // Use the real filesystem so that we can seek and overwrite WAL data 631 // easily. 632 dir, err := ioutil.TempDir("", "wal-replay") 633 require.NoError(t, err) 634 defer os.RemoveAll(dir) 635 636 d, err := Open(dir, testingRandomized(&Options{ 637 MemTableStopWritesThreshold: 4, 638 MemTableSize: 2048, 639 })) 640 require.NoError(t, err) 641 d.mu.Lock() 642 d.mu.compact.flushing = true 643 d.mu.Unlock() 644 require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil)) 645 require.NoError(t, d.Set([]byte("2"), nil, nil)) 646 d.mu.Lock() 647 d.mu.compact.flushing = false 648 d.mu.Unlock() 649 require.NoError(t, d.Close()) 650 651 // We should have two WALs. 652 var logs []string 653 ls, err := vfs.Default.List(dir) 654 require.NoError(t, err) 655 for _, name := range ls { 656 if filepath.Ext(name) == ".log" { 657 logs = append(logs, name) 658 } 659 } 660 sort.Strings(logs) 661 if len(logs) < 2 { 662 t.Fatalf("expected at least two log files, found %d", len(logs)) 663 } 664 665 // Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end 666 // of the file. 667 f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm) 668 require.NoError(t, err) 669 off, err := f.Seek(-100, 2) 670 require.NoError(t, err) 671 _, err = f.Write([]byte{0, 0, 0, 0}) 672 require.NoError(t, err) 673 require.NoError(t, f.Close()) 674 t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off) 675 676 // Re-opening the database should detect and report the corruption. 677 _, err = Open(dir, nil) 678 require.Error(t, err, "bitalostable: corruption") 679 } 680 681 // TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two 682 // WALs is corrupted with an sstable checksum error and the OPTIONS file does 683 // not enable the private strict_wal_tail option, indicating that the WAL was 684 // produced by a database that did not guarantee clean WAL tails. See #864. 685 func TestTwoWALReplayPermissive(t *testing.T) { 686 // Use the real filesystem so that we can seek and overwrite WAL data 687 // easily. 688 dir, err := ioutil.TempDir("", "wal-replay") 689 require.NoError(t, err) 690 defer os.RemoveAll(dir) 691 692 opts := &Options{ 693 MemTableStopWritesThreshold: 4, 694 MemTableSize: 2048, 695 } 696 opts.testingRandomized() 697 opts.EnsureDefaults() 698 d, err := Open(dir, opts) 699 require.NoError(t, err) 700 d.mu.Lock() 701 d.mu.compact.flushing = true 702 d.mu.Unlock() 703 require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil)) 704 require.NoError(t, d.Set([]byte("2"), nil, nil)) 705 d.mu.Lock() 706 d.mu.compact.flushing = false 707 d.mu.Unlock() 708 require.NoError(t, d.Close()) 709 710 // We should have two WALs. 711 var logs []string 712 var optionFilename string 713 ls, err := vfs.Default.List(dir) 714 require.NoError(t, err) 715 for _, name := range ls { 716 if filepath.Ext(name) == ".log" { 717 logs = append(logs, name) 718 } 719 if strings.HasPrefix(filepath.Base(name), "OPTIONS") { 720 optionFilename = name 721 } 722 } 723 sort.Strings(logs) 724 if len(logs) < 2 { 725 t.Fatalf("expected at least two log files, found %d", len(logs)) 726 } 727 728 // Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end 729 // of the file. 730 f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm) 731 require.NoError(t, err) 732 off, err := f.Seek(-100, 2) 733 require.NoError(t, err) 734 _, err = f.Write([]byte{0, 0, 0, 0}) 735 require.NoError(t, err) 736 require.NoError(t, f.Close()) 737 t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off) 738 739 // Remove the OPTIONS file containing the strict_wal_tail option. 740 require.NoError(t, vfs.Default.Remove(filepath.Join(dir, optionFilename))) 741 742 // Re-opening the database should not report the corruption. 743 d, err = Open(dir, nil) 744 require.NoError(t, err) 745 require.NoError(t, d.Close()) 746 } 747 748 // TestCrashOpenCrashAfterWALCreation tests a database that exits 749 // ungracefully, begins recovery, creates the new WAL but promptly exits 750 // ungracefully again. 751 // 752 // This sequence has the potential to be problematic with the strict_wal_tail 753 // behavior because the first crash's WAL has an unclean tail. By the time the 754 // new WAL is created, the current manifest's MinUnflushedLogNum must be 755 // higher than the previous WAL. 756 func TestCrashOpenCrashAfterWALCreation(t *testing.T) { 757 fs := vfs.NewStrictMem() 758 759 getLogs := func() (logs []string) { 760 ls, err := fs.List("") 761 require.NoError(t, err) 762 for _, name := range ls { 763 if filepath.Ext(name) == ".log" { 764 logs = append(logs, name) 765 } 766 } 767 return logs 768 } 769 770 { 771 d, err := Open("", testingRandomized(&Options{FS: fs})) 772 require.NoError(t, err) 773 require.NoError(t, d.Set([]byte("abc"), nil, Sync)) 774 775 // Ignore syncs during close to simulate a crash. This will leave the WAL 776 // without an EOF trailer. It won't be an 'unclean tail' yet since the 777 // log file was not recycled, but we'll fix that down below. 778 fs.SetIgnoreSyncs(true) 779 require.NoError(t, d.Close()) 780 fs.ResetToSyncedState() 781 fs.SetIgnoreSyncs(false) 782 } 783 784 // There should be one WAL. 785 logs := getLogs() 786 if len(logs) != 1 { 787 t.Fatalf("expected one log file, found %d", len(logs)) 788 } 789 790 // The one WAL file doesn't have an EOF trailer, but since it wasn't 791 // recycled it won't have garbage at the end. Rewrite it so that it has 792 // the same contents it currently has, followed by garbage. 793 { 794 f, err := fs.Open(logs[0]) 795 require.NoError(t, err) 796 b, err := ioutil.ReadAll(f) 797 require.NoError(t, err) 798 require.NoError(t, f.Close()) 799 f, err = fs.Create(logs[0]) 800 require.NoError(t, err) 801 _, err = f.Write(b) 802 require.NoError(t, err) 803 _, err = f.Write([]byte{0xde, 0xad, 0xbe, 0xef}) 804 require.NoError(t, err) 805 require.NoError(t, f.Sync()) 806 require.NoError(t, f.Close()) 807 dir, err := fs.OpenDir("") 808 require.NoError(t, err) 809 require.NoError(t, dir.Sync()) 810 require.NoError(t, dir.Close()) 811 } 812 813 // Open the database again (with syncs respected again). Wrap the 814 // filesystem with an errorfs that will turn off syncs after a new .log 815 // file is created and after a subsequent directory sync occurs. This 816 // simulates a crash after the new log file is created and synced. 817 { 818 var atomicWALCreated, atomicDirSynced uint32 819 d, err := Open("", &Options{ 820 FS: errorfs.Wrap(fs, errorfs.InjectorFunc(func(op errorfs.Op, path string) error { 821 if atomic.LoadUint32(&atomicDirSynced) == 1 { 822 fs.SetIgnoreSyncs(true) 823 } 824 if op == errorfs.OpCreate && filepath.Ext(path) == ".log" { 825 atomic.StoreUint32(&atomicWALCreated, 1) 826 } 827 // Record when there's a sync of the data directory after the 828 // WAL was created. The data directory will have an empty 829 // path because that's what we passed into Open. 830 if op == errorfs.OpFileSync && path == "" && atomic.LoadUint32(&atomicWALCreated) == 1 { 831 atomic.StoreUint32(&atomicDirSynced, 1) 832 } 833 return nil 834 })), 835 }) 836 require.NoError(t, err) 837 require.NoError(t, d.Close()) 838 } 839 840 fs.ResetToSyncedState() 841 fs.SetIgnoreSyncs(false) 842 843 if n := len(getLogs()); n != 2 { 844 t.Fatalf("expected two logs, found %d\n", n) 845 } 846 847 // Finally, open the database with syncs enabled. 848 d, err := Open("", testingRandomized(&Options{FS: fs})) 849 require.NoError(t, err) 850 require.NoError(t, d.Close()) 851 } 852 853 // TestOpenWALReplayReadOnlySeqNums tests opening a database: 854 // - in read-only mode 855 // - with multiple unflushed log files that must replayed 856 // - a MANIFEST that sets the last sequence number to a number greater than 857 // the unflushed log files 858 // 859 // See cockroachdb/cockroach#48660. 860 func TestOpenWALReplayReadOnlySeqNums(t *testing.T) { 861 const root = "" 862 mem := vfs.NewMem() 863 864 copyFiles := func(srcDir, dstDir string) { 865 files, err := mem.List(srcDir) 866 require.NoError(t, err) 867 for _, f := range files { 868 require.NoError(t, vfs.Copy(mem, mem.PathJoin(srcDir, f), mem.PathJoin(dstDir, f))) 869 } 870 } 871 872 // Create a new database under `/original` with a couple sstables. 873 dir := mem.PathJoin(root, "original") 874 d, err := Open(dir, testingRandomized(&Options{FS: mem})) 875 require.NoError(t, err) 876 require.NoError(t, d.Set([]byte("a"), nil, nil)) 877 require.NoError(t, d.Flush()) 878 require.NoError(t, d.Set([]byte("a"), nil, nil)) 879 require.NoError(t, d.Flush()) 880 881 // Prevent flushes so that multiple unflushed log files build up. 882 d.mu.Lock() 883 d.mu.compact.flushing = true 884 d.mu.Unlock() 885 886 require.NoError(t, d.Set([]byte("b"), nil, nil)) 887 d.AsyncFlush() 888 require.NoError(t, d.Set([]byte("c"), nil, nil)) 889 d.AsyncFlush() 890 require.NoError(t, d.Set([]byte("e"), nil, nil)) 891 892 // Manually compact some of the key space so that the latest `logSeqNum` is 893 // written to the MANIFEST. This produces a MANIFEST where the `logSeqNum` 894 // is greater than the sequence numbers contained in the 895 // `minUnflushedLogNum` log file 896 require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false)) 897 d.mu.Lock() 898 for d.mu.compact.compactingCount > 0 { 899 d.mu.compact.cond.Wait() 900 } 901 d.mu.Unlock() 902 903 // While the MANIFEST is still in this state, copy all the files in the 904 // database to a new directory. 905 replayDir := mem.PathJoin(root, "replay") 906 require.NoError(t, mem.MkdirAll(replayDir, os.ModePerm)) 907 copyFiles(dir, replayDir) 908 909 d.mu.Lock() 910 d.mu.compact.flushing = false 911 d.mu.Unlock() 912 require.NoError(t, d.Close()) 913 914 // Open the copy of the database in read-only mode. Since we copied all 915 // the files before the flushes were allowed to complete, there should be 916 // multiple unflushed log files that need to replay. Since the manual 917 // compaction completed, the `logSeqNum` read from the manifest should be 918 // greater than the unflushed log files' sequence numbers. 919 d, err = Open(replayDir, testingRandomized(&Options{ 920 FS: mem, 921 ReadOnly: true, 922 })) 923 require.NoError(t, err) 924 require.NoError(t, d.Close()) 925 } 926 927 func TestOpenWALReplayMemtableGrowth(t *testing.T) { 928 mem := vfs.NewMem() 929 const memTableSize = 64 * 1024 * 1024 930 opts := &Options{ 931 MemTableSize: memTableSize, 932 FS: mem, 933 } 934 opts.testingRandomized() 935 func() { 936 db, err := Open("", opts) 937 require.NoError(t, err) 938 defer db.Close() 939 b := db.NewBatch() 940 defer b.Close() 941 key := make([]byte, 8) 942 val := make([]byte, 16*1024*1024) 943 b.Set(key, val, nil) 944 require.NoError(t, db.Apply(b, Sync)) 945 }() 946 db, err := Open("", opts) 947 require.NoError(t, err) 948 db.Close() 949 } 950 951 func TestGetVersion(t *testing.T) { 952 mem := vfs.NewMem() 953 opts := &Options{ 954 FS: mem, 955 } 956 opts.testingRandomized() 957 958 // Case 1: No options file. 959 version, err := GetVersion("", mem) 960 require.NoError(t, err) 961 require.Empty(t, version) 962 963 // Case 2: Pebble created file. 964 db, err := Open("", opts) 965 require.NoError(t, err) 966 require.NoError(t, db.Close()) 967 version, err = GetVersion("", mem) 968 require.NoError(t, err) 969 require.Equal(t, "0.1", version) 970 971 // Case 3: Manually created OPTIONS file with a higher number. 972 highestOptionsNum := FileNum(0) 973 ls, err := mem.List("") 974 require.NoError(t, err) 975 for _, filename := range ls { 976 ft, fn, ok := base.ParseFilename(mem, filename) 977 if !ok { 978 continue 979 } 980 switch ft { 981 case fileTypeOptions: 982 if fn > highestOptionsNum { 983 highestOptionsNum = fn 984 } 985 } 986 } 987 f, _ := mem.Create(fmt.Sprintf("OPTIONS-%d", highestOptionsNum+1)) 988 _, err = f.Write([]byte("[Version]\n bitalostable_version=0.2\n")) 989 require.NoError(t, err) 990 err = f.Close() 991 require.NoError(t, err) 992 version, err = GetVersion("", mem) 993 require.NoError(t, err) 994 require.Equal(t, "0.2", version) 995 996 // Case 4: Manually created OPTIONS file with a RocksDB number. 997 f, _ = mem.Create(fmt.Sprintf("OPTIONS-%d", highestOptionsNum+2)) 998 _, err = f.Write([]byte("[Version]\n rocksdb_version=6.2.1\n")) 999 require.NoError(t, err) 1000 err = f.Close() 1001 require.NoError(t, err) 1002 version, err = GetVersion("", mem) 1003 require.NoError(t, err) 1004 require.Equal(t, "rocksdb v6.2.1", version) 1005 } 1006 1007 func TestRocksDBNoFlushManifest(t *testing.T) { 1008 mem := vfs.NewMem() 1009 // Have the comparer and merger names match what's in the testdata 1010 // directory. 1011 comparer := *DefaultComparer 1012 merger := *DefaultMerger 1013 comparer.Name = "cockroach_comparator" 1014 merger.Name = "cockroach_merge_operator" 1015 opts := &Options{ 1016 FS: mem, 1017 Comparer: &comparer, 1018 Merger: &merger, 1019 } 1020 1021 // rocksdb-ingest-only is a RocksDB-generated db directory that has not had 1022 // a single flush yet, only ingestion operations. The manifest contains 1023 // a next-log-num but no log-num entry. Ensure that bitalostable can read these 1024 // directories without an issue. 1025 _, err := vfs.Clone(vfs.Default, mem, "testdata/rocksdb-ingest-only", "testdata") 1026 require.NoError(t, err) 1027 1028 db, err := Open("testdata", opts) 1029 require.NoError(t, err) 1030 defer db.Close() 1031 1032 val, closer, err := db.Get([]byte("ajulxeiombjiyw\x00\x00\x00\x00\x00\x00\x00\x01\x12\x09")) 1033 require.NoError(t, err) 1034 require.NotEmpty(t, val) 1035 require.NoError(t, closer.Close()) 1036 } 1037 1038 func TestOpen_ErrorIfUnknownFormatVersion(t *testing.T) { 1039 fs := vfs.NewMem() 1040 d, err := Open("", &Options{ 1041 FS: fs, 1042 FormatMajorVersion: FormatVersioned, 1043 }) 1044 require.NoError(t, err) 1045 require.NoError(t, d.Close()) 1046 1047 // Move the marker to a version that does not exist. 1048 m, _, err := atomicfs.LocateMarker(fs, "", formatVersionMarkerName) 1049 require.NoError(t, err) 1050 require.NoError(t, m.Move("999999")) 1051 require.NoError(t, m.Close()) 1052 1053 _, err = Open("", &Options{ 1054 FS: fs, 1055 FormatMajorVersion: FormatVersioned, 1056 }) 1057 require.Error(t, err) 1058 require.EqualError(t, err, `bitalostable: database "" written in format major version 999999`) 1059 } 1060 1061 // ensureFilesClosed updates the provided Options to wrap the filesystem. It 1062 // returns a closure that when invoked fails the test if any files opened by the 1063 // filesystem are not closed. 1064 // 1065 // This function is intended to be used in tests with defer. 1066 // 1067 // opts := &Options{FS: vfs.NewMem()} 1068 // defer ensureFilesClosed(t, opts)() 1069 // /* test code */ 1070 func ensureFilesClosed(t *testing.T, o *Options) func() { 1071 fs := &closeTrackingFS{ 1072 FS: o.FS, 1073 files: map[*closeTrackingFile]struct{}{}, 1074 } 1075 o.FS = fs 1076 return func() { 1077 // fs.files should be empty if all the files were closed. 1078 for f := range fs.files { 1079 t.Errorf("An open file was never closed. Opened at:\n%s", f.stack) 1080 } 1081 } 1082 } 1083 1084 type closeTrackingFS struct { 1085 vfs.FS 1086 files map[*closeTrackingFile]struct{} 1087 } 1088 1089 func (fs *closeTrackingFS) wrap(file vfs.File, err error) (vfs.File, error) { 1090 if err != nil { 1091 return nil, err 1092 } 1093 f := &closeTrackingFile{ 1094 File: file, 1095 fs: fs, 1096 stack: debug.Stack(), 1097 } 1098 fs.files[f] = struct{}{} 1099 return f, err 1100 } 1101 1102 func (fs *closeTrackingFS) Create(name string) (vfs.File, error) { 1103 return fs.wrap(fs.FS.Create(name)) 1104 } 1105 1106 func (fs *closeTrackingFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) { 1107 return fs.wrap(fs.FS.Open(name)) 1108 } 1109 1110 func (fs *closeTrackingFS) OpenDir(name string) (vfs.File, error) { 1111 return fs.wrap(fs.FS.OpenDir(name)) 1112 } 1113 1114 func (fs *closeTrackingFS) ReuseForWrite(oldname, newname string) (vfs.File, error) { 1115 return fs.wrap(fs.FS.ReuseForWrite(oldname, newname)) 1116 } 1117 1118 type closeTrackingFile struct { 1119 vfs.File 1120 fs *closeTrackingFS 1121 stack []byte 1122 } 1123 1124 func (f *closeTrackingFile) Close() error { 1125 delete(f.fs.files, f) 1126 return f.File.Close() 1127 }