github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/range_del_test.go

// Copyright 2018 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package bitalostable

import (
	"bytes"
	"fmt"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"github.com/zuoyebang/bitalostable/internal/base"
	"github.com/zuoyebang/bitalostable/internal/datadriven"
	"github.com/zuoyebang/bitalostable/internal/testkeys"
	"github.com/zuoyebang/bitalostable/sstable"
	"github.com/zuoyebang/bitalostable/vfs"
	"golang.org/x/exp/rand"
)

func TestRangeDel(t *testing.T) {
	var d *DB
	defer func() {
		if d != nil {
			require.NoError(t, d.Close())
		}
	}()
	opts := &Options{}
	opts.DisableAutomaticCompactions = true

	datadriven.RunTest(t, "testdata/range_del", func(td *datadriven.TestData) string {
		switch td.Cmd {
		case "define":
			if d != nil {
				if err := d.Close(); err != nil {
					return err.Error()
				}
			}

			var err error
			if d, err = runDBDefineCmd(td, opts); err != nil {
				return err.Error()
			}

			d.mu.Lock()
			// Disable the "dynamic base level" code for this test.
			d.mu.versions.picker.forceBaseLevel1()
			s := fmt.Sprintf("mem: %d\n%s", len(d.mu.mem.queue), d.mu.versions.currentVersion().String())
			d.mu.Unlock()
			return s

		case "wait-pending-table-stats":
			return runTableStatsCmd(td, d)

		case "compact":
			if err := runCompactCmd(td, d); err != nil {
				return err.Error()
			}
			d.mu.Lock()
			// Disable the "dynamic base level" code for this test.
			d.mu.versions.picker.forceBaseLevel1()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s

		case "get":
			return runGetCmd(td, d)

		case "iter":
			snap := Snapshot{
				db:     d,
				seqNum: InternalKeySeqNumMax,
			}

			for _, arg := range td.CmdArgs {
				if len(arg.Vals) != 1 {
					return fmt.Sprintf("%s: %s=<value>", td.Cmd, arg.Key)
				}
				switch arg.Key {
				case "seq":
					var err error
					snap.seqNum, err = strconv.ParseUint(arg.Vals[0], 10, 64)
					if err != nil {
						return err.Error()
					}
				default:
					return fmt.Sprintf("%s: unknown arg: %s", td.Cmd, arg.Key)
				}
			}

			iter := snap.NewIter(nil)
			return runIterCmd(td, iter, true)

		default:
			return fmt.Sprintf("unknown command: %s", td.Cmd)
		}
	})
}
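
// For orientation: each case in testdata/range_del pairs one of the commands
// handled above with its expected output. A hypothetical case (illustrative
// only; the exact syntax and expected output live in the testdata file) might
// look like:
//
//	define
//	L1
//	  a.RANGEDEL.3:d
//	----
//	mem: 1
//	1:
//	  000004:[a#3,RANGEDEL-d#72057594037927935,RANGEDEL]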

func TestFlushDelay(t *testing.T) {
	opts := &Options{
		FS:                    vfs.NewMem(),
		Comparer:              testkeys.Comparer,
		FlushDelayDeleteRange: 10 * time.Millisecond,
		FlushDelayRangeKey:    10 * time.Millisecond,
		FormatMajorVersion:    FormatNewest,
	}
	d, err := Open("", opts)
	require.NoError(t, err)

	// Ensure that all the various means of writing a rangedel or range key
	// trigger their respective flush delays.
	cases := []func(){
		func() {
			require.NoError(t, d.DeleteRange([]byte("a"), []byte("z"), nil))
		},
		func() {
			b := d.NewBatch()
			require.NoError(t, b.DeleteRange([]byte("a"), []byte("z"), nil))
			require.NoError(t, b.Commit(nil))
		},
		func() {
			b := d.NewBatch()
			op := b.DeleteRangeDeferred(1, 1)
			op.Key[0] = 'a'
			op.Value[0] = 'z'
			op.Finish()
			require.NoError(t, b.Commit(nil))
		},
		func() {
			b := d.NewBatch()
			b2 := d.NewBatch()
			require.NoError(t, b.DeleteRange([]byte("a"), []byte("z"), nil))
			require.NoError(t, b2.SetRepr(b.Repr()))
			require.NoError(t, b2.Commit(nil))
			require.NoError(t, b.Close())
		},
		func() {
			b := d.NewBatch()
			b2 := d.NewBatch()
			require.NoError(t, b.DeleteRange([]byte("a"), []byte("z"), nil))
			require.NoError(t, b2.Apply(b, nil))
			require.NoError(t, b2.Commit(nil))
			require.NoError(t, b.Close())
		},
		func() {
			require.NoError(t, d.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil))
		},
		func() {
			require.NoError(t, d.RangeKeyUnset([]byte("a"), []byte("z"), nil, nil))
		},
		func() {
			require.NoError(t, d.RangeKeyDelete([]byte("a"), []byte("z"), nil))
		},
		func() {
			b := d.NewBatch()
			require.NoError(t, b.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil))
			require.NoError(t, b.Commit(nil))
		},
		func() {
			b := d.NewBatch()
			require.NoError(t, b.RangeKeyUnset([]byte("a"), []byte("z"), nil, nil))
			require.NoError(t, b.Commit(nil))
		},
		func() {
			b := d.NewBatch()
			require.NoError(t, b.RangeKeyDelete([]byte("a"), []byte("z"), nil))
			require.NoError(t, b.Commit(nil))
		},
		func() {
			b := d.NewBatch()
			b2 := d.NewBatch()
			require.NoError(t, b.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil))
			require.NoError(t, b2.SetRepr(b.Repr()))
			require.NoError(t, b2.Commit(nil))
			require.NoError(t, b.Close())
		},
		func() {
			b := d.NewBatch()
			b2 := d.NewBatch()
			require.NoError(t, b.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil))
			require.NoError(t, b2.Apply(b, nil))
			require.NoError(t, b2.Commit(nil))
			require.NoError(t, b.Close())
		},
	}

	for _, f := range cases {
		d.mu.Lock()
		flushed := d.mu.mem.queue[len(d.mu.mem.queue)-1].flushed
		d.mu.Unlock()
		f()
		<-flushed
	}
	require.NoError(t, d.Close())
}
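
// A possible refactor (editorial sketch, not in the original file): the
// capture-then-wait pattern at the end of TestFlushDelay can be expressed as
// a helper. It relies only on the in-package state already used above: the
// flushed channel of the memtable that is mutable at the time of the write.
func waitForDelayedFlush(t *testing.T, d *DB, write func()) {
	t.Helper()
	// Capture the current mutable memtable's flushed channel before writing.
	d.mu.Lock()
	flushed := d.mu.mem.queue[len(d.mu.mem.queue)-1].flushed
	d.mu.Unlock()
	write()
	// The delayed flush triggered by the rangedel/range-key write must
	// eventually flush that memtable, unblocking this receive.
	<-flushed
}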

func TestFlushDelayStress(t *testing.T) {
	rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	opts := &Options{
		FS:                    vfs.NewMem(),
		Comparer:              testkeys.Comparer,
		FlushDelayDeleteRange: time.Duration(rng.Intn(10)+1) * time.Millisecond,
		FlushDelayRangeKey:    time.Duration(rng.Intn(10)+1) * time.Millisecond,
		FormatMajorVersion:    FormatNewest,
		MemTableSize:          8192,
	}

	const runs = 100
	for run := 0; run < runs; run++ {
		d, err := Open("", opts)
		require.NoError(t, err)

		now := time.Now().UnixNano()
		writers := runtime.GOMAXPROCS(0)
		var wg sync.WaitGroup
		wg.Add(writers)
		for i := 0; i < writers; i++ {
			rng := rand.New(rand.NewSource(uint64(now) + uint64(i)))
			go func() {
				const ops = 100
				defer wg.Done()

				var k1, k2 [32]byte
				for j := 0; j < ops; j++ {
					switch rng.Intn(3) {
					case 0:
						randStr(k1[:], rng)
						randStr(k2[:], rng)
						require.NoError(t, d.DeleteRange(k1[:], k2[:], nil))
					case 1:
						randStr(k1[:], rng)
						randStr(k2[:], rng)
						require.NoError(t, d.RangeKeySet(k1[:], k2[:], []byte("@2"), nil, nil))
					case 2:
						randStr(k1[:], rng)
						randStr(k2[:], rng)
						require.NoError(t, d.Set(k1[:], k2[:], nil))
					default:
						panic("unreachable")
					}
				}
			}()
		}
		wg.Wait()
		time.Sleep(time.Duration(rng.Intn(10)+1) * time.Millisecond)
		require.NoError(t, d.Close())
	}
}
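
// randStr is defined elsewhere in this package. For reading this file on its
// own, a plausible minimal stand-in (an assumption, not the actual
// implementation) would fill the slice with random lowercase letters:
//
//	func randStr(fill []byte, rng *rand.Rand) {
//		const letters = "abcdefghijklmnopqrstuvwxyz"
//		for i := range fill {
//			fill[i] = letters[rng.Intn(len(letters))]
//		}
//	}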

// Verify that range tombstones at higher levels do not unintentionally delete
// newer keys at lower levels. This test sets up one such scenario. The base
// problem is that range tombstones are not truncated to sstable boundaries on
// disk, only in memory.
func TestRangeDelCompactionTruncation(t *testing.T) {
	runTest := func(formatVersion FormatMajorVersion) {
		// Use a small target file size so that there is a single key per sstable.
		d, err := Open("", &Options{
			FS: vfs.NewMem(),
			Levels: []LevelOptions{
				{TargetFileSize: 100},
				{TargetFileSize: 100},
				{TargetFileSize: 1},
			},
			DebugCheck:         DebugCheckLevels,
			FormatMajorVersion: formatVersion,
		})
		require.NoError(t, err)
		defer d.Close()

		d.mu.Lock()
		d.mu.versions.dynamicBaseLevel = false
		d.mu.Unlock()

		lsm := func() string {
			d.mu.Lock()
			s := d.mu.versions.currentVersion().String()
			d.mu.Unlock()
			return s
		}
		expectLSM := func(expected string) {
			t.Helper()
			expected = strings.TrimSpace(expected)
			actual := strings.TrimSpace(lsm())
			if expected != actual {
				t.Fatalf("expected\n%s\nbut found\n%s", expected, actual)
			}
		}

		require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("b"), 100), nil))
		snap1 := d.NewSnapshot()
		defer snap1.Close()
		// Flush so that each version of "a" ends up in its own L0 table. If we
		// allowed both versions in the same L0 table, compaction could trivially
		// move the single L0 table to L1.
		require.NoError(t, d.Flush())
		require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("c"), 100), nil))

		snap2 := d.NewSnapshot()
		defer snap2.Close()
		require.NoError(t, d.DeleteRange([]byte("a"), []byte("d"), nil))

		// Compact to produce the L1 tables.
		require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false))
		expectLSM(`
1:
  000008:[a#3,RANGEDEL-b#72057594037927935,RANGEDEL]
  000009:[b#3,RANGEDEL-d#72057594037927935,RANGEDEL]
`)

		// Compact again to move one of the tables to L2.
		require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false))
		expectLSM(`
1:
  000008:[a#3,RANGEDEL-b#72057594037927935,RANGEDEL]
2:
  000009:[b#3,RANGEDEL-d#72057594037927935,RANGEDEL]
`)

		// Write "b" and "c" to a new table.
		require.NoError(t, d.Set([]byte("b"), []byte("d"), nil))
		require.NoError(t, d.Set([]byte("c"), []byte("e"), nil))
		require.NoError(t, d.Flush())
		expectLSM(`
0.0:
  000011:[b#4,SET-c#5,SET]
1:
  000008:[a#3,RANGEDEL-b#72057594037927935,RANGEDEL]
2:
  000009:[b#3,RANGEDEL-d#72057594037927935,RANGEDEL]
`)

		// "b" is still visible at this point, as it should be.
		if _, closer, err := d.Get([]byte("b")); err != nil {
			t.Fatalf("expected success, but found %v", err)
		} else {
			closer.Close()
		}

		keys := func() string {
			iter := d.NewIter(nil)
			defer iter.Close()
			var buf bytes.Buffer
			var sep string
			for iter.First(); iter.Valid(); iter.Next() {
				fmt.Fprintf(&buf, "%s%s", sep, iter.Key())
				sep = " "
			}
			return buf.String()
		}

		if expected, actual := `b c`, keys(); expected != actual {
			t.Fatalf("expected %q, but found %q", expected, actual)
		}

		// Compact the L0 table. This will compact the L0 table into L1 and, due to
		// the sstable target size settings, will create 2 tables in L1. Then the
		// L1 table containing "c" will be compacted again with the L2 table,
		// creating two tables in L2. Lastly, the L2 table containing "c" will be
		// compacted, creating the L3 table.
		require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false))
		if formatVersion < FormatSetWithDelete {
			expectLSM(`
1:
  000008:[a#3,RANGEDEL-b#72057594037927935,RANGEDEL]
2:
  000012:[b#4,SET-c#72057594037927935,RANGEDEL]
3:
  000013:[c#5,SET-d#72057594037927935,RANGEDEL]
`)
		} else {
			expectLSM(`
1:
  000008:[a#3,RANGEDEL-b#72057594037927935,RANGEDEL]
2:
  000012:[b#4,SETWITHDEL-c#72057594037927935,RANGEDEL]
3:
  000013:[c#5,SET-d#72057594037927935,RANGEDEL]
`)
		}

		// The L1 table still contains a tombstone from [a,d); verify that it does
		// not improperly delete the newer version of "b" in L2.
		if _, closer, err := d.Get([]byte("b")); err != nil {
			t.Errorf("expected success, but found %v", err)
		} else {
			closer.Close()
		}

		if expected, actual := `b c`, keys(); expected != actual {
			t.Errorf("expected %q, but found %q", expected, actual)
		}
	}

	versions := []FormatMajorVersion{
		FormatMostCompatible,
		FormatSetWithDelete - 1,
		FormatSetWithDelete,
		FormatNewest,
	}
	for _, version := range versions {
		t.Run(fmt.Sprintf("version-%s", version), func(t *testing.T) {
			runTest(version)
		})
	}
}
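
// The boundary keys in the expected LSM shapes above and below end in
// #72057594037927935,RANGEDEL. That sequence number is InternalKeySeqNumMax
// (1<<56 - 1), the sentinel used for the exclusive end boundary of a range
// tombstone. A small sanity check of the constant (an editorial addition, not
// in the original file):
func TestInternalKeySeqNumMaxValue(t *testing.T) {
	if got, want := uint64(InternalKeySeqNumMax), uint64(1)<<56-1; got != want {
		t.Fatalf("InternalKeySeqNumMax = %d, want %d", got, want)
	}
}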

// This is an alternate scenario to the one created in
// TestRangeDelCompactionTruncation that would result in the bounds for an
// sstable expanding to overlap its left neighbor if we failed to truncate an
// sstable's boundaries to the compaction input boundaries.
func TestRangeDelCompactionTruncation2(t *testing.T) {
	// Use a small target file size so that there is a single key per sstable.
	d, err := Open("", &Options{
		FS: vfs.NewMem(),
		Levels: []LevelOptions{
			{TargetFileSize: 100},
			{TargetFileSize: 100},
			{TargetFileSize: 1},
		},
		DebugCheck: DebugCheckLevels,
	})
	require.NoError(t, err)
	defer d.Close()

	lsm := func() string {
		d.mu.Lock()
		s := d.mu.versions.currentVersion().String()
		d.mu.Unlock()
		return s
	}
	expectLSM := func(expected string) {
		t.Helper()
		expected = strings.TrimSpace(expected)
		actual := strings.TrimSpace(lsm())
		if expected != actual {
			t.Fatalf("expected\n%s\nbut found\n%s", expected, actual)
		}
	}

	require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 100), nil))
	snap1 := d.NewSnapshot()
	defer snap1.Close()
	// Flush so that each version of "b" ends up in its own L0 table. If we
	// allowed both versions in the same L0 table, compaction could trivially
	// move the single L0 table to L1.
	require.NoError(t, d.Flush())
	require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("c"), 100), nil))
	snap2 := d.NewSnapshot()
	defer snap2.Close()
	require.NoError(t, d.DeleteRange([]byte("a"), []byte("d"), nil))

	// Compact to produce the base-level table. Unlike
	// TestRangeDelCompactionTruncation, this test leaves dynamic base level
	// enabled, so the table lands in L6.
	require.NoError(t, d.Compact([]byte("b"), []byte("b\x00"), false))
	expectLSM(`
6:
  000009:[a#3,RANGEDEL-d#72057594037927935,RANGEDEL]
`)

	require.NoError(t, d.Set([]byte("c"), bytes.Repeat([]byte("d"), 100), nil))
	require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false))
	expectLSM(`
6:
  000012:[a#3,RANGEDEL-c#72057594037927935,RANGEDEL]
  000013:[c#4,SET-d#72057594037927935,RANGEDEL]
`)
}
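
// The lsm/expectLSM closures are repeated in each of the truncation tests. A
// shared helper could look like this (editorial sketch, not in the original
// file):
func expectLSMShape(t *testing.T, d *DB, expected string) {
	t.Helper()
	d.mu.Lock()
	actual := strings.TrimSpace(d.mu.versions.currentVersion().String())
	d.mu.Unlock()
	if expected = strings.TrimSpace(expected); expected != actual {
		t.Fatalf("expected\n%s\nbut found\n%s", expected, actual)
	}
}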

// TODO(peter): rewrite this test, TestRangeDelCompactionTruncation, and
// TestRangeDelCompactionTruncation2 as data-driven tests.
func TestRangeDelCompactionTruncation3(t *testing.T) {
	// Use a small target file size so that there is a single key per sstable.
	d, err := Open("tmp", &Options{
		Cleaner: ArchiveCleaner{},
		FS:      vfs.NewMem(),
		Levels: []LevelOptions{
			{TargetFileSize: 100},
			{TargetFileSize: 100},
			{TargetFileSize: 1},
		},
		DebugCheck: DebugCheckLevels,
	})
	require.NoError(t, err)
	defer d.Close()

	d.mu.Lock()
	d.mu.versions.dynamicBaseLevel = false
	d.mu.Unlock()

	lsm := func() string {
		d.mu.Lock()
		s := d.mu.versions.currentVersion().String()
		d.mu.Unlock()
		return s
	}
	expectLSM := func(expected string) {
		t.Helper()
		expected = strings.TrimSpace(expected)
		actual := strings.TrimSpace(lsm())
		if expected != actual {
			t.Fatalf("expected\n%s\nbut found\n%s", expected, actual)
		}
	}

	require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 100), nil))
	snap1 := d.NewSnapshot()
	defer snap1.Close()

	// Flush so that each version of "b" ends up in its own L0 table. If we
	// allowed both versions in the same L0 table, compaction could trivially
	// move the single L0 table to L1.
	require.NoError(t, d.Flush())
	require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("c"), 100), nil))
	snap2 := d.NewSnapshot()
	defer snap2.Close()

	require.NoError(t, d.DeleteRange([]byte("a"), []byte("d"), nil))
	snap3 := d.NewSnapshot()
	defer snap3.Close()

	if _, _, err := d.Get([]byte("b")); err != ErrNotFound {
		t.Fatalf("expected not found, but found %v", err)
	}

	// Compact a few times to move the tables down to L3.
	for i := 0; i < 3; i++ {
		require.NoError(t, d.Compact([]byte("b"), []byte("b\x00"), false))
	}
	expectLSM(`
3:
  000009:[a#3,RANGEDEL-d#72057594037927935,RANGEDEL]
`)

	require.NoError(t, d.Set([]byte("c"), bytes.Repeat([]byte("d"), 100), nil))

	require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false))
	expectLSM(`
3:
  000013:[a#3,RANGEDEL-c#72057594037927935,RANGEDEL]
4:
  000014:[c#4,SET-d#72057594037927935,RANGEDEL]
`)

	require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false))
	expectLSM(`
3:
  000013:[a#3,RANGEDEL-c#72057594037927935,RANGEDEL]
5:
  000014:[c#4,SET-d#72057594037927935,RANGEDEL]
`)

	if _, _, err := d.Get([]byte("b")); err != ErrNotFound {
		t.Fatalf("expected not found, but found %v", err)
	}

	require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false))
	expectLSM(`
4:
  000013:[a#3,RANGEDEL-c#72057594037927935,RANGEDEL]
5:
  000014:[c#4,SET-d#72057594037927935,RANGEDEL]
`)

	if v, _, err := d.Get([]byte("b")); err != ErrNotFound {
		t.Fatalf("expected not found, but found %v [%s]", err, v)
	}
}
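
// Editorial note (not in the original file): snap1 through snap3 pin the
// older versions of "b" so compactions cannot elide them. Even though Get at
// the latest sequence number returns ErrNotFound above, an iterator opened on
// snap2 (taken before the DeleteRange) would still surface "b" inside the
// test, e.g.:
//
//	iter := snap2.NewIter(nil)
//	iter.SeekGE([]byte("b")) // valid, positioned at "b"
//	require.NoError(t, iter.Close())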

func BenchmarkRangeDelIterate(b *testing.B) {
	for _, entries := range []int{10, 1000, 100000} {
		b.Run(fmt.Sprintf("entries=%d", entries), func(b *testing.B) {
			for _, deleted := range []int{entries, entries - 1} {
				b.Run(fmt.Sprintf("deleted=%d", deleted), func(b *testing.B) {
					for _, snapshotCompact := range []bool{false, true} {
						b.Run(fmt.Sprintf("snapshotAndCompact=%t", snapshotCompact), func(b *testing.B) {
							benchmarkRangeDelIterate(b, entries, deleted, snapshotCompact)
						})
					}
				})
			}
		})
	}
}

func benchmarkRangeDelIterate(b *testing.B, entries, deleted int, snapshotCompact bool) {
	mem := vfs.NewMem()
	cache := NewCache(128 << 20) // 128 MB
	defer cache.Unref()

	d, err := Open("", &Options{
		Cache:      cache,
		FS:         mem,
		DebugCheck: DebugCheckLevels,
	})
	if err != nil {
		b.Fatal(err)
	}
	defer d.Close()

	makeKey := func(i int) []byte {
		return []byte(fmt.Sprintf("%09d", i))
	}

	// Create an sstable with N entries and ingest it. This is a fast way
	// to get a lot of entries into bitalostable.
	f, err := mem.Create("ext")
	if err != nil {
		b.Fatal(err)
	}
	w := sstable.NewWriter(f, sstable.WriterOptions{
		BlockSize: 32 << 10, // 32 KB
	})
	for i := 0; i < entries; i++ {
		key := base.MakeInternalKey(makeKey(i), 0, InternalKeyKindSet)
		if err := w.Add(key, nil); err != nil {
			b.Fatal(err)
		}
	}
	if err := w.Close(); err != nil {
		b.Fatal(err)
	}
	if err := d.Ingest([]string{"ext"}); err != nil {
		b.Fatal(err)
	}

	// Some benchmarks test snapshots that force the range tombstone into the
	// same level as the covered data.
	// See https://github.com/zuoyebang/bitalostable/issues/1070.
	if snapshotCompact {
		s := d.NewSnapshot()
		defer func() { require.NoError(b, s.Close()) }()
	}

	// Create a range tombstone that deletes most (or all) of those entries.
	from := makeKey(0)
	to := makeKey(deleted)
	if err := d.DeleteRange(from, to, nil); err != nil {
		b.Fatal(err)
	}

	if snapshotCompact {
		require.NoError(b, d.Compact(makeKey(0), makeKey(entries), false))
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		iter := d.NewIter(nil)
		iter.SeekGE(from)
		if deleted < entries {
			if !iter.Valid() {
				b.Fatal("key not found")
			}
		} else if iter.Valid() {
			b.Fatal("unexpected key found")
		}
		if err := iter.Close(); err != nil {
			b.Fatal(err)
		}
	}
}
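
// Usage note (not part of the original file): with the standard Go tooling,
// the benchmark above can be run from the repository root with something
// like:
//
//	go test -run '^$' -bench BenchmarkRangeDelIterate -benchmem .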