github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/range_del_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "bytes" 9 "fmt" 10 "runtime" 11 "strings" 12 "sync" 13 "testing" 14 "time" 15 16 "github.com/cockroachdb/datadriven" 17 "github.com/cockroachdb/pebble/internal/base" 18 "github.com/cockroachdb/pebble/internal/testkeys" 19 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 20 "github.com/cockroachdb/pebble/sstable" 21 "github.com/cockroachdb/pebble/vfs" 22 "github.com/stretchr/testify/require" 23 "golang.org/x/exp/rand" 24 ) 25 26 func TestRangeDel(t *testing.T) { 27 var d *DB 28 defer func() { 29 if d != nil { 30 require.NoError(t, closeAllSnapshots(d)) 31 require.NoError(t, d.Close()) 32 } 33 }() 34 opts := &Options{} 35 opts.DisableAutomaticCompactions = true 36 37 datadriven.RunTest(t, "testdata/range_del", func(t *testing.T, td *datadriven.TestData) string { 38 switch td.Cmd { 39 case "define": 40 if d != nil { 41 if err := closeAllSnapshots(d); err != nil { 42 return err.Error() 43 } 44 if err := d.Close(); err != nil { 45 return err.Error() 46 } 47 } 48 49 var err error 50 if d, err = runDBDefineCmd(td, opts); err != nil { 51 return err.Error() 52 } 53 54 d.mu.Lock() 55 // Disable the "dynamic base level" code for this test. 56 d.mu.versions.picker.forceBaseLevel1() 57 s := fmt.Sprintf("mem: %d\n%s", len(d.mu.mem.queue), d.mu.versions.currentVersion().String()) 58 d.mu.Unlock() 59 return s 60 61 case "wait-pending-table-stats": 62 return runTableStatsCmd(td, d) 63 64 case "compact": 65 if err := runCompactCmd(td, d); err != nil { 66 return err.Error() 67 } 68 d.mu.Lock() 69 // Disable the "dynamic base level" code for this test. 70 d.mu.versions.picker.forceBaseLevel1() 71 s := d.mu.versions.currentVersion().String() 72 d.mu.Unlock() 73 return s 74 75 case "get": 76 return runGetCmd(t, td, d) 77 78 case "iter": 79 snap := Snapshot{ 80 db: d, 81 seqNum: InternalKeySeqNumMax, 82 } 83 td.MaybeScanArgs(t, "seq", &snap.seqNum) 84 iter, _ := snap.NewIter(nil) 85 return runIterCmd(td, iter, true) 86 87 default: 88 return fmt.Sprintf("unknown command: %s", td.Cmd) 89 } 90 }) 91 } 92 93 func TestFlushDelay(t *testing.T) { 94 opts := &Options{ 95 FS: vfs.NewMem(), 96 Comparer: testkeys.Comparer, 97 FlushDelayDeleteRange: 10 * time.Millisecond, 98 FlushDelayRangeKey: 10 * time.Millisecond, 99 FormatMajorVersion: internalFormatNewest, 100 } 101 d, err := Open("", opts) 102 require.NoError(t, err) 103 104 // Ensure that all the various means of writing a rangedel or range key 105 // trigger their respective flush delays. 106 cases := []func(){ 107 func() { 108 require.NoError(t, d.DeleteRange([]byte("a"), []byte("z"), nil)) 109 }, 110 func() { 111 b := d.NewBatch() 112 require.NoError(t, b.DeleteRange([]byte("a"), []byte("z"), nil)) 113 require.NoError(t, b.Commit(nil)) 114 }, 115 func() { 116 b := d.NewBatch() 117 op := b.DeleteRangeDeferred(1, 1) 118 op.Key[0] = 'a' 119 op.Value[0] = 'z' 120 op.Finish() 121 require.NoError(t, b.Commit(nil)) 122 }, 123 func() { 124 b := d.NewBatch() 125 b2 := d.NewBatch() 126 require.NoError(t, b.DeleteRange([]byte("a"), []byte("z"), nil)) 127 require.NoError(t, b2.SetRepr(b.Repr())) 128 require.NoError(t, b2.Commit(nil)) 129 require.NoError(t, b.Close()) 130 }, 131 func() { 132 b := d.NewBatch() 133 b2 := d.NewBatch() 134 require.NoError(t, b.DeleteRange([]byte("a"), []byte("z"), nil)) 135 require.NoError(t, b2.Apply(b, nil)) 136 require.NoError(t, b2.Commit(nil)) 137 require.NoError(t, b.Close()) 138 }, 139 func() { 140 require.NoError(t, d.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil)) 141 }, 142 func() { 143 require.NoError(t, d.RangeKeyUnset([]byte("a"), []byte("z"), nil, nil)) 144 }, 145 func() { 146 require.NoError(t, d.RangeKeyDelete([]byte("a"), []byte("z"), nil)) 147 }, 148 func() { 149 b := d.NewBatch() 150 require.NoError(t, b.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil)) 151 require.NoError(t, b.Commit(nil)) 152 }, 153 func() { 154 b := d.NewBatch() 155 require.NoError(t, b.RangeKeyUnset([]byte("a"), []byte("z"), nil, nil)) 156 require.NoError(t, b.Commit(nil)) 157 }, 158 func() { 159 b := d.NewBatch() 160 require.NoError(t, b.RangeKeyDelete([]byte("a"), []byte("z"), nil)) 161 require.NoError(t, b.Commit(nil)) 162 }, 163 func() { 164 b := d.NewBatch() 165 b2 := d.NewBatch() 166 require.NoError(t, b.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil)) 167 require.NoError(t, b2.SetRepr(b.Repr())) 168 require.NoError(t, b2.Commit(nil)) 169 require.NoError(t, b.Close()) 170 }, 171 func() { 172 b := d.NewBatch() 173 b2 := d.NewBatch() 174 require.NoError(t, b.RangeKeySet([]byte("a"), []byte("z"), nil, nil, nil)) 175 require.NoError(t, b2.Apply(b, nil)) 176 require.NoError(t, b2.Commit(nil)) 177 require.NoError(t, b.Close()) 178 }, 179 } 180 181 for _, f := range cases { 182 d.mu.Lock() 183 flushed := d.mu.mem.queue[len(d.mu.mem.queue)-1].flushed 184 d.mu.Unlock() 185 f() 186 <-flushed 187 } 188 require.NoError(t, d.Close()) 189 } 190 191 func TestFlushDelayStress(t *testing.T) { 192 rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) 193 opts := &Options{ 194 FS: vfs.NewMem(), 195 Comparer: testkeys.Comparer, 196 FlushDelayDeleteRange: time.Duration(rng.Intn(10)+1) * time.Millisecond, 197 FlushDelayRangeKey: time.Duration(rng.Intn(10)+1) * time.Millisecond, 198 FormatMajorVersion: internalFormatNewest, 199 MemTableSize: 8192, 200 } 201 202 const runs = 100 203 for run := 0; run < runs; run++ { 204 d, err := Open("", opts) 205 require.NoError(t, err) 206 207 now := time.Now().UnixNano() 208 writers := runtime.GOMAXPROCS(0) 209 var wg sync.WaitGroup 210 wg.Add(writers) 211 for i := 0; i < writers; i++ { 212 rng := rand.New(rand.NewSource(uint64(now) + uint64(i))) 213 go func() { 214 const ops = 100 215 defer wg.Done() 216 217 var k1, k2 [32]byte 218 for j := 0; j < ops; j++ { 219 switch rng.Intn(3) { 220 case 0: 221 randStr(k1[:], rng) 222 randStr(k2[:], rng) 223 require.NoError(t, d.DeleteRange(k1[:], k2[:], nil)) 224 case 1: 225 randStr(k1[:], rng) 226 randStr(k2[:], rng) 227 require.NoError(t, d.RangeKeySet(k1[:], k2[:], []byte("@2"), nil, nil)) 228 case 2: 229 randStr(k1[:], rng) 230 randStr(k2[:], rng) 231 require.NoError(t, d.Set(k1[:], k2[:], nil)) 232 default: 233 panic("unreachable") 234 } 235 } 236 }() 237 } 238 wg.Wait() 239 time.Sleep(time.Duration(rng.Intn(10)+1) * time.Millisecond) 240 require.NoError(t, d.Close()) 241 } 242 } 243 244 // Verify that range tombstones at higher levels do not unintentionally delete 245 // newer keys at lower levels. This test sets up one such scenario. The base 246 // problem is that range tombstones are not truncated to sstable boundaries on 247 // disk, only in memory. 248 func TestRangeDelCompactionTruncation(t *testing.T) { 249 runTest := func(formatVersion FormatMajorVersion) { 250 // Use a small target file size so that there is a single key per sstable. 251 d, err := Open("", &Options{ 252 FS: vfs.NewMem(), 253 Levels: []LevelOptions{ 254 {TargetFileSize: 100}, 255 {TargetFileSize: 100}, 256 {TargetFileSize: 1}, 257 }, 258 DebugCheck: DebugCheckLevels, 259 FormatMajorVersion: formatVersion, 260 }) 261 require.NoError(t, err) 262 defer d.Close() 263 264 d.mu.Lock() 265 d.mu.versions.dynamicBaseLevel = false 266 d.mu.Unlock() 267 268 lsm := func() string { 269 d.mu.Lock() 270 s := d.mu.versions.currentVersion().String() 271 d.mu.Unlock() 272 return s 273 } 274 expectLSM := func(expected string) { 275 t.Helper() 276 expected = strings.TrimSpace(expected) 277 actual := strings.TrimSpace(lsm()) 278 if expected != actual { 279 t.Fatalf("expected\n%s\nbut found\n%s", expected, actual) 280 } 281 } 282 283 require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("b"), 100), nil)) 284 snap1 := d.NewSnapshot() 285 defer snap1.Close() 286 // Flush so that each version of "a" ends up in its own L0 table. If we 287 // allowed both versions in the same L0 table, compaction could trivially 288 // move the single L0 table to L1. 289 require.NoError(t, d.Flush()) 290 require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("c"), 100), nil)) 291 292 snap2 := d.NewSnapshot() 293 defer snap2.Close() 294 require.NoError(t, d.DeleteRange([]byte("a"), []byte("d"), nil)) 295 296 // Compact to produce the L1 tables. 297 require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false)) 298 expectLSM(` 299 1: 300 000008:[a#12,RANGEDEL-b#inf,RANGEDEL] 301 000009:[b#12,RANGEDEL-d#inf,RANGEDEL] 302 `) 303 304 // Compact again to move one of the tables to L2. 305 require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false)) 306 expectLSM(` 307 1: 308 000008:[a#12,RANGEDEL-b#inf,RANGEDEL] 309 2: 310 000009:[b#12,RANGEDEL-d#inf,RANGEDEL] 311 `) 312 313 // Write "b" and "c" to a new table. 314 require.NoError(t, d.Set([]byte("b"), []byte("d"), nil)) 315 require.NoError(t, d.Set([]byte("c"), []byte("e"), nil)) 316 require.NoError(t, d.Flush()) 317 expectLSM(` 318 0.0: 319 000011:[b#13,SET-c#14,SET] 320 1: 321 000008:[a#12,RANGEDEL-b#inf,RANGEDEL] 322 2: 323 000009:[b#12,RANGEDEL-d#inf,RANGEDEL] 324 `) 325 326 // "b" is still visible at this point as it should be. 327 if _, closer, err := d.Get([]byte("b")); err != nil { 328 t.Fatalf("expected success, but found %v", err) 329 } else { 330 closer.Close() 331 } 332 333 keys := func() string { 334 iter, _ := d.NewIter(nil) 335 defer iter.Close() 336 var buf bytes.Buffer 337 var sep string 338 for iter.First(); iter.Valid(); iter.Next() { 339 fmt.Fprintf(&buf, "%s%s", sep, iter.Key()) 340 sep = " " 341 } 342 return buf.String() 343 } 344 345 if expected, actual := `b c`, keys(); expected != actual { 346 t.Fatalf("expected %q, but found %q", expected, actual) 347 } 348 349 // Compact the L0 table. This will compact the L0 table into L1 and do to the 350 // sstable target size settings will create 2 tables in L1. Then L1 table 351 // containing "c" will be compacted again with the L2 table creating two 352 // tables in L2. Lastly, the L2 table containing "c" will be compacted 353 // creating the L3 table. 354 require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false)) 355 if formatVersion < FormatSetWithDelete { 356 expectLSM(` 357 1: 358 000008:[a#12,RANGEDEL-b#inf,RANGEDEL] 359 2: 360 000012:[b#13,SET-c#inf,RANGEDEL] 361 3: 362 000013:[c#14,SET-d#inf,RANGEDEL] 363 `) 364 } else { 365 expectLSM(` 366 1: 367 000008:[a#12,RANGEDEL-b#inf,RANGEDEL] 368 2: 369 000012:[b#13,SETWITHDEL-c#inf,RANGEDEL] 370 3: 371 000013:[c#14,SET-d#inf,RANGEDEL] 372 `) 373 } 374 375 // The L1 table still contains a tombstone from [a,d) which will improperly 376 // delete the newer version of "b" in L2. 377 if _, closer, err := d.Get([]byte("b")); err != nil { 378 t.Errorf("expected success, but found %v", err) 379 } else { 380 closer.Close() 381 } 382 383 if expected, actual := `b c`, keys(); expected != actual { 384 t.Errorf("expected %q, but found %q", expected, actual) 385 } 386 } 387 388 versions := []FormatMajorVersion{ 389 FormatMostCompatible, 390 FormatSetWithDelete - 1, 391 FormatSetWithDelete, 392 FormatNewest, 393 } 394 for _, version := range versions { 395 t.Run(fmt.Sprintf("version-%s", version), func(t *testing.T) { 396 runTest(version) 397 }) 398 } 399 } 400 401 // This is an alternate scenario to the one created in 402 // TestRangeDelCompactionTruncation that would result in the bounds for an 403 // sstable expanding to overlap its left neighbor if we failed to truncate an 404 // sstable's boundaries to the compaction input boundaries. 405 func TestRangeDelCompactionTruncation2(t *testing.T) { 406 // Use a small target file size so that there is a single key per sstable. 407 d, err := Open("", &Options{ 408 FS: vfs.NewMem(), 409 Levels: []LevelOptions{ 410 {TargetFileSize: 200}, 411 {TargetFileSize: 200}, 412 {TargetFileSize: 1}, 413 }, 414 DebugCheck: DebugCheckLevels, 415 }) 416 require.NoError(t, err) 417 defer d.Close() 418 419 lsm := func() string { 420 d.mu.Lock() 421 s := d.mu.versions.currentVersion().String() 422 d.mu.Unlock() 423 return s 424 } 425 expectLSM := func(expected string) { 426 t.Helper() 427 expected = strings.TrimSpace(expected) 428 actual := strings.TrimSpace(lsm()) 429 if expected != actual { 430 t.Fatalf("expected\n%s\nbut found\n%s", expected, actual) 431 } 432 } 433 434 require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 100), nil)) 435 snap1 := d.NewSnapshot() 436 defer snap1.Close() 437 // Flush so that each version of "b" ends up in its own L0 table. If we 438 // allowed both versions in the same L0 table, compaction could trivially 439 // move the single L0 table to L1. 440 require.NoError(t, d.Flush()) 441 require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("c"), 100), nil)) 442 snap2 := d.NewSnapshot() 443 defer snap2.Close() 444 require.NoError(t, d.DeleteRange([]byte("a"), []byte("d"), nil)) 445 446 // Compact to produce the L1 tables. 447 require.NoError(t, d.Compact([]byte("b"), []byte("b\x00"), false)) 448 expectLSM(` 449 6: 450 000009:[a#12,RANGEDEL-d#inf,RANGEDEL] 451 `) 452 453 require.NoError(t, d.Set([]byte("c"), bytes.Repeat([]byte("d"), 100), nil)) 454 require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false)) 455 expectLSM(` 456 6: 457 000012:[a#12,RANGEDEL-c#inf,RANGEDEL] 458 000013:[c#13,SET-d#inf,RANGEDEL] 459 `) 460 } 461 462 // TODO(peter): rewrite this test, TestRangeDelCompactionTruncation, and 463 // TestRangeDelCompactionTruncation2 as data-driven tests. 464 func TestRangeDelCompactionTruncation3(t *testing.T) { 465 // Use a small target file size so that there is a single key per sstable. 466 d, err := Open("tmp", &Options{ 467 Cleaner: ArchiveCleaner{}, 468 FS: vfs.NewMem(), 469 Levels: []LevelOptions{ 470 {TargetFileSize: 200}, 471 {TargetFileSize: 200}, 472 {TargetFileSize: 1}, 473 }, 474 DebugCheck: DebugCheckLevels, 475 }) 476 require.NoError(t, err) 477 defer d.Close() 478 479 d.mu.Lock() 480 d.mu.versions.dynamicBaseLevel = false 481 d.mu.Unlock() 482 483 lsm := func() string { 484 d.mu.Lock() 485 s := d.mu.versions.currentVersion().String() 486 d.mu.Unlock() 487 return s 488 } 489 expectLSM := func(expected string) { 490 t.Helper() 491 expected = strings.TrimSpace(expected) 492 actual := strings.TrimSpace(lsm()) 493 if expected != actual { 494 t.Fatalf("expected\n%s\nbut found\n%s", expected, actual) 495 } 496 } 497 498 require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 100), nil)) 499 snap1 := d.NewSnapshot() 500 defer snap1.Close() 501 502 // Flush so that each version of "b" ends up in its own L0 table. If we 503 // allowed both versions in the same L0 table, compaction could trivially 504 // move the single L0 table to L1. 505 require.NoError(t, d.Flush()) 506 require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("c"), 100), nil)) 507 snap2 := d.NewSnapshot() 508 defer snap2.Close() 509 510 require.NoError(t, d.DeleteRange([]byte("a"), []byte("d"), nil)) 511 snap3 := d.NewSnapshot() 512 defer snap3.Close() 513 514 if _, _, err := d.Get([]byte("b")); err != ErrNotFound { 515 t.Fatalf("expected not found, but found %v", err) 516 } 517 518 // Compact a few times to move the tables down to L3. 519 for i := 0; i < 3; i++ { 520 require.NoError(t, d.Compact([]byte("b"), []byte("b\x00"), false)) 521 } 522 expectLSM(` 523 3: 524 000009:[a#12,RANGEDEL-d#inf,RANGEDEL] 525 `) 526 527 require.NoError(t, d.Set([]byte("c"), bytes.Repeat([]byte("d"), 100), nil)) 528 529 require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false)) 530 expectLSM(` 531 3: 532 000013:[a#12,RANGEDEL-c#inf,RANGEDEL] 533 4: 534 000014:[c#13,SET-d#inf,RANGEDEL] 535 `) 536 537 require.NoError(t, d.Compact([]byte("c"), []byte("c\x00"), false)) 538 expectLSM(` 539 3: 540 000013:[a#12,RANGEDEL-c#inf,RANGEDEL] 541 5: 542 000014:[c#13,SET-d#inf,RANGEDEL] 543 `) 544 545 if _, _, err := d.Get([]byte("b")); err != ErrNotFound { 546 t.Fatalf("expected not found, but found %v", err) 547 } 548 549 require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false)) 550 expectLSM(` 551 4: 552 000013:[a#12,RANGEDEL-c#inf,RANGEDEL] 553 5: 554 000014:[c#13,SET-d#inf,RANGEDEL] 555 `) 556 557 if v, _, err := d.Get([]byte("b")); err != ErrNotFound { 558 t.Fatalf("expected not found, but found %v [%s]", err, v) 559 } 560 } 561 562 func BenchmarkRangeDelIterate(b *testing.B) { 563 for _, entries := range []int{10, 1000, 100000} { 564 b.Run(fmt.Sprintf("entries=%d", entries), func(b *testing.B) { 565 for _, deleted := range []int{entries, entries - 1} { 566 b.Run(fmt.Sprintf("deleted=%d", deleted), func(b *testing.B) { 567 for _, snapshotCompact := range []bool{false, true} { 568 b.Run(fmt.Sprintf("snapshotAndCompact=%t", snapshotCompact), func(b *testing.B) { 569 benchmarkRangeDelIterate(b, entries, deleted, snapshotCompact) 570 }) 571 } 572 }) 573 } 574 }) 575 } 576 } 577 578 func benchmarkRangeDelIterate(b *testing.B, entries, deleted int, snapshotCompact bool) { 579 mem := vfs.NewMem() 580 cache := NewCache(128 << 20) // 128 MB 581 defer cache.Unref() 582 583 d, err := Open("", &Options{ 584 Cache: cache, 585 FS: mem, 586 DebugCheck: DebugCheckLevels, 587 }) 588 if err != nil { 589 b.Fatal(err) 590 } 591 defer d.Close() 592 593 makeKey := func(i int) []byte { 594 return []byte(fmt.Sprintf("%09d", i)) 595 } 596 597 // Create an sstable with N entries and ingest it. This is a fast way 598 // to get a lot of entries into pebble. 599 f, err := mem.Create("ext") 600 if err != nil { 601 b.Fatal(err) 602 } 603 w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ 604 BlockSize: 32 << 10, // 32 KB 605 }) 606 for i := 0; i < entries; i++ { 607 key := base.MakeInternalKey(makeKey(i), 0, InternalKeyKindSet) 608 if err := w.Add(key, nil); err != nil { 609 b.Fatal(err) 610 } 611 } 612 if err := w.Close(); err != nil { 613 b.Fatal(err) 614 } 615 if err := d.Ingest([]string{"ext"}); err != nil { 616 b.Fatal(err) 617 } 618 619 // Some benchmarks test snapshots that force the range tombstone into the 620 // same level as the covered data. 621 // See https://github.com/cockroachdb/pebble/issues/1070. 622 if snapshotCompact { 623 s := d.NewSnapshot() 624 defer func() { require.NoError(b, s.Close()) }() 625 } 626 627 // Create a range tombstone that deletes most (or all) of those entries. 628 from := makeKey(0) 629 to := makeKey(deleted) 630 if err := d.DeleteRange(from, to, nil); err != nil { 631 b.Fatal(err) 632 } 633 634 if snapshotCompact { 635 require.NoError(b, d.Compact(makeKey(0), makeKey(entries), false)) 636 } 637 638 b.ResetTimer() 639 for i := 0; i < b.N; i++ { 640 iter, _ := d.NewIter(nil) 641 iter.SeekGE(from) 642 if deleted < entries { 643 if !iter.Valid() { 644 b.Fatal("key not found") 645 } 646 } else if iter.Valid() { 647 b.Fatal("unexpected key found") 648 } 649 if err := iter.Close(); err != nil { 650 b.Fatal(err) 651 } 652 } 653 }