github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/writer_test.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "fmt" 11 "math/rand" 12 "strconv" 13 "strings" 14 "sync" 15 "testing" 16 "unsafe" 17 18 "github.com/cockroachdb/datadriven" 19 "github.com/cockroachdb/errors" 20 "github.com/cockroachdb/pebble/bloom" 21 "github.com/cockroachdb/pebble/internal/base" 22 "github.com/cockroachdb/pebble/internal/cache" 23 "github.com/cockroachdb/pebble/internal/humanize" 24 "github.com/cockroachdb/pebble/internal/testkeys" 25 "github.com/cockroachdb/pebble/objstorage" 26 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 27 "github.com/cockroachdb/pebble/vfs" 28 "github.com/stretchr/testify/require" 29 ) 30 31 func testWriterParallelism(t *testing.T, parallelism bool) { 32 for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} { 33 tdFile := "testdata/writer" 34 if format == TableFormatPebblev3 { 35 tdFile = "testdata/writer_v3" 36 } 37 t.Run(format.String(), func(t *testing.T) { runDataDriven(t, tdFile, format, parallelism) }) 38 } 39 } 40 func TestWriter(t *testing.T) { 41 testWriterParallelism(t, false) 42 } 43 44 func testRewriterParallelism(t *testing.T, parallelism bool) { 45 for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} { 46 tdFile := "testdata/rewriter" 47 if format == TableFormatPebblev3 { 48 tdFile = "testdata/rewriter_v3" 49 } 50 t.Run(format.String(), func(t *testing.T) { runDataDriven(t, tdFile, format, parallelism) }) 51 } 52 } 53 54 func TestRewriter(t *testing.T) { 55 testRewriterParallelism(t, false) 56 } 57 58 func TestWriterParallel(t *testing.T) { 59 testWriterParallelism(t, true) 60 } 61 62 func TestRewriterParallel(t *testing.T) { 63 testRewriterParallelism(t, true) 64 } 65 66 func runDataDriven(t *testing.T, file string, tableFormat TableFormat, parallelism bool) { 67 var r *Reader 68 defer func() { 69 if r != nil { 70 require.NoError(t, r.Close()) 71 } 72 }() 73 74 format := func(td *datadriven.TestData, m *WriterMetadata) string { 75 var requestedProps []string 76 for _, cmdArg := range td.CmdArgs { 77 switch cmdArg.Key { 78 case "props": 79 requestedProps = cmdArg.Vals 80 } 81 } 82 83 var b bytes.Buffer 84 if m.HasPointKeys { 85 fmt.Fprintf(&b, "point: [%s-%s]\n", m.SmallestPoint, m.LargestPoint) 86 } 87 if m.HasRangeDelKeys { 88 fmt.Fprintf(&b, "rangedel: [%s-%s]\n", m.SmallestRangeDel, m.LargestRangeDel) 89 } 90 if m.HasRangeKeys { 91 fmt.Fprintf(&b, "rangekey: [%s-%s]\n", m.SmallestRangeKey, m.LargestRangeKey) 92 } 93 fmt.Fprintf(&b, "seqnums: [%d-%d]\n", m.SmallestSeqNum, m.LargestSeqNum) 94 95 if len(requestedProps) > 0 { 96 props := strings.Split(r.Properties.String(), "\n") 97 for _, requestedProp := range requestedProps { 98 fmt.Fprintf(&b, "props %q:\n", requestedProp) 99 for _, prop := range props { 100 if strings.Contains(prop, requestedProp) { 101 fmt.Fprintf(&b, " %s\n", prop) 102 } 103 } 104 } 105 } 106 107 return b.String() 108 } 109 110 datadriven.RunTest(t, file, func(t *testing.T, td *datadriven.TestData) string { 111 switch td.Cmd { 112 case "build": 113 if r != nil { 114 _ = r.Close() 115 r = nil 116 } 117 var meta *WriterMetadata 118 var err error 119 meta, r, err = runBuildCmd(td, &WriterOptions{ 120 TableFormat: tableFormat, 121 Parallelism: parallelism, 122 }, 0) 123 if err != nil { 124 return err.Error() 125 } 126 return format(td, meta) 127 128 case "build-raw": 129 if r != nil { 130 _ = r.Close() 131 r = nil 132 } 133 var meta *WriterMetadata 134 var err error 135 meta, r, err = runBuildRawCmd(td, &WriterOptions{ 136 TableFormat: tableFormat, 137 }) 138 if err != nil { 139 return err.Error() 140 } 141 return format(td, meta) 142 143 case "scan": 144 origIter, err := r.NewIter(nil /* lower */, nil /* upper */) 145 if err != nil { 146 return err.Error() 147 } 148 iter := newIterAdapter(origIter) 149 defer iter.Close() 150 151 var buf bytes.Buffer 152 for valid := iter.First(); valid; valid = iter.Next() { 153 fmt.Fprintf(&buf, "%s:%s\n", iter.Key(), iter.Value()) 154 } 155 return buf.String() 156 157 case "get": 158 var buf bytes.Buffer 159 for _, k := range strings.Split(td.Input, "\n") { 160 value, err := r.get([]byte(k)) 161 if err != nil { 162 fmt.Fprintf(&buf, "get %s: %s\n", k, err.Error()) 163 } else { 164 fmt.Fprintf(&buf, "%s\n", value) 165 } 166 } 167 return buf.String() 168 169 case "scan-range-del": 170 iter, err := r.NewRawRangeDelIter() 171 if err != nil { 172 return err.Error() 173 } 174 if iter == nil { 175 return "" 176 } 177 defer iter.Close() 178 179 var buf bytes.Buffer 180 for s := iter.First(); s != nil; s = iter.Next() { 181 fmt.Fprintf(&buf, "%s\n", s) 182 } 183 return buf.String() 184 185 case "scan-range-key": 186 iter, err := r.NewRawRangeKeyIter() 187 if err != nil { 188 return err.Error() 189 } 190 if iter == nil { 191 return "" 192 } 193 defer iter.Close() 194 195 var buf bytes.Buffer 196 for s := iter.First(); s != nil; s = iter.Next() { 197 fmt.Fprintf(&buf, "%s\n", s) 198 } 199 return buf.String() 200 201 case "layout": 202 l, err := r.Layout() 203 if err != nil { 204 return err.Error() 205 } 206 verbose := false 207 if len(td.CmdArgs) > 0 { 208 if td.CmdArgs[0].Key == "verbose" { 209 verbose = true 210 } else { 211 return "unknown arg" 212 } 213 } 214 var buf bytes.Buffer 215 l.Describe(&buf, verbose, r, nil) 216 return buf.String() 217 218 case "rewrite": 219 var meta *WriterMetadata 220 var err error 221 meta, r, err = runRewriteCmd(td, r, WriterOptions{ 222 TableFormat: tableFormat, 223 }) 224 if err != nil { 225 return err.Error() 226 } 227 if err != nil { 228 return err.Error() 229 } 230 return format(td, meta) 231 232 default: 233 return fmt.Sprintf("unknown command: %s", td.Cmd) 234 } 235 }) 236 } 237 238 func TestWriterWithValueBlocks(t *testing.T) { 239 var r *Reader 240 defer func() { 241 if r != nil { 242 require.NoError(t, r.Close()) 243 } 244 }() 245 formatVersion := TableFormatMax 246 formatMeta := func(m *WriterMetadata) string { 247 return fmt.Sprintf("value-blocks: num-values %d, num-blocks: %d, size: %d", 248 m.Properties.NumValuesInValueBlocks, m.Properties.NumValueBlocks, 249 m.Properties.ValueBlocksSize) 250 } 251 252 parallelism := false 253 if rand.Intn(2) == 0 { 254 parallelism = true 255 } 256 t.Logf("writer parallelism %t", parallelism) 257 attributeExtractor := func( 258 key []byte, keyPrefixLen int, value []byte) (base.ShortAttribute, error) { 259 require.NotNil(t, key) 260 require.Less(t, 0, keyPrefixLen) 261 attribute := base.ShortAttribute(len(value) & '\x07') 262 return attribute, nil 263 } 264 265 datadriven.RunTest(t, "testdata/writer_value_blocks", func(t *testing.T, td *datadriven.TestData) string { 266 switch td.Cmd { 267 case "build": 268 if r != nil { 269 _ = r.Close() 270 r = nil 271 } 272 var meta *WriterMetadata 273 var err error 274 var blockSize int 275 if td.HasArg("block-size") { 276 td.ScanArgs(t, "block-size", &blockSize) 277 } 278 var inPlaceValueBound UserKeyPrefixBound 279 if td.HasArg("in-place-bound") { 280 var l, u string 281 td.ScanArgs(t, "in-place-bound", &l, &u) 282 inPlaceValueBound.Lower = []byte(l) 283 inPlaceValueBound.Upper = []byte(u) 284 } 285 meta, r, err = runBuildCmd(td, &WriterOptions{ 286 BlockSize: blockSize, 287 Comparer: testkeys.Comparer, 288 TableFormat: formatVersion, 289 Parallelism: parallelism, 290 RequiredInPlaceValueBound: inPlaceValueBound, 291 ShortAttributeExtractor: attributeExtractor, 292 }, 0) 293 if err != nil { 294 return err.Error() 295 } 296 return formatMeta(meta) 297 298 case "layout": 299 l, err := r.Layout() 300 if err != nil { 301 return err.Error() 302 } 303 var buf bytes.Buffer 304 l.Describe(&buf, true, r, func(key *base.InternalKey, value []byte) { 305 fmt.Fprintf(&buf, " %s:%s\n", key.String(), string(value)) 306 }) 307 return buf.String() 308 309 case "scan-raw": 310 // Raw scan does not fetch from value blocks. 311 origIter, err := r.NewIter(nil /* lower */, nil /* upper */) 312 if err != nil { 313 return err.Error() 314 } 315 forceIgnoreValueBlocks := func(i *singleLevelIterator) { 316 i.vbReader = nil 317 i.data.lazyValueHandling.vbr = nil 318 i.data.lazyValueHandling.hasValuePrefix = false 319 } 320 switch i := origIter.(type) { 321 case *twoLevelIterator: 322 forceIgnoreValueBlocks(&i.singleLevelIterator) 323 case *singleLevelIterator: 324 forceIgnoreValueBlocks(i) 325 } 326 iter := newIterAdapter(origIter) 327 defer iter.Close() 328 329 var buf bytes.Buffer 330 for valid := iter.First(); valid; valid = iter.Next() { 331 v := iter.Value() 332 if iter.Key().Kind() == InternalKeyKindSet { 333 prefix := valuePrefix(v[0]) 334 setWithSamePrefix := setHasSamePrefix(prefix) 335 if isValueHandle(prefix) { 336 attribute := getShortAttribute(prefix) 337 vh := decodeValueHandle(v[1:]) 338 fmt.Fprintf(&buf, "%s:value-handle len %d block %d offset %d, att %d, same-pre %t\n", 339 iter.Key(), vh.valueLen, vh.blockNum, vh.offsetInBlock, attribute, setWithSamePrefix) 340 } else { 341 fmt.Fprintf(&buf, "%s:in-place %s, same-pre %t\n", iter.Key(), v[1:], setWithSamePrefix) 342 } 343 } else { 344 fmt.Fprintf(&buf, "%s:%s\n", iter.Key(), v) 345 } 346 } 347 return buf.String() 348 349 case "scan": 350 origIter, err := r.NewIter(nil /* lower */, nil /* upper */) 351 if err != nil { 352 return err.Error() 353 } 354 iter := newIterAdapter(origIter) 355 defer iter.Close() 356 var buf bytes.Buffer 357 for valid := iter.First(); valid; valid = iter.Next() { 358 fmt.Fprintf(&buf, "%s:%s\n", iter.Key(), iter.Value()) 359 } 360 return buf.String() 361 362 case "scan-cloned-lazy-values": 363 iter, err := r.NewIter(nil /* lower */, nil /* upper */) 364 if err != nil { 365 return err.Error() 366 } 367 var fetchers [100]base.LazyFetcher 368 var values []base.LazyValue 369 n := 0 370 var b []byte 371 for k, lv := iter.First(); k != nil; k, lv = iter.Next() { 372 var lvClone base.LazyValue 373 lvClone, b = lv.Clone(b, &fetchers[n]) 374 if lv.Fetcher != nil { 375 _, callerOwned, err := lv.Value(nil) 376 require.False(t, callerOwned) 377 require.NoError(t, err) 378 } 379 n++ 380 values = append(values, lvClone) 381 } 382 require.NoError(t, iter.Error()) 383 iter.Close() 384 var buf bytes.Buffer 385 for i := range values { 386 fmt.Fprintf(&buf, "%d", i) 387 v, callerOwned, err := values[i].Value(nil) 388 require.NoError(t, err) 389 if values[i].Fetcher != nil { 390 require.True(t, callerOwned) 391 fmt.Fprintf(&buf, "(lazy: len %d, attr: %d): %s\n", 392 values[i].Len(), values[i].Fetcher.Attribute.ShortAttribute, string(v)) 393 v2, callerOwned, err := values[i].Value(nil) 394 require.NoError(t, err) 395 require.True(t, callerOwned) 396 require.Equal(t, &v[0], &v2[0]) 397 398 } else { 399 require.False(t, callerOwned) 400 fmt.Fprintf(&buf, "(in-place: len %d): %s\n", values[i].Len(), string(v)) 401 } 402 } 403 return buf.String() 404 405 default: 406 return fmt.Sprintf("unknown command: %s", td.Cmd) 407 } 408 }) 409 } 410 411 func testBlockBufClear(t *testing.T, b1, b2 *blockBuf) { 412 require.Equal(t, b1.tmp, b2.tmp) 413 } 414 415 func TestBlockBufClear(t *testing.T) { 416 b1 := &blockBuf{} 417 b1.tmp[0] = 1 418 b1.compressedBuf = make([]byte, 1) 419 b1.clear() 420 testBlockBufClear(t, b1, &blockBuf{}) 421 } 422 423 func TestClearDataBlockBuf(t *testing.T) { 424 d := newDataBlockBuf(1, ChecksumTypeCRC32c) 425 d.blockBuf.compressedBuf = make([]byte, 1) 426 d.dataBlock.add(ikey("apple"), nil) 427 d.dataBlock.add(ikey("banana"), nil) 428 429 d.clear() 430 testBlockCleared(t, &d.dataBlock, &blockWriter{}) 431 testBlockBufClear(t, &d.blockBuf, &blockBuf{}) 432 433 dataBlockBufPool.Put(d) 434 } 435 436 func TestClearIndexBlockBuf(t *testing.T) { 437 i := newIndexBlockBuf(false) 438 i.block.add(ikey("apple"), nil) 439 i.block.add(ikey("banana"), nil) 440 i.clear() 441 442 testBlockCleared(t, &i.block, &blockWriter{}) 443 require.Equal( 444 t, i.size.estimate, sizeEstimate{emptySize: emptyBlockSize}, 445 ) 446 indexBlockBufPool.Put(i) 447 } 448 449 func TestClearWriteTask(t *testing.T) { 450 w := writeTaskPool.Get().(*writeTask) 451 ch := make(chan bool, 1) 452 w.compressionDone = ch 453 w.buf = &dataBlockBuf{} 454 w.flushableIndexBlock = &indexBlockBuf{} 455 w.currIndexBlock = &indexBlockBuf{} 456 w.indexEntrySep = ikey("apple") 457 w.indexInflightSize = 1 458 w.finishedIndexProps = []byte{'a', 'v'} 459 460 w.clear() 461 462 var nilDataBlockBuf *dataBlockBuf 463 var nilIndexBlockBuf *indexBlockBuf 464 // Channels should be the same(no new channel should be allocated) 465 require.Equal(t, w.compressionDone, ch) 466 require.Equal(t, w.buf, nilDataBlockBuf) 467 require.Equal(t, w.flushableIndexBlock, nilIndexBlockBuf) 468 require.Equal(t, w.currIndexBlock, nilIndexBlockBuf) 469 require.Equal(t, w.indexEntrySep, base.InvalidInternalKey) 470 require.Equal(t, w.indexInflightSize, 0) 471 require.Equal(t, w.finishedIndexProps, []byte(nil)) 472 473 writeTaskPool.Put(w) 474 } 475 476 func TestDoubleClose(t *testing.T) { 477 // There is code in Cockroach land which relies on Writer.Close being 478 // idempotent. We should test this in Pebble, so that we don't cause 479 // Cockroach test failures. 480 f := &discardFile{} 481 w := NewWriter(f, WriterOptions{ 482 BlockSize: 1, 483 TableFormat: TableFormatPebblev1, 484 }) 485 w.Set(ikey("a").UserKey, nil) 486 w.Set(ikey("b").UserKey, nil) 487 err := w.Close() 488 require.NoError(t, err) 489 err = w.Close() 490 require.Equal(t, err, errWriterClosed) 491 } 492 493 func TestParallelWriterErrorProp(t *testing.T) { 494 fs := vfs.NewMem() 495 f, err := fs.Create("test") 496 require.NoError(t, err) 497 opts := WriterOptions{ 498 TableFormat: TableFormatPebblev1, BlockSize: 1, Parallelism: true, 499 } 500 501 w := NewWriter(objstorageprovider.NewFileWritable(f), opts) 502 // Directly testing this, because it's difficult to get the Writer to 503 // encounter an error, precisely when the writeQueue is doing block writes. 504 w.coordination.writeQueue.err = errors.New("write queue write error") 505 w.Set(ikey("a").UserKey, nil) 506 w.Set(ikey("b").UserKey, nil) 507 err = w.Close() 508 require.Equal(t, err.Error(), "write queue write error") 509 } 510 511 func TestSizeEstimate(t *testing.T) { 512 var sizeEstimate sizeEstimate 513 datadriven.RunTest(t, "testdata/size_estimate", 514 func(t *testing.T, td *datadriven.TestData) string { 515 switch td.Cmd { 516 case "init": 517 if len(td.CmdArgs) != 1 { 518 return "init <empty size>" 519 } 520 emptySize, err := strconv.Atoi(td.CmdArgs[0].String()) 521 if err != nil { 522 return "invalid empty size" 523 } 524 sizeEstimate.init(uint64(emptySize)) 525 return "success" 526 case "clear": 527 sizeEstimate.clear() 528 return fmt.Sprintf("%d", sizeEstimate.size()) 529 case "size": 530 return fmt.Sprintf("%d", sizeEstimate.size()) 531 case "add_inflight": 532 if len(td.CmdArgs) != 1 { 533 return "add_inflight <inflight size estimate>" 534 } 535 inflightSize, err := strconv.Atoi(td.CmdArgs[0].String()) 536 if err != nil { 537 return "invalid inflight size" 538 } 539 sizeEstimate.addInflight(inflightSize) 540 return fmt.Sprintf("%d", sizeEstimate.size()) 541 case "entry_written": 542 if len(td.CmdArgs) != 2 { 543 return "entry_written <new_total_size> <prev_inflight_size>" 544 } 545 newTotalSize, err := strconv.Atoi(td.CmdArgs[0].String()) 546 if err != nil { 547 return "invalid inflight size" 548 } 549 inflightSize, err := strconv.Atoi(td.CmdArgs[1].String()) 550 if err != nil { 551 return "invalid inflight size" 552 } 553 sizeEstimate.writtenWithTotal(uint64(newTotalSize), inflightSize) 554 return fmt.Sprintf("%d", sizeEstimate.size()) 555 case "num_written_entries": 556 return fmt.Sprintf("%d", sizeEstimate.numWrittenEntries) 557 case "num_inflight_entries": 558 return fmt.Sprintf("%d", sizeEstimate.numInflightEntries) 559 case "num_entries": 560 return fmt.Sprintf("%d", sizeEstimate.numWrittenEntries+sizeEstimate.numInflightEntries) 561 default: 562 return fmt.Sprintf("unknown command: %s", td.Cmd) 563 } 564 }) 565 } 566 567 func TestWriterClearCache(t *testing.T) { 568 // Verify that Writer clears the cache of blocks that it writes. 569 mem := vfs.NewMem() 570 opts := ReaderOptions{ 571 Cache: cache.New(64 << 20), 572 Comparer: testkeys.Comparer, 573 } 574 defer opts.Cache.Unref() 575 576 writerOpts := WriterOptions{ 577 Cache: opts.Cache, 578 Comparer: testkeys.Comparer, 579 TableFormat: TableFormatPebblev3, 580 } 581 cacheOpts := &cacheOpts{cacheID: 1, fileNum: base.FileNum(1).DiskFileNum()} 582 invalidData := func() *cache.Value { 583 invalid := []byte("invalid data") 584 v := cache.Alloc(len(invalid)) 585 copy(v.Buf(), invalid) 586 return v 587 } 588 589 build := func(name string) { 590 f, err := mem.Create(name) 591 require.NoError(t, err) 592 593 w := NewWriter(objstorageprovider.NewFileWritable(f), writerOpts, cacheOpts) 594 require.NoError(t, w.Set([]byte("hello"), []byte("world"))) 595 require.NoError(t, w.Set([]byte("hello@42"), []byte("world@42"))) 596 require.NoError(t, w.Set([]byte("hello@5"), []byte("world@5"))) 597 require.NoError(t, w.Close()) 598 } 599 600 // Build the sstable a first time so that we can determine the locations of 601 // all of the blocks. 602 build("test") 603 604 f, err := mem.Open("test") 605 require.NoError(t, err) 606 607 r, err := newReader(f, opts) 608 require.NoError(t, err) 609 610 layout, err := r.Layout() 611 require.NoError(t, err) 612 613 foreachBH := func(layout *Layout, f func(bh BlockHandle)) { 614 for _, bh := range layout.Data { 615 f(bh.BlockHandle) 616 } 617 for _, bh := range layout.Index { 618 f(bh) 619 } 620 f(layout.TopIndex) 621 f(layout.Filter) 622 f(layout.RangeDel) 623 for _, bh := range layout.ValueBlock { 624 f(bh) 625 } 626 if layout.ValueIndex.Length != 0 { 627 f(layout.ValueIndex) 628 } 629 f(layout.Properties) 630 f(layout.MetaIndex) 631 } 632 633 // Poison the cache for each of the blocks. 634 poison := func(bh BlockHandle) { 635 opts.Cache.Set(cacheOpts.cacheID, cacheOpts.fileNum, bh.Offset, invalidData()).Release() 636 } 637 foreachBH(layout, poison) 638 639 // Build the table a second time. This should clear the cache for the blocks 640 // that are written. 641 build("test") 642 643 // Verify that the written blocks have been cleared from the cache. 644 check := func(bh BlockHandle) { 645 h := opts.Cache.Get(cacheOpts.cacheID, cacheOpts.fileNum, bh.Offset) 646 if h.Get() != nil { 647 t.Fatalf("%d: expected cache to be cleared, but found %q", bh.Offset, h.Get()) 648 } 649 } 650 foreachBH(layout, check) 651 652 require.NoError(t, r.Close()) 653 } 654 655 type discardFile struct { 656 wrote int64 657 } 658 659 var _ objstorage.Writable = (*discardFile)(nil) 660 661 func (f *discardFile) Finish() error { 662 return nil 663 } 664 665 func (f *discardFile) Abort() {} 666 667 func (f *discardFile) Write(p []byte) error { 668 f.wrote += int64(len(p)) 669 return nil 670 } 671 672 type blockPropErrSite uint 673 674 const ( 675 errSiteAdd blockPropErrSite = iota 676 errSiteFinishBlock 677 errSiteFinishIndex 678 errSiteFinishTable 679 errSiteNone 680 ) 681 682 type testBlockPropCollector struct { 683 errSite blockPropErrSite 684 err error 685 } 686 687 func (c *testBlockPropCollector) Name() string { return "testBlockPropCollector" } 688 689 func (c *testBlockPropCollector) Add(_ InternalKey, _ []byte) error { 690 if c.errSite == errSiteAdd { 691 return c.err 692 } 693 return nil 694 } 695 696 func (c *testBlockPropCollector) FinishDataBlock(_ []byte) ([]byte, error) { 697 if c.errSite == errSiteFinishBlock { 698 return nil, c.err 699 } 700 return nil, nil 701 } 702 703 func (c *testBlockPropCollector) AddPrevDataBlockToIndexBlock() {} 704 705 func (c *testBlockPropCollector) FinishIndexBlock(_ []byte) ([]byte, error) { 706 if c.errSite == errSiteFinishIndex { 707 return nil, c.err 708 } 709 return nil, nil 710 } 711 712 func (c *testBlockPropCollector) FinishTable(_ []byte) ([]byte, error) { 713 if c.errSite == errSiteFinishTable { 714 return nil, c.err 715 } 716 return nil, nil 717 } 718 719 func TestWriterBlockPropertiesErrors(t *testing.T) { 720 blockPropErr := errors.Newf("block property collector failed") 721 testCases := []blockPropErrSite{ 722 errSiteAdd, 723 errSiteFinishBlock, 724 errSiteFinishIndex, 725 errSiteFinishTable, 726 errSiteNone, 727 } 728 729 var ( 730 k1 = base.MakeInternalKey([]byte("a"), 0, base.InternalKeyKindSet) 731 v1 = []byte("apples") 732 k2 = base.MakeInternalKey([]byte("b"), 0, base.InternalKeyKindSet) 733 v2 = []byte("bananas") 734 k3 = base.MakeInternalKey([]byte("c"), 0, base.InternalKeyKindSet) 735 v3 = []byte("carrots") 736 ) 737 738 for _, tc := range testCases { 739 t.Run("", func(t *testing.T) { 740 fs := vfs.NewMem() 741 f, err := fs.Create("test") 742 require.NoError(t, err) 743 744 w := NewWriter(objstorageprovider.NewFileWritable(f), WriterOptions{ 745 BlockSize: 1, 746 BlockPropertyCollectors: []func() BlockPropertyCollector{ 747 func() BlockPropertyCollector { 748 return &testBlockPropCollector{ 749 errSite: tc, 750 err: blockPropErr, 751 } 752 }, 753 }, 754 TableFormat: TableFormatPebblev1, 755 }) 756 757 err = w.Add(k1, v1) 758 switch tc { 759 case errSiteAdd: 760 require.Error(t, err) 761 require.Equal(t, blockPropErr, err) 762 return 763 case errSiteFinishBlock: 764 require.NoError(t, err) 765 // Addition of a second key completes the first block. 766 err = w.Add(k2, v2) 767 require.Error(t, err) 768 require.Equal(t, blockPropErr, err) 769 return 770 case errSiteFinishIndex: 771 require.NoError(t, err) 772 // Addition of a second key completes the first block. 773 err = w.Add(k2, v2) 774 require.NoError(t, err) 775 // The index entry for the first block is added after the completion of 776 // the second block, which is triggered by adding a third key. 777 err = w.Add(k3, v3) 778 require.Error(t, err) 779 require.Equal(t, blockPropErr, err) 780 return 781 } 782 783 err = w.Close() 784 if tc == errSiteFinishTable { 785 require.Error(t, err) 786 require.Equal(t, blockPropErr, err) 787 } else { 788 require.NoError(t, err) 789 } 790 }) 791 } 792 } 793 794 func TestWriter_TableFormatCompatibility(t *testing.T) { 795 testCases := []struct { 796 name string 797 minFormat TableFormat 798 configureFn func(opts *WriterOptions) 799 writeFn func(w *Writer) error 800 }{ 801 { 802 name: "block properties", 803 minFormat: TableFormatPebblev1, 804 configureFn: func(opts *WriterOptions) { 805 opts.BlockPropertyCollectors = []func() BlockPropertyCollector{ 806 func() BlockPropertyCollector { 807 return NewBlockIntervalCollector( 808 "collector", &valueCharBlockIntervalCollector{charIdx: 0}, nil, 809 ) 810 }, 811 } 812 }, 813 }, 814 { 815 name: "range keys", 816 minFormat: TableFormatPebblev2, 817 writeFn: func(w *Writer) error { 818 return w.RangeKeyDelete([]byte("a"), []byte("b")) 819 }, 820 }, 821 } 822 823 for _, tc := range testCases { 824 t.Run(tc.name, func(t *testing.T) { 825 for tf := TableFormatLevelDB; tf <= TableFormatMax; tf++ { 826 t.Run(tf.String(), func(t *testing.T) { 827 fs := vfs.NewMem() 828 f, err := fs.Create("sst") 829 require.NoError(t, err) 830 831 opts := WriterOptions{TableFormat: tf} 832 if tc.configureFn != nil { 833 tc.configureFn(&opts) 834 } 835 836 w := NewWriter(objstorageprovider.NewFileWritable(f), opts) 837 if tc.writeFn != nil { 838 err = tc.writeFn(w) 839 require.NoError(t, err) 840 } 841 842 err = w.Close() 843 if tf < tc.minFormat { 844 require.Error(t, err) 845 } else { 846 require.NoError(t, err) 847 } 848 }) 849 } 850 }) 851 } 852 } 853 854 // Tests for races, such as https://github.com/cockroachdb/cockroach/issues/77194, 855 // in the Writer. 856 func TestWriterRace(t *testing.T) { 857 ks := testkeys.Alpha(5) 858 ks = ks.EveryN(ks.Count() / 1_000) 859 keys := make([][]byte, ks.Count()) 860 for ki := 0; ki < len(keys); ki++ { 861 keys[ki] = testkeys.Key(ks, int64(ki)) 862 } 863 readerOpts := ReaderOptions{ 864 Comparer: testkeys.Comparer, 865 Filters: map[string]base.FilterPolicy{}, 866 } 867 868 var wg sync.WaitGroup 869 for i := 0; i < 16; i++ { 870 wg.Add(1) 871 go func() { 872 val := make([]byte, rand.Intn(1000)) 873 opts := WriterOptions{ 874 Comparer: testkeys.Comparer, 875 BlockSize: rand.Intn(1 << 10), 876 Compression: NoCompression, 877 } 878 defer wg.Done() 879 f := &memFile{} 880 w := NewWriter(f, opts) 881 for ki := 0; ki < len(keys); ki++ { 882 require.NoError( 883 t, 884 w.Add(base.MakeInternalKey(keys[ki], uint64(ki), InternalKeyKindSet), val), 885 ) 886 require.Equal( 887 t, w.dataBlockBuf.dataBlock.getCurKey().UserKey, keys[ki], 888 ) 889 } 890 require.NoError(t, w.Close()) 891 require.Equal(t, w.meta.LargestPoint.UserKey, keys[len(keys)-1]) 892 r, err := NewMemReader(f.Data(), readerOpts) 893 require.NoError(t, err) 894 defer r.Close() 895 it, err := r.NewIter(nil, nil) 896 require.NoError(t, err) 897 defer it.Close() 898 ki := 0 899 for k, v := it.First(); k != nil; k, v = it.Next() { 900 require.Equal(t, k.UserKey, keys[ki]) 901 vBytes, _, err := v.Value(nil) 902 require.NoError(t, err) 903 require.Equal(t, vBytes, val) 904 ki++ 905 } 906 }() 907 } 908 wg.Wait() 909 } 910 911 func TestObsoleteBlockPropertyCollectorFilter(t *testing.T) { 912 var c obsoleteKeyBlockPropertyCollector 913 var f obsoleteKeyBlockPropertyFilter 914 require.Equal(t, c.Name(), f.Name()) 915 // Data block with 1 obsolete and 1 non-obsolete point. 916 c.AddPoint(false) 917 c.AddPoint(true) 918 finishAndCheck := func(finishFunc func([]byte) ([]byte, error), expectedIntersects bool) { 919 var buf [1]byte 920 prop, err := finishFunc(buf[:0:1]) 921 require.NoError(t, err) 922 expectedLength := 1 923 if expectedIntersects { 924 // The common case is encoded in 0 bytes 925 expectedLength = 0 926 } 927 require.Equal(t, expectedLength, len(prop)) 928 // Confirm that the collector used the slice. 929 require.Equal(t, unsafe.Pointer(&buf[0]), unsafe.Pointer(&prop[:1][0])) 930 intersects, err := f.Intersects(prop) 931 require.NoError(t, err) 932 require.Equal(t, expectedIntersects, intersects) 933 } 934 finishAndCheck(c.FinishDataBlock, true) 935 c.AddPrevDataBlockToIndexBlock() 936 // Data block with only obsolete points. 937 c.AddPoint(true) 938 c.AddPoint(true) 939 finishAndCheck(c.FinishDataBlock, false) 940 c.AddPrevDataBlockToIndexBlock() 941 // Index block has one obsolete block and one non-obsolete block. 942 finishAndCheck(c.FinishIndexBlock, true) 943 944 // Data block with obsolete point. 945 c.AddPoint(true) 946 finishAndCheck(c.FinishDataBlock, false) 947 c.AddPrevDataBlockToIndexBlock() 948 // Data block with obsolete point. 949 c.AddPoint(true) 950 finishAndCheck(c.FinishDataBlock, false) 951 c.AddPrevDataBlockToIndexBlock() 952 // Index block has only obsolete blocks. 953 finishAndCheck(c.FinishIndexBlock, false) 954 // Table is not obsolete. 955 finishAndCheck(c.FinishTable, true) 956 957 // Reset the collector state. 958 c = obsoleteKeyBlockPropertyCollector{} 959 // Table with only obsolete blocks. 960 961 // Data block with obsolete point. 962 c.AddPoint(true) 963 finishAndCheck(c.FinishDataBlock, false) 964 c.AddPrevDataBlockToIndexBlock() 965 // Data block with obsolete point. 966 c.AddPoint(true) 967 finishAndCheck(c.FinishDataBlock, false) 968 c.AddPrevDataBlockToIndexBlock() 969 // Index block has only obsolete blocks. 970 finishAndCheck(c.FinishIndexBlock, false) 971 // Table is obsolete. 972 finishAndCheck(c.FinishTable, false) 973 } 974 975 func BenchmarkWriter(b *testing.B) { 976 keys := make([][]byte, 1e6) 977 const keyLen = 24 978 keySlab := make([]byte, keyLen*len(keys)) 979 for i := range keys { 980 key := keySlab[i*keyLen : i*keyLen+keyLen] 981 binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix 982 binary.BigEndian.PutUint64(key[8:16], 456) 983 binary.BigEndian.PutUint64(key[16:], uint64(i)) 984 keys[i] = key 985 } 986 for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} { 987 b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) { 988 runWriterBench(b, keys, nil, format) 989 }) 990 } 991 } 992 993 func BenchmarkWriterWithVersions(b *testing.B) { 994 keys := make([][]byte, 1e6) 995 const keyLen = 26 996 keySlab := make([]byte, keyLen*len(keys)) 997 for i := range keys { 998 key := keySlab[i*keyLen : i*keyLen+keyLen] 999 binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix 1000 binary.BigEndian.PutUint64(key[8:16], 456) 1001 // @ is ascii value 64. Placing any byte with value 64 in these 8 bytes 1002 // will confuse testkeys.Comparer, when we pass it a key after splitting 1003 // of the suffix, since Comparer thinks this prefix is also a key with a 1004 // suffix. Hence, we print as a base 10 string. 1005 require.Equal(b, 8, copy(key[16:], fmt.Sprintf("%8d", i/2))) 1006 key[24] = '@' 1007 // Ascii representation of single digit integer 2-(i%2). 1008 key[25] = byte(48 + 2 - (i % 2)) 1009 keys[i] = key 1010 } 1011 // TableFormatPebblev3 can sometimes be ~50% slower than 1012 // TableFormatPebblev2, since testkeys.Compare is expensive (mainly due to 1013 // split) and with v3 we have to call it twice for 50% of the Set calls, 1014 // since they have the same prefix as the preceding key. 1015 for _, format := range []TableFormat{TableFormatPebblev2, TableFormatPebblev3} { 1016 b.Run(fmt.Sprintf("format=%s", format.String()), func(b *testing.B) { 1017 runWriterBench(b, keys, testkeys.Comparer, format) 1018 }) 1019 } 1020 } 1021 1022 func runWriterBench(b *testing.B, keys [][]byte, comparer *base.Comparer, format TableFormat) { 1023 for _, bs := range []int{base.DefaultBlockSize, 32 << 10} { 1024 b.Run(fmt.Sprintf("block=%s", humanize.Bytes.Int64(int64(bs))), func(b *testing.B) { 1025 for _, filter := range []bool{true, false} { 1026 b.Run(fmt.Sprintf("filter=%t", filter), func(b *testing.B) { 1027 for _, comp := range []Compression{NoCompression, SnappyCompression, ZstdCompression} { 1028 b.Run(fmt.Sprintf("compression=%s", comp), func(b *testing.B) { 1029 opts := WriterOptions{ 1030 BlockRestartInterval: 16, 1031 BlockSize: bs, 1032 Comparer: comparer, 1033 Compression: comp, 1034 TableFormat: format, 1035 } 1036 if filter { 1037 opts.FilterPolicy = bloom.FilterPolicy(10) 1038 } 1039 f := &discardFile{} 1040 b.ResetTimer() 1041 for i := 0; i < b.N; i++ { 1042 f.wrote = 0 1043 w := NewWriter(f, opts) 1044 1045 for j := range keys { 1046 if err := w.Set(keys[j], keys[j]); err != nil { 1047 b.Fatal(err) 1048 } 1049 } 1050 if err := w.Close(); err != nil { 1051 b.Fatal(err) 1052 } 1053 b.SetBytes(int64(f.wrote)) 1054 } 1055 }) 1056 } 1057 }) 1058 } 1059 }) 1060 } 1061 } 1062 1063 var test4bSuffixComparer = &base.Comparer{ 1064 Compare: base.DefaultComparer.Compare, 1065 Equal: base.DefaultComparer.Equal, 1066 Separator: base.DefaultComparer.Separator, 1067 Successor: base.DefaultComparer.Successor, 1068 Split: func(key []byte) int { 1069 if len(key) > 4 { 1070 return len(key) - 4 1071 } 1072 return len(key) 1073 }, 1074 Name: "comparer-split-4b-suffix", 1075 }