github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/scan_internal_test.go (about) 1 // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "context" 9 "fmt" 10 "math" 11 "strconv" 12 "strings" 13 "testing" 14 "time" 15 16 "github.com/cockroachdb/datadriven" 17 "github.com/cockroachdb/errors" 18 "github.com/cockroachdb/pebble/bloom" 19 "github.com/cockroachdb/pebble/internal/base" 20 "github.com/cockroachdb/pebble/internal/itertest" 21 "github.com/cockroachdb/pebble/internal/keyspan" 22 "github.com/cockroachdb/pebble/internal/rangekey" 23 "github.com/cockroachdb/pebble/internal/testkeys" 24 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 25 "github.com/cockroachdb/pebble/objstorage/remote" 26 "github.com/cockroachdb/pebble/sstable" 27 "github.com/cockroachdb/pebble/vfs" 28 "github.com/stretchr/testify/require" 29 ) 30 31 func TestScanStatistics(t *testing.T) { 32 var d *DB 33 type scanInternalReader interface { 34 ScanStatistics( 35 ctx context.Context, 36 lower, upper []byte, 37 opts ScanStatisticsOptions, 38 ) (LSMKeyStatistics, error) 39 } 40 batches := map[string]*Batch{} 41 snaps := map[string]*Snapshot{} 42 ctx := context.TODO() 43 44 getOpts := func() *Options { 45 opts := &Options{ 46 FS: vfs.NewMem(), 47 Logger: testLogger{t: t}, 48 Comparer: testkeys.Comparer, 49 FormatMajorVersion: FormatRangeKeys, 50 BlockPropertyCollectors: []func() BlockPropertyCollector{ 51 sstable.NewTestKeysBlockPropertyCollector, 52 }, 53 } 54 opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 55 "": remote.NewInMem(), 56 }) 57 opts.Experimental.CreateOnShared = remote.CreateOnSharedAll 58 opts.Experimental.CreateOnSharedLocator = "" 59 opts.DisableAutomaticCompactions = true 60 opts.EnsureDefaults() 61 opts.WithFSDefaults() 62 return opts 63 } 64 cleanup := func() (err error) { 65 for key, batch := range batches { 66 err = firstError(err, batch.Close()) 67 delete(batches, key) 68 } 69 for key, snap := range snaps { 70 err = firstError(err, snap.Close()) 71 delete(snaps, key) 72 } 73 if d != nil { 74 err = firstError(err, d.Close()) 75 d = nil 76 } 77 return err 78 } 79 defer cleanup() 80 81 datadriven.RunTest(t, "testdata/scan_statistics", func(t *testing.T, td *datadriven.TestData) string { 82 switch td.Cmd { 83 case "reset": 84 if err := cleanup(); err != nil { 85 t.Fatal(err) 86 return err.Error() 87 } 88 var err error 89 d, err = Open("", getOpts()) 90 require.NoError(t, err) 91 require.NoError(t, d.SetCreatorID(1)) 92 return "" 93 case "snapshot": 94 s := d.NewSnapshot() 95 var name string 96 td.ScanArgs(t, "name", &name) 97 snaps[name] = s 98 return "" 99 case "batch": 100 var name string 101 td.MaybeScanArgs(t, "name", &name) 102 commit := td.HasArg("commit") 103 b := d.NewIndexedBatch() 104 require.NoError(t, runBatchDefineCmd(td, b)) 105 var err error 106 if commit { 107 func() { 108 defer func() { 109 if r := recover(); r != nil { 110 err = errors.New(r.(string)) 111 } 112 }() 113 err = b.Commit(nil) 114 }() 115 } else if name != "" { 116 batches[name] = b 117 } 118 if err != nil { 119 return err.Error() 120 } 121 count := b.Count() 122 if commit { 123 return fmt.Sprintf("committed %d keys\n", count) 124 } 125 return fmt.Sprintf("wrote %d keys to batch %q\n", count, name) 126 case "compact": 127 if err := runCompactCmd(td, d); err != nil { 128 return err.Error() 129 } 130 return runLSMCmd(td, d) 131 case "flush": 132 err := d.Flush() 133 if err != nil { 134 return err.Error() 135 } 136 return "" 137 case "commit": 138 name := pluckStringCmdArg(td, "batch") 139 b := batches[name] 140 defer b.Close() 141 count := b.Count() 142 require.NoError(t, d.Apply(b, nil)) 143 delete(batches, name) 144 return fmt.Sprintf("committed %d keys\n", count) 145 case "scan-statistics": 146 var lower, upper []byte 147 var reader scanInternalReader = d 148 var b strings.Builder 149 var showSnapshotPinned = false 150 var keyKindsToDisplay []InternalKeyKind 151 var showLevels []string 152 153 for _, arg := range td.CmdArgs { 154 switch arg.Key { 155 case "lower": 156 lower = []byte(arg.Vals[0]) 157 case "upper": 158 upper = []byte(arg.Vals[0]) 159 case "show-snapshot-pinned": 160 showSnapshotPinned = true 161 case "keys": 162 for _, key := range arg.Vals { 163 keyKindsToDisplay = append(keyKindsToDisplay, base.ParseKind(key)) 164 } 165 case "levels": 166 showLevels = append(showLevels, arg.Vals...) 167 default: 168 } 169 } 170 stats, err := reader.ScanStatistics(ctx, lower, upper, ScanStatisticsOptions{}) 171 if err != nil { 172 return err.Error() 173 } 174 175 for _, level := range showLevels { 176 lvl, err := strconv.Atoi(level) 177 if err != nil || lvl >= numLevels { 178 return fmt.Sprintf("invalid level %s", level) 179 } 180 181 fmt.Fprintf(&b, "Level %d:\n", lvl) 182 if showSnapshotPinned { 183 fmt.Fprintf(&b, " compaction pinned count: %d\n", stats.Levels[lvl].SnapshotPinnedKeys) 184 } 185 for _, kind := range keyKindsToDisplay { 186 fmt.Fprintf(&b, " %s key count: %d\n", kind.String(), stats.Levels[lvl].KindsCount[kind]) 187 if stats.Levels[lvl].LatestKindsCount[kind] > 0 { 188 fmt.Fprintf(&b, " %s latest count: %d\n", kind.String(), stats.Levels[lvl].LatestKindsCount[kind]) 189 } 190 } 191 } 192 193 fmt.Fprintf(&b, "Aggregate:\n") 194 if showSnapshotPinned { 195 fmt.Fprintf(&b, " snapshot pinned count: %d\n", stats.Accumulated.SnapshotPinnedKeys) 196 } 197 for _, kind := range keyKindsToDisplay { 198 fmt.Fprintf(&b, " %s key count: %d\n", kind.String(), stats.Accumulated.KindsCount[kind]) 199 if stats.Accumulated.LatestKindsCount[kind] > 0 { 200 fmt.Fprintf(&b, " %s latest count: %d\n", kind.String(), stats.Accumulated.LatestKindsCount[kind]) 201 } 202 } 203 return b.String() 204 default: 205 return fmt.Sprintf("unknown command %q", td.Cmd) 206 } 207 }) 208 } 209 210 func TestScanInternal(t *testing.T) { 211 var d *DB 212 type scanInternalReader interface { 213 ScanInternal( 214 ctx context.Context, 215 categoryAndQoS sstable.CategoryAndQoS, 216 lower, upper []byte, 217 visitPointKey func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error, 218 visitRangeDel func(start, end []byte, seqNum uint64) error, 219 visitRangeKey func(start, end []byte, keys []keyspan.Key) error, 220 visitSharedFile func(sst *SharedSSTMeta) error, 221 ) error 222 } 223 batches := map[string]*Batch{} 224 snaps := map[string]*Snapshot{} 225 efos := map[string]*EventuallyFileOnlySnapshot{} 226 parseOpts := func(td *datadriven.TestData) (*Options, error) { 227 opts := &Options{ 228 FS: vfs.NewMem(), 229 Logger: testLogger{t: t}, 230 Comparer: testkeys.Comparer, 231 FormatMajorVersion: FormatVirtualSSTables, 232 BlockPropertyCollectors: []func() BlockPropertyCollector{ 233 sstable.NewTestKeysBlockPropertyCollector, 234 }, 235 } 236 opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 237 "": remote.NewInMem(), 238 }) 239 opts.Experimental.CreateOnShared = remote.CreateOnSharedAll 240 opts.Experimental.CreateOnSharedLocator = "" 241 opts.DisableAutomaticCompactions = true 242 opts.EnsureDefaults() 243 opts.WithFSDefaults() 244 245 for _, cmdArg := range td.CmdArgs { 246 switch cmdArg.Key { 247 case "format-major-version": 248 v, err := strconv.Atoi(cmdArg.Vals[0]) 249 if err != nil { 250 return nil, err 251 } 252 // Override the DB version. 253 opts.FormatMajorVersion = FormatMajorVersion(v) 254 case "block-size": 255 v, err := strconv.Atoi(cmdArg.Vals[0]) 256 if err != nil { 257 return nil, err 258 } 259 for i := range opts.Levels { 260 opts.Levels[i].BlockSize = v 261 } 262 case "index-block-size": 263 v, err := strconv.Atoi(cmdArg.Vals[0]) 264 if err != nil { 265 return nil, err 266 } 267 for i := range opts.Levels { 268 opts.Levels[i].IndexBlockSize = v 269 } 270 case "target-file-size": 271 v, err := strconv.Atoi(cmdArg.Vals[0]) 272 if err != nil { 273 return nil, err 274 } 275 for i := range opts.Levels { 276 opts.Levels[i].TargetFileSize = int64(v) 277 } 278 case "bloom-bits-per-key": 279 v, err := strconv.Atoi(cmdArg.Vals[0]) 280 if err != nil { 281 return nil, err 282 } 283 fp := bloom.FilterPolicy(v) 284 opts.Filters = map[string]FilterPolicy{fp.Name(): fp} 285 for i := range opts.Levels { 286 opts.Levels[i].FilterPolicy = fp 287 } 288 case "merger": 289 switch cmdArg.Vals[0] { 290 case "appender": 291 opts.Merger = base.DefaultMerger 292 default: 293 return nil, errors.Newf("unrecognized Merger %q\n", cmdArg.Vals[0]) 294 } 295 } 296 } 297 return opts, nil 298 } 299 cleanup := func() (err error) { 300 for key, batch := range batches { 301 err = firstError(err, batch.Close()) 302 delete(batches, key) 303 } 304 for key, snap := range snaps { 305 err = firstError(err, snap.Close()) 306 delete(snaps, key) 307 } 308 for key, es := range efos { 309 err = firstError(err, es.Close()) 310 delete(efos, key) 311 } 312 if d != nil { 313 err = firstError(err, d.Close()) 314 d = nil 315 } 316 return err 317 } 318 defer cleanup() 319 320 datadriven.RunTest(t, "testdata/scan_internal", func(t *testing.T, td *datadriven.TestData) string { 321 switch td.Cmd { 322 case "define": 323 if err := cleanup(); err != nil { 324 return err.Error() 325 } 326 opts, err := parseOpts(td) 327 if err != nil { 328 return err.Error() 329 } 330 d, err = runDBDefineCmd(td, opts) 331 if err != nil { 332 return err.Error() 333 } 334 return runLSMCmd(td, d) 335 336 case "reset": 337 if err := cleanup(); err != nil { 338 t.Fatal(err) 339 return err.Error() 340 } 341 opts, err := parseOpts(td) 342 if err != nil { 343 t.Fatal(err) 344 return err.Error() 345 } 346 347 d, err = Open("", opts) 348 require.NoError(t, err) 349 require.NoError(t, d.SetCreatorID(1)) 350 return "" 351 case "snapshot": 352 s := d.NewSnapshot() 353 var name string 354 td.ScanArgs(t, "name", &name) 355 snaps[name] = s 356 return "" 357 case "wait-for-file-only-snapshot": 358 if len(td.CmdArgs) != 1 { 359 panic("insufficient args for file-only-snapshot command") 360 } 361 name := td.CmdArgs[0].Key 362 es := efos[name] 363 if err := es.WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond); err != nil { 364 return err.Error() 365 } 366 return "ok" 367 case "file-only-snapshot": 368 if len(td.CmdArgs) != 1 { 369 panic("insufficient args for file-only-snapshot command") 370 } 371 name := td.CmdArgs[0].Key 372 var keyRanges []KeyRange 373 for _, line := range strings.Split(td.Input, "\n") { 374 fields := strings.Fields(line) 375 if len(fields) != 2 { 376 return "expected two fields for file-only snapshot KeyRanges" 377 } 378 kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])} 379 keyRanges = append(keyRanges, kr) 380 } 381 382 s := d.NewEventuallyFileOnlySnapshot(keyRanges) 383 efos[name] = s 384 return "ok" 385 case "batch": 386 var name string 387 td.MaybeScanArgs(t, "name", &name) 388 commit := td.HasArg("commit") 389 ingest := td.HasArg("ingest") 390 b := d.NewIndexedBatch() 391 require.NoError(t, runBatchDefineCmd(td, b)) 392 var err error 393 if commit { 394 func() { 395 defer func() { 396 if r := recover(); r != nil { 397 err = errors.New(r.(string)) 398 } 399 }() 400 err = b.Commit(nil) 401 }() 402 } else if ingest { 403 points, rangeDels, rangeKeys := batchSort(b) 404 file, err := d.opts.FS.Create("temp0.sst") 405 require.NoError(t, err) 406 w := sstable.NewWriter(objstorageprovider.NewFileWritable(file), d.opts.MakeWriterOptions(0, sstable.TableFormatPebblev4)) 407 for span := rangeDels.First(); span != nil; span = rangeDels.Next() { 408 require.NoError(t, w.DeleteRange(span.Start, span.End)) 409 } 410 rangeDels.Close() 411 for span := rangeKeys.First(); span != nil; span = rangeKeys.Next() { 412 keys := []keyspan.Key{} 413 for i := range span.Keys { 414 keys = append(keys, span.Keys[i]) 415 keys[i].Trailer = base.MakeTrailer(0, keys[i].Kind()) 416 } 417 keyspan.SortKeysByTrailer(&keys) 418 newSpan := &keyspan.Span{Start: span.Start, End: span.End, Keys: keys} 419 rangekey.Encode(newSpan, w.AddRangeKey) 420 } 421 rangeKeys.Close() 422 for key, val := points.First(); key != nil; key, val = points.Next() { 423 var value []byte 424 value, _, err = val.Value(value) 425 require.NoError(t, err) 426 require.NoError(t, w.Add(*key, value)) 427 } 428 points.Close() 429 require.NoError(t, w.Close()) 430 require.NoError(t, d.Ingest([]string{"temp0.sst"})) 431 } else if name != "" { 432 batches[name] = b 433 } 434 if err != nil { 435 return err.Error() 436 } 437 count := b.Count() 438 if commit { 439 return fmt.Sprintf("committed %d keys\n", count) 440 } 441 return fmt.Sprintf("wrote %d keys to batch %q\n", count, name) 442 case "compact": 443 if err := runCompactCmd(td, d); err != nil { 444 return err.Error() 445 } 446 return runLSMCmd(td, d) 447 case "flush": 448 err := d.Flush() 449 if err != nil { 450 return err.Error() 451 } 452 return "" 453 case "lsm": 454 return runLSMCmd(td, d) 455 case "commit": 456 name := pluckStringCmdArg(td, "batch") 457 b := batches[name] 458 defer b.Close() 459 count := b.Count() 460 require.NoError(t, d.Apply(b, nil)) 461 delete(batches, name) 462 return fmt.Sprintf("committed %d keys\n", count) 463 case "scan-internal": 464 var lower, upper []byte 465 var reader scanInternalReader = d 466 var b strings.Builder 467 var fileVisitor func(sst *SharedSSTMeta) error 468 for _, arg := range td.CmdArgs { 469 switch arg.Key { 470 case "lower": 471 lower = []byte(arg.Vals[0]) 472 case "upper": 473 upper = []byte(arg.Vals[0]) 474 case "snapshot": 475 name := arg.Vals[0] 476 snap, ok := snaps[name] 477 if !ok { 478 return fmt.Sprintf("no snapshot found for name %s", name) 479 } 480 reader = snap 481 case "file-only-snapshot": 482 name := arg.Vals[0] 483 efos, ok := efos[name] 484 if !ok { 485 return fmt.Sprintf("no snapshot found for name %s", name) 486 } 487 reader = efos 488 case "skip-shared": 489 fileVisitor = func(sst *SharedSSTMeta) error { 490 fmt.Fprintf(&b, "shared file: %s [%s-%s] [point=%s-%s] [range=%s-%s]\n", sst.fileNum, sst.Smallest.String(), sst.Largest.String(), sst.SmallestPointKey.String(), sst.LargestPointKey.String(), sst.SmallestRangeKey.String(), sst.LargestRangeKey.String()) 491 return nil 492 } 493 } 494 } 495 err := reader.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, lower, upper, 496 func(key *InternalKey, value LazyValue, _ IteratorLevel) error { 497 v := value.InPlaceValue() 498 fmt.Fprintf(&b, "%s (%s)\n", key, v) 499 return nil 500 }, 501 func(start, end []byte, seqNum uint64) error { 502 fmt.Fprintf(&b, "%s-%s#%d,RANGEDEL\n", start, end, seqNum) 503 return nil 504 }, 505 func(start, end []byte, keys []keyspan.Key) error { 506 s := keyspan.Span{Start: start, End: end, Keys: keys} 507 fmt.Fprintf(&b, "%s\n", s.String()) 508 return nil 509 }, 510 fileVisitor, 511 ) 512 if err != nil { 513 return err.Error() 514 } 515 return b.String() 516 default: 517 return fmt.Sprintf("unknown command %q", td.Cmd) 518 } 519 }) 520 } 521 522 func TestPointCollapsingIter(t *testing.T) { 523 var def string 524 datadriven.RunTest(t, "testdata/point_collapsing_iter", func(t *testing.T, d *datadriven.TestData) string { 525 switch d.Cmd { 526 case "define": 527 def = d.Input 528 return "" 529 530 case "iter": 531 f := &fakeIter{} 532 var spans []keyspan.Span 533 for _, line := range strings.Split(def, "\n") { 534 for _, key := range strings.Fields(line) { 535 j := strings.Index(key, ":") 536 k := base.ParseInternalKey(key[:j]) 537 v := []byte(key[j+1:]) 538 if k.Kind() == InternalKeyKindRangeDelete { 539 spans = append(spans, keyspan.Span{ 540 Start: k.UserKey, 541 End: v, 542 Keys: []keyspan.Key{{Trailer: k.Trailer}}, 543 KeysOrder: 0, 544 }) 545 continue 546 } 547 f.keys = append(f.keys, k) 548 f.vals = append(f.vals, v) 549 } 550 } 551 552 ksIter := keyspan.NewIter(base.DefaultComparer.Compare, spans) 553 pcIter := &pointCollapsingIterator{ 554 comparer: base.DefaultComparer, 555 merge: base.DefaultMerger.Merge, 556 seqNum: math.MaxUint64, 557 } 558 pcIter.iter.Init(base.DefaultComparer, f, ksIter, keyspan.InterleavingIterOpts{}) 559 defer pcIter.Close() 560 return itertest.RunInternalIterCmd(t, d, pcIter, itertest.Verbose) 561 562 default: 563 return fmt.Sprintf("unknown command: %s", d.Cmd) 564 } 565 }) 566 }