github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/scan_internal_test.go (about) 1 // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package pebble 6 7 import ( 8 "context" 9 "fmt" 10 "math" 11 "strconv" 12 "strings" 13 "testing" 14 "time" 15 16 "github.com/cockroachdb/datadriven" 17 "github.com/cockroachdb/errors" 18 "github.com/cockroachdb/pebble/bloom" 19 "github.com/cockroachdb/pebble/internal/base" 20 "github.com/cockroachdb/pebble/internal/keyspan" 21 "github.com/cockroachdb/pebble/internal/rangekey" 22 "github.com/cockroachdb/pebble/internal/testkeys" 23 "github.com/cockroachdb/pebble/objstorage/objstorageprovider" 24 "github.com/cockroachdb/pebble/objstorage/remote" 25 "github.com/cockroachdb/pebble/sstable" 26 "github.com/cockroachdb/pebble/vfs" 27 "github.com/stretchr/testify/require" 28 ) 29 30 func TestScanStatistics(t *testing.T) { 31 var d *DB 32 type scanInternalReader interface { 33 ScanStatistics( 34 ctx context.Context, 35 lower, upper []byte, 36 opts ScanStatisticsOptions, 37 ) (LSMKeyStatistics, error) 38 } 39 batches := map[string]*Batch{} 40 snaps := map[string]*Snapshot{} 41 ctx := context.TODO() 42 43 getOpts := func() *Options { 44 opts := &Options{ 45 FS: vfs.NewMem(), 46 Logger: testLogger{t: t}, 47 Comparer: testkeys.Comparer, 48 FormatMajorVersion: FormatRangeKeys, 49 BlockPropertyCollectors: []func() BlockPropertyCollector{ 50 sstable.NewTestKeysBlockPropertyCollector, 51 }, 52 } 53 opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 54 "": remote.NewInMem(), 55 }) 56 opts.Experimental.CreateOnShared = remote.CreateOnSharedAll 57 opts.Experimental.CreateOnSharedLocator = "" 58 opts.DisableAutomaticCompactions = true 59 opts.EnsureDefaults() 60 opts.WithFSDefaults() 61 return opts 62 } 63 cleanup := func() (err error) { 64 for key, batch := range batches { 65 err = firstError(err, batch.Close()) 66 delete(batches, key) 67 } 68 for key, snap := range snaps { 69 err = firstError(err, snap.Close()) 70 delete(snaps, key) 71 } 72 if d != nil { 73 err = firstError(err, d.Close()) 74 d = nil 75 } 76 return err 77 } 78 defer cleanup() 79 80 datadriven.RunTest(t, "testdata/scan_statistics", func(t *testing.T, td *datadriven.TestData) string { 81 switch td.Cmd { 82 case "reset": 83 if err := cleanup(); err != nil { 84 t.Fatal(err) 85 return err.Error() 86 } 87 var err error 88 d, err = Open("", getOpts()) 89 require.NoError(t, err) 90 require.NoError(t, d.SetCreatorID(1)) 91 return "" 92 case "snapshot": 93 s := d.NewSnapshot() 94 var name string 95 td.ScanArgs(t, "name", &name) 96 snaps[name] = s 97 return "" 98 case "batch": 99 var name string 100 td.MaybeScanArgs(t, "name", &name) 101 commit := td.HasArg("commit") 102 b := d.NewIndexedBatch() 103 require.NoError(t, runBatchDefineCmd(td, b)) 104 var err error 105 if commit { 106 func() { 107 defer func() { 108 if r := recover(); r != nil { 109 err = errors.New(r.(string)) 110 } 111 }() 112 err = b.Commit(nil) 113 }() 114 } else if name != "" { 115 batches[name] = b 116 } 117 if err != nil { 118 return err.Error() 119 } 120 count := b.Count() 121 if commit { 122 return fmt.Sprintf("committed %d keys\n", count) 123 } 124 return fmt.Sprintf("wrote %d keys to batch %q\n", count, name) 125 case "compact": 126 if err := runCompactCmd(td, d); err != nil { 127 return err.Error() 128 } 129 return runLSMCmd(td, d) 130 case "flush": 131 err := d.Flush() 132 if err != nil { 133 return err.Error() 134 } 135 return "" 136 case "commit": 137 name := pluckStringCmdArg(td, "batch") 138 b := batches[name] 139 defer b.Close() 140 count := b.Count() 141 require.NoError(t, d.Apply(b, nil)) 142 delete(batches, name) 143 return fmt.Sprintf("committed %d keys\n", count) 144 case "scan-statistics": 145 var lower, upper []byte 146 var reader scanInternalReader = d 147 var b strings.Builder 148 var showSnapshotPinned = false 149 var keyKindsToDisplay []InternalKeyKind 150 var showLevels []string 151 152 for _, arg := range td.CmdArgs { 153 switch arg.Key { 154 case "lower": 155 lower = []byte(arg.Vals[0]) 156 case "upper": 157 upper = []byte(arg.Vals[0]) 158 case "show-snapshot-pinned": 159 showSnapshotPinned = true 160 case "keys": 161 for _, key := range arg.Vals { 162 keyKindsToDisplay = append(keyKindsToDisplay, base.ParseKind(key)) 163 } 164 case "levels": 165 showLevels = append(showLevels, arg.Vals...) 166 default: 167 } 168 } 169 stats, err := reader.ScanStatistics(ctx, lower, upper, ScanStatisticsOptions{}) 170 if err != nil { 171 return err.Error() 172 } 173 174 for _, level := range showLevels { 175 lvl, err := strconv.Atoi(level) 176 if err != nil || lvl >= numLevels { 177 return fmt.Sprintf("invalid level %s", level) 178 } 179 180 fmt.Fprintf(&b, "Level %d:\n", lvl) 181 if showSnapshotPinned { 182 fmt.Fprintf(&b, " compaction pinned count: %d\n", stats.Levels[lvl].SnapshotPinnedKeys) 183 } 184 for _, kind := range keyKindsToDisplay { 185 fmt.Fprintf(&b, " %s key count: %d\n", kind.String(), stats.Levels[lvl].KindsCount[kind]) 186 if stats.Levels[lvl].LatestKindsCount[kind] > 0 { 187 fmt.Fprintf(&b, " %s latest count: %d\n", kind.String(), stats.Levels[lvl].LatestKindsCount[kind]) 188 } 189 } 190 } 191 192 fmt.Fprintf(&b, "Aggregate:\n") 193 if showSnapshotPinned { 194 fmt.Fprintf(&b, " snapshot pinned count: %d\n", stats.Accumulated.SnapshotPinnedKeys) 195 } 196 for _, kind := range keyKindsToDisplay { 197 fmt.Fprintf(&b, " %s key count: %d\n", kind.String(), stats.Accumulated.KindsCount[kind]) 198 if stats.Accumulated.LatestKindsCount[kind] > 0 { 199 fmt.Fprintf(&b, " %s latest count: %d\n", kind.String(), stats.Accumulated.LatestKindsCount[kind]) 200 } 201 } 202 return b.String() 203 default: 204 return fmt.Sprintf("unknown command %q", td.Cmd) 205 } 206 }) 207 } 208 209 func TestScanInternal(t *testing.T) { 210 var d *DB 211 type scanInternalReader interface { 212 ScanInternal( 213 ctx context.Context, 214 lower, upper []byte, 215 visitPointKey func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error, 216 visitRangeDel func(start, end []byte, seqNum uint64) error, 217 visitRangeKey func(start, end []byte, keys []keyspan.Key) error, 218 visitSharedFile func(sst *SharedSSTMeta) error, 219 ) error 220 } 221 batches := map[string]*Batch{} 222 snaps := map[string]*Snapshot{} 223 efos := map[string]*EventuallyFileOnlySnapshot{} 224 parseOpts := func(td *datadriven.TestData) (*Options, error) { 225 opts := &Options{ 226 FS: vfs.NewMem(), 227 Logger: testLogger{t: t}, 228 Comparer: testkeys.Comparer, 229 FormatMajorVersion: FormatVirtualSSTables, 230 BlockPropertyCollectors: []func() BlockPropertyCollector{ 231 sstable.NewTestKeysBlockPropertyCollector, 232 }, 233 } 234 opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ 235 "": remote.NewInMem(), 236 }) 237 opts.Experimental.CreateOnShared = remote.CreateOnSharedAll 238 opts.Experimental.CreateOnSharedLocator = "" 239 opts.DisableAutomaticCompactions = true 240 opts.EnsureDefaults() 241 opts.WithFSDefaults() 242 243 for _, cmdArg := range td.CmdArgs { 244 switch cmdArg.Key { 245 case "format-major-version": 246 v, err := strconv.Atoi(cmdArg.Vals[0]) 247 if err != nil { 248 return nil, err 249 } 250 // Override the DB version. 251 opts.FormatMajorVersion = FormatMajorVersion(v) 252 case "block-size": 253 v, err := strconv.Atoi(cmdArg.Vals[0]) 254 if err != nil { 255 return nil, err 256 } 257 for i := range opts.Levels { 258 opts.Levels[i].BlockSize = v 259 } 260 case "index-block-size": 261 v, err := strconv.Atoi(cmdArg.Vals[0]) 262 if err != nil { 263 return nil, err 264 } 265 for i := range opts.Levels { 266 opts.Levels[i].IndexBlockSize = v 267 } 268 case "target-file-size": 269 v, err := strconv.Atoi(cmdArg.Vals[0]) 270 if err != nil { 271 return nil, err 272 } 273 for i := range opts.Levels { 274 opts.Levels[i].TargetFileSize = int64(v) 275 } 276 case "bloom-bits-per-key": 277 v, err := strconv.Atoi(cmdArg.Vals[0]) 278 if err != nil { 279 return nil, err 280 } 281 fp := bloom.FilterPolicy(v) 282 opts.Filters = map[string]FilterPolicy{fp.Name(): fp} 283 for i := range opts.Levels { 284 opts.Levels[i].FilterPolicy = fp 285 } 286 case "merger": 287 switch cmdArg.Vals[0] { 288 case "appender": 289 opts.Merger = base.DefaultMerger 290 default: 291 return nil, errors.Newf("unrecognized Merger %q\n", cmdArg.Vals[0]) 292 } 293 } 294 } 295 return opts, nil 296 } 297 cleanup := func() (err error) { 298 for key, batch := range batches { 299 err = firstError(err, batch.Close()) 300 delete(batches, key) 301 } 302 for key, snap := range snaps { 303 err = firstError(err, snap.Close()) 304 delete(snaps, key) 305 } 306 for key, es := range efos { 307 err = firstError(err, es.Close()) 308 delete(efos, key) 309 } 310 if d != nil { 311 err = firstError(err, d.Close()) 312 d = nil 313 } 314 return err 315 } 316 defer cleanup() 317 318 datadriven.RunTest(t, "testdata/scan_internal", func(t *testing.T, td *datadriven.TestData) string { 319 switch td.Cmd { 320 case "define": 321 if err := cleanup(); err != nil { 322 return err.Error() 323 } 324 opts, err := parseOpts(td) 325 if err != nil { 326 return err.Error() 327 } 328 d, err = runDBDefineCmd(td, opts) 329 if err != nil { 330 return err.Error() 331 } 332 return runLSMCmd(td, d) 333 334 case "reset": 335 if err := cleanup(); err != nil { 336 t.Fatal(err) 337 return err.Error() 338 } 339 opts, err := parseOpts(td) 340 if err != nil { 341 t.Fatal(err) 342 return err.Error() 343 } 344 345 d, err = Open("", opts) 346 require.NoError(t, err) 347 require.NoError(t, d.SetCreatorID(1)) 348 return "" 349 case "snapshot": 350 s := d.NewSnapshot() 351 var name string 352 td.ScanArgs(t, "name", &name) 353 snaps[name] = s 354 return "" 355 case "wait-for-file-only-snapshot": 356 if len(td.CmdArgs) != 1 { 357 panic("insufficient args for file-only-snapshot command") 358 } 359 name := td.CmdArgs[0].Key 360 es := efos[name] 361 if err := es.WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond); err != nil { 362 return err.Error() 363 } 364 return "ok" 365 case "file-only-snapshot": 366 if len(td.CmdArgs) != 1 { 367 panic("insufficient args for file-only-snapshot command") 368 } 369 name := td.CmdArgs[0].Key 370 var keyRanges []KeyRange 371 for _, line := range strings.Split(td.Input, "\n") { 372 fields := strings.Fields(line) 373 if len(fields) != 2 { 374 return "expected two fields for file-only snapshot KeyRanges" 375 } 376 kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])} 377 keyRanges = append(keyRanges, kr) 378 } 379 380 s := d.NewEventuallyFileOnlySnapshot(keyRanges) 381 efos[name] = s 382 return "ok" 383 case "batch": 384 var name string 385 td.MaybeScanArgs(t, "name", &name) 386 commit := td.HasArg("commit") 387 ingest := td.HasArg("ingest") 388 b := d.NewIndexedBatch() 389 require.NoError(t, runBatchDefineCmd(td, b)) 390 var err error 391 if commit { 392 func() { 393 defer func() { 394 if r := recover(); r != nil { 395 err = errors.New(r.(string)) 396 } 397 }() 398 err = b.Commit(nil) 399 }() 400 } else if ingest { 401 points, rangeDels, rangeKeys := batchSort(b) 402 file, err := d.opts.FS.Create("temp0.sst") 403 require.NoError(t, err) 404 w := sstable.NewWriter(objstorageprovider.NewFileWritable(file), d.opts.MakeWriterOptions(0, sstable.TableFormatPebblev4)) 405 for span := rangeDels.First(); span != nil; span = rangeDels.Next() { 406 require.NoError(t, w.DeleteRange(span.Start, span.End)) 407 } 408 rangeDels.Close() 409 for span := rangeKeys.First(); span != nil; span = rangeKeys.Next() { 410 keys := []keyspan.Key{} 411 for i := range span.Keys { 412 keys = append(keys, span.Keys[i]) 413 keys[i].Trailer = base.MakeTrailer(0, keys[i].Kind()) 414 } 415 keyspan.SortKeysByTrailer(&keys) 416 newSpan := &keyspan.Span{Start: span.Start, End: span.End, Keys: keys} 417 rangekey.Encode(newSpan, w.AddRangeKey) 418 } 419 rangeKeys.Close() 420 for key, val := points.First(); key != nil; key, val = points.Next() { 421 var value []byte 422 value, _, err = val.Value(value) 423 require.NoError(t, err) 424 require.NoError(t, w.Add(*key, value)) 425 } 426 points.Close() 427 require.NoError(t, w.Close()) 428 require.NoError(t, d.Ingest([]string{"temp0.sst"})) 429 } else if name != "" { 430 batches[name] = b 431 } 432 if err != nil { 433 return err.Error() 434 } 435 count := b.Count() 436 if commit { 437 return fmt.Sprintf("committed %d keys\n", count) 438 } 439 return fmt.Sprintf("wrote %d keys to batch %q\n", count, name) 440 case "compact": 441 if err := runCompactCmd(td, d); err != nil { 442 return err.Error() 443 } 444 return runLSMCmd(td, d) 445 case "flush": 446 err := d.Flush() 447 if err != nil { 448 return err.Error() 449 } 450 return "" 451 case "lsm": 452 return runLSMCmd(td, d) 453 case "commit": 454 name := pluckStringCmdArg(td, "batch") 455 b := batches[name] 456 defer b.Close() 457 count := b.Count() 458 require.NoError(t, d.Apply(b, nil)) 459 delete(batches, name) 460 return fmt.Sprintf("committed %d keys\n", count) 461 case "scan-internal": 462 var lower, upper []byte 463 var reader scanInternalReader = d 464 var b strings.Builder 465 var fileVisitor func(sst *SharedSSTMeta) error 466 for _, arg := range td.CmdArgs { 467 switch arg.Key { 468 case "lower": 469 lower = []byte(arg.Vals[0]) 470 case "upper": 471 upper = []byte(arg.Vals[0]) 472 case "snapshot": 473 name := arg.Vals[0] 474 snap, ok := snaps[name] 475 if !ok { 476 return fmt.Sprintf("no snapshot found for name %s", name) 477 } 478 reader = snap 479 case "file-only-snapshot": 480 name := arg.Vals[0] 481 efos, ok := efos[name] 482 if !ok { 483 return fmt.Sprintf("no snapshot found for name %s", name) 484 } 485 reader = efos 486 case "skip-shared": 487 fileVisitor = func(sst *SharedSSTMeta) error { 488 fmt.Fprintf(&b, "shared file: %s [%s-%s] [point=%s-%s] [range=%s-%s]\n", sst.fileNum, sst.Smallest.String(), sst.Largest.String(), sst.SmallestPointKey.String(), sst.LargestPointKey.String(), sst.SmallestRangeKey.String(), sst.LargestRangeKey.String()) 489 return nil 490 } 491 } 492 } 493 err := reader.ScanInternal(context.TODO(), lower, upper, 494 func(key *InternalKey, value LazyValue, _ IteratorLevel) error { 495 v := value.InPlaceValue() 496 fmt.Fprintf(&b, "%s (%s)\n", key, v) 497 return nil 498 }, 499 func(start, end []byte, seqNum uint64) error { 500 fmt.Fprintf(&b, "%s-%s#%d,RANGEDEL\n", start, end, seqNum) 501 return nil 502 }, 503 func(start, end []byte, keys []keyspan.Key) error { 504 s := keyspan.Span{Start: start, End: end, Keys: keys} 505 fmt.Fprintf(&b, "%s\n", s.String()) 506 return nil 507 }, 508 fileVisitor, 509 ) 510 if err != nil { 511 return err.Error() 512 } 513 return b.String() 514 default: 515 return fmt.Sprintf("unknown command %q", td.Cmd) 516 } 517 }) 518 } 519 520 func TestPointCollapsingIter(t *testing.T) { 521 var def string 522 datadriven.RunTest(t, "testdata/point_collapsing_iter", func(t *testing.T, d *datadriven.TestData) string { 523 switch d.Cmd { 524 case "define": 525 def = d.Input 526 return "" 527 528 case "iter": 529 f := &fakeIter{} 530 var spans []keyspan.Span 531 for _, line := range strings.Split(def, "\n") { 532 for _, key := range strings.Fields(line) { 533 j := strings.Index(key, ":") 534 k := base.ParseInternalKey(key[:j]) 535 v := []byte(key[j+1:]) 536 if k.Kind() == InternalKeyKindRangeDelete { 537 spans = append(spans, keyspan.Span{ 538 Start: k.UserKey, 539 End: v, 540 Keys: []keyspan.Key{{Trailer: k.Trailer}}, 541 KeysOrder: 0, 542 }) 543 continue 544 } 545 f.keys = append(f.keys, k) 546 f.vals = append(f.vals, v) 547 } 548 } 549 550 ksIter := keyspan.NewIter(base.DefaultComparer.Compare, spans) 551 pcIter := &pointCollapsingIterator{ 552 comparer: base.DefaultComparer, 553 merge: base.DefaultMerger.Merge, 554 seqNum: math.MaxUint64, 555 } 556 pcIter.iter.Init(base.DefaultComparer, f, ksIter, keyspan.InterleavingIterOpts{}) 557 defer pcIter.Close() 558 559 return runInternalIterCmd(t, d, pcIter, iterCmdVerboseKey) 560 561 default: 562 return fmt.Sprintf("unknown command: %s", d.Cmd) 563 } 564 }) 565 }