github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/tool/find.go (about) 1 // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package tool 6 7 import ( 8 "bytes" 9 "cmp" 10 "fmt" 11 "io" 12 "slices" 13 "sort" 14 15 "github.com/cockroachdb/pebble" 16 "github.com/cockroachdb/pebble/internal/base" 17 "github.com/cockroachdb/pebble/internal/keyspan" 18 "github.com/cockroachdb/pebble/internal/manifest" 19 "github.com/cockroachdb/pebble/internal/private" 20 "github.com/cockroachdb/pebble/internal/rangedel" 21 "github.com/cockroachdb/pebble/record" 22 "github.com/cockroachdb/pebble/sstable" 23 "github.com/spf13/cobra" 24 ) 25 26 type findRef struct { 27 key base.InternalKey 28 value []byte 29 fileNum base.FileNum 30 } 31 32 // findT implements the find tool. 33 // 34 // TODO(bananabrick): Add support for virtual sstables in this tool. Currently, 35 // the tool will work because we're parsing files from disk, so virtual sstables 36 // will never be added to findT.tables. The manifest could contain information 37 // about virtual sstables. This is fine because the manifest is only used to 38 // compute the findT.editRefs, and editRefs is only used if a file in 39 // findT.tables contains a key. Of course, the tool won't be completely 40 // accurate without dealing with virtual sstable case. 41 type findT struct { 42 Root *cobra.Command 43 44 // Configuration. 45 opts *pebble.Options 46 comparers sstable.Comparers 47 mergers sstable.Mergers 48 49 // Flags. 50 comparerName string 51 fmtKey keyFormatter 52 fmtValue valueFormatter 53 verbose bool 54 55 // Map from file num to path on disk. 56 files map[base.DiskFileNum]string 57 // Map from file num to version edit index which references the file num. 58 editRefs map[base.FileNum][]int 59 // List of version edits. 60 edits []manifest.VersionEdit 61 // Sorted list of WAL file nums. 62 logs []base.DiskFileNum 63 // Sorted list of manifest file nums. 64 manifests []base.DiskFileNum 65 // Sorted list of table file nums. 66 tables []base.FileNum 67 // Set of tables that contains references to the search key. 68 tableRefs map[base.FileNum]bool 69 // Map from file num to table metadata. 70 tableMeta map[base.FileNum]*manifest.FileMetadata 71 // List of error messages for SSTables that could not be decoded. 72 errors []string 73 } 74 75 func newFind( 76 opts *pebble.Options, 77 comparers sstable.Comparers, 78 defaultComparer string, 79 mergers sstable.Mergers, 80 ) *findT { 81 f := &findT{ 82 opts: opts, 83 comparers: comparers, 84 mergers: mergers, 85 } 86 f.fmtKey.mustSet("quoted") 87 f.fmtValue.mustSet("[%x]") 88 89 f.Root = &cobra.Command{ 90 Use: "find <dir> <key>", 91 Short: "find references to the specified key", 92 Long: ` 93 Find references to the specified key and any range tombstones that contain the 94 key. This includes references to the key in WAL files and sstables, and the 95 provenance of the sstables (flushed, ingested, compacted). 96 `, 97 Args: cobra.ExactArgs(2), 98 Run: f.run, 99 } 100 101 f.Root.Flags().BoolVarP( 102 &f.verbose, "verbose", "v", false, "verbose output") 103 f.Root.Flags().StringVar( 104 &f.comparerName, "comparer", defaultComparer, "comparer name") 105 f.Root.Flags().Var( 106 &f.fmtKey, "key", "key formatter") 107 f.Root.Flags().Var( 108 &f.fmtValue, "value", "value formatter") 109 return f 110 } 111 112 func (f *findT) run(cmd *cobra.Command, args []string) { 113 stdout, stderr := cmd.OutOrStdout(), cmd.OutOrStderr() 114 var key key 115 if err := key.Set(args[1]); err != nil { 116 fmt.Fprintf(stdout, "%s\n", err) 117 return 118 } 119 120 if err := f.findFiles(stdout, stderr, args[0]); err != nil { 121 fmt.Fprintf(stdout, "%s\n", err) 122 return 123 } 124 f.readManifests(stdout) 125 126 f.opts.Comparer = f.comparers[f.comparerName] 127 if f.opts.Comparer == nil { 128 fmt.Fprintf(stderr, "unknown comparer %q", f.comparerName) 129 return 130 } 131 f.fmtKey.setForComparer(f.opts.Comparer.Name, f.comparers) 132 f.fmtValue.setForComparer(f.opts.Comparer.Name, f.comparers) 133 134 refs := f.search(stdout, key) 135 var lastFileNum base.FileNum 136 for i := range refs { 137 r := &refs[i] 138 if lastFileNum != r.fileNum { 139 lastFileNum = r.fileNum 140 fmt.Fprintf(stdout, "%s", f.opts.FS.PathBase(f.files[r.fileNum.DiskFileNum()])) 141 if m := f.tableMeta[r.fileNum]; m != nil { 142 fmt.Fprintf(stdout, " ") 143 formatKeyRange(stdout, f.fmtKey, &m.Smallest, &m.Largest) 144 } 145 fmt.Fprintf(stdout, "\n") 146 if p := f.tableProvenance(r.fileNum); p != "" { 147 fmt.Fprintf(stdout, " (%s)\n", p) 148 } 149 } 150 fmt.Fprintf(stdout, " ") 151 formatKeyValue(stdout, f.fmtKey, f.fmtValue, &r.key, r.value) 152 } 153 154 for _, errorMsg := range f.errors { 155 fmt.Fprint(stdout, errorMsg) 156 } 157 } 158 159 // Find all of the manifests, logs, and tables in the specified directory. 160 func (f *findT) findFiles(stdout, stderr io.Writer, dir string) error { 161 f.files = make(map[base.DiskFileNum]string) 162 f.editRefs = make(map[base.FileNum][]int) 163 f.logs = nil 164 f.manifests = nil 165 f.tables = nil 166 f.tableMeta = make(map[base.FileNum]*manifest.FileMetadata) 167 168 if _, err := f.opts.FS.Stat(dir); err != nil { 169 return err 170 } 171 172 walk(stderr, f.opts.FS, dir, func(path string) { 173 ft, fileNum, ok := base.ParseFilename(f.opts.FS, path) 174 if !ok { 175 return 176 } 177 switch ft { 178 case base.FileTypeLog: 179 f.logs = append(f.logs, fileNum) 180 case base.FileTypeManifest: 181 f.manifests = append(f.manifests, fileNum) 182 case base.FileTypeTable: 183 f.tables = append(f.tables, fileNum.FileNum()) 184 default: 185 return 186 } 187 f.files[fileNum] = path 188 }) 189 190 slices.Sort(f.logs) 191 slices.Sort(f.manifests) 192 slices.Sort(f.tables) 193 194 if f.verbose { 195 fmt.Fprintf(stdout, "%s\n", dir) 196 fmt.Fprintf(stdout, "%5d %s\n", len(f.manifests), makePlural("manifest", int64(len(f.manifests)))) 197 fmt.Fprintf(stdout, "%5d %s\n", len(f.logs), makePlural("log", int64(len(f.logs)))) 198 fmt.Fprintf(stdout, "%5d %s\n", len(f.tables), makePlural("sstable", int64(len(f.tables)))) 199 } 200 return nil 201 } 202 203 // Read the manifests and populate the editRefs map which is used to determine 204 // the provenance and metadata of tables. 205 func (f *findT) readManifests(stdout io.Writer) { 206 for _, fileNum := range f.manifests { 207 func() { 208 path := f.files[fileNum] 209 mf, err := f.opts.FS.Open(path) 210 if err != nil { 211 fmt.Fprintf(stdout, "%s\n", err) 212 return 213 } 214 defer mf.Close() 215 216 if f.verbose { 217 fmt.Fprintf(stdout, "%s\n", path) 218 } 219 220 rr := record.NewReader(mf, 0 /* logNum */) 221 for { 222 r, err := rr.Next() 223 if err != nil { 224 if err != io.EOF { 225 fmt.Fprintf(stdout, "%s: %s\n", path, err) 226 } 227 break 228 } 229 230 var ve manifest.VersionEdit 231 if err := ve.Decode(r); err != nil { 232 fmt.Fprintf(stdout, "%s: %s\n", path, err) 233 break 234 } 235 i := len(f.edits) 236 f.edits = append(f.edits, ve) 237 238 if ve.ComparerName != "" { 239 f.comparerName = ve.ComparerName 240 } 241 if num := ve.MinUnflushedLogNum.FileNum(); num != 0 { 242 f.editRefs[num] = append(f.editRefs[num], i) 243 } 244 for df := range ve.DeletedFiles { 245 f.editRefs[df.FileNum] = append(f.editRefs[df.FileNum], i) 246 } 247 for _, nf := range ve.NewFiles { 248 // The same file can be deleted and added in a single version edit 249 // which indicates a "move" compaction. Only add the edit to the list 250 // once. 251 refs := f.editRefs[nf.Meta.FileNum] 252 if n := len(refs); n == 0 || refs[n-1] != i { 253 f.editRefs[nf.Meta.FileNum] = append(refs, i) 254 } 255 if _, ok := f.tableMeta[nf.Meta.FileNum]; !ok { 256 f.tableMeta[nf.Meta.FileNum] = nf.Meta 257 } 258 } 259 } 260 }() 261 } 262 263 if f.verbose { 264 fmt.Fprintf(stdout, "%5d %s\n", len(f.edits), makePlural("edit", int64(len(f.edits)))) 265 } 266 } 267 268 // Search the logs and sstables for references to the specified key. 269 func (f *findT) search(stdout io.Writer, key []byte) []findRef { 270 refs := f.searchLogs(stdout, key, nil) 271 refs = f.searchTables(stdout, key, refs) 272 273 // For a given file (log or table) the references are already in the correct 274 // order. We simply want to order the references by fileNum using a stable 275 // sort. 276 // 277 // TODO(peter): I'm not sure if this is perfectly correct with regards to log 278 // files and ingested sstables, but it is close enough and doing something 279 // better is onerous. Revisit if this ever becomes problematic (e.g. if we 280 // allow finding more than one key at a time). 281 // 282 // An example of the problem with logs and ingestion (which can only occur 283 // with distinct keys). If I write key "a" to a log, I can then ingested key 284 // "b" without causing "a" to be flushed. Then I can write key "c" to the 285 // log. Ideally, we'd show the key "a" from the log, then the key "b" from 286 // the ingested sstable, then key "c" from the log. 287 slices.SortStableFunc(refs, func(a, b findRef) int { 288 return cmp.Compare(a.fileNum, b.fileNum) 289 }) 290 return refs 291 } 292 293 // Search the logs for references to the specified key. 294 func (f *findT) searchLogs(stdout io.Writer, searchKey []byte, refs []findRef) []findRef { 295 cmp := f.opts.Comparer.Compare 296 for _, fileNum := range f.logs { 297 _ = func() (err error) { 298 path := f.files[fileNum] 299 lf, err := f.opts.FS.Open(path) 300 if err != nil { 301 fmt.Fprintf(stdout, "%s\n", err) 302 return 303 } 304 defer lf.Close() 305 306 if f.verbose { 307 fmt.Fprintf(stdout, "%s", path) 308 defer fmt.Fprintf(stdout, "\n") 309 } 310 defer func() { 311 switch err { 312 case record.ErrZeroedChunk: 313 if f.verbose { 314 fmt.Fprintf(stdout, ": EOF [%s] (may be due to WAL preallocation)", err) 315 } 316 case record.ErrInvalidChunk: 317 if f.verbose { 318 fmt.Fprintf(stdout, ": EOF [%s] (may be due to WAL recycling)", err) 319 } 320 default: 321 if err != io.EOF { 322 if f.verbose { 323 fmt.Fprintf(stdout, ": %s", err) 324 } else { 325 fmt.Fprintf(stdout, "%s: %s\n", path, err) 326 } 327 } 328 } 329 }() 330 331 var b pebble.Batch 332 var buf bytes.Buffer 333 rr := record.NewReader(lf, fileNum) 334 for { 335 r, err := rr.Next() 336 if err == nil { 337 buf.Reset() 338 _, err = io.Copy(&buf, r) 339 } 340 if err != nil { 341 return err 342 } 343 344 b = pebble.Batch{} 345 if err := b.SetRepr(buf.Bytes()); err != nil { 346 fmt.Fprintf(stdout, "%s: corrupt log file: %v", path, err) 347 continue 348 } 349 seqNum := b.SeqNum() 350 for r := b.Reader(); ; seqNum++ { 351 kind, ukey, value, ok, err := r.Next() 352 if !ok { 353 if err != nil { 354 fmt.Fprintf(stdout, "%s: corrupt log file: %v", path, err) 355 break 356 } 357 break 358 } 359 ikey := base.MakeInternalKey(ukey, seqNum, kind) 360 switch kind { 361 case base.InternalKeyKindDelete, 362 base.InternalKeyKindDeleteSized, 363 base.InternalKeyKindSet, 364 base.InternalKeyKindMerge, 365 base.InternalKeyKindSingleDelete, 366 base.InternalKeyKindSetWithDelete: 367 if cmp(searchKey, ikey.UserKey) != 0 { 368 continue 369 } 370 case base.InternalKeyKindRangeDelete: 371 // Output tombstones that contain or end with the search key. 372 t := rangedel.Decode(ikey, value, nil) 373 if !t.Contains(cmp, searchKey) && cmp(t.End, searchKey) != 0 { 374 continue 375 } 376 default: 377 continue 378 } 379 380 refs = append(refs, findRef{ 381 key: ikey.Clone(), 382 value: append([]byte(nil), value...), 383 fileNum: fileNum.FileNum(), 384 }) 385 } 386 } 387 }() 388 } 389 return refs 390 } 391 392 // Search the tables for references to the specified key. 393 func (f *findT) searchTables(stdout io.Writer, searchKey []byte, refs []findRef) []findRef { 394 cache := pebble.NewCache(128 << 20 /* 128 MB */) 395 defer cache.Unref() 396 397 f.tableRefs = make(map[base.FileNum]bool) 398 for _, fileNum := range f.tables { 399 _ = func() (err error) { 400 path := f.files[fileNum.DiskFileNum()] 401 tf, err := f.opts.FS.Open(path) 402 if err != nil { 403 fmt.Fprintf(stdout, "%s\n", err) 404 return 405 } 406 407 m := f.tableMeta[fileNum] 408 if f.verbose { 409 fmt.Fprintf(stdout, "%s", path) 410 if m != nil && m.SmallestSeqNum == m.LargestSeqNum { 411 fmt.Fprintf(stdout, ": global seqnum: %d", m.LargestSeqNum) 412 } 413 defer fmt.Fprintf(stdout, "\n") 414 } 415 defer func() { 416 switch { 417 case err != nil: 418 if f.verbose { 419 fmt.Fprintf(stdout, ": %v", err) 420 } else { 421 fmt.Fprintf(stdout, "%s: %v\n", path, err) 422 } 423 } 424 }() 425 426 opts := sstable.ReaderOptions{ 427 Cache: cache, 428 Comparer: f.opts.Comparer, 429 Filters: f.opts.Filters, 430 } 431 readable, err := sstable.NewSimpleReadable(tf) 432 if err != nil { 433 return err 434 } 435 r, err := sstable.NewReader(readable, opts, f.comparers, f.mergers, 436 private.SSTableRawTombstonesOpt.(sstable.ReaderOption)) 437 if err != nil { 438 f.errors = append(f.errors, fmt.Sprintf("Unable to decode sstable %s, %s", f.files[fileNum.DiskFileNum()], err.Error())) 439 // Ensure the error only gets printed once. 440 err = nil 441 return 442 } 443 defer r.Close() 444 445 if m != nil && m.SmallestSeqNum == m.LargestSeqNum { 446 r.Properties.GlobalSeqNum = m.LargestSeqNum 447 } 448 449 iter, err := r.NewIter(nil, nil) 450 if err != nil { 451 return err 452 } 453 defer iter.Close() 454 key, value := iter.SeekGE(searchKey, base.SeekGEFlagsNone) 455 456 // We configured sstable.Reader to return raw tombstones which requires a 457 // bit more work here to put them in a form that can be iterated in 458 // parallel with the point records. 459 rangeDelIter, err := func() (keyspan.FragmentIterator, error) { 460 iter, err := r.NewRawRangeDelIter() 461 if err != nil { 462 return nil, err 463 } 464 if iter == nil { 465 return keyspan.NewIter(r.Compare, nil), nil 466 } 467 defer iter.Close() 468 469 var tombstones []keyspan.Span 470 for t := iter.First(); t != nil; t = iter.Next() { 471 if !t.Contains(r.Compare, searchKey) { 472 continue 473 } 474 tombstones = append(tombstones, t.ShallowClone()) 475 } 476 477 slices.SortFunc(tombstones, func(a, b keyspan.Span) int { 478 return r.Compare(a.Start, b.Start) 479 }) 480 return keyspan.NewIter(r.Compare, tombstones), nil 481 }() 482 if err != nil { 483 return err 484 } 485 486 defer rangeDelIter.Close() 487 rangeDel := rangeDelIter.First() 488 489 foundRef := false 490 for key != nil || rangeDel != nil { 491 if key != nil && 492 (rangeDel == nil || r.Compare(key.UserKey, rangeDel.Start) < 0) { 493 if r.Compare(searchKey, key.UserKey) != 0 { 494 key, value = nil, base.LazyValue{} 495 continue 496 } 497 v, _, err := value.Value(nil) 498 if err != nil { 499 return err 500 } 501 refs = append(refs, findRef{ 502 key: key.Clone(), 503 value: append([]byte(nil), v...), 504 fileNum: fileNum, 505 }) 506 key, value = iter.Next() 507 } else { 508 // Use rangedel.Encode to add a reference for each key 509 // within the span. 510 err := rangedel.Encode(rangeDel, func(k base.InternalKey, v []byte) error { 511 refs = append(refs, findRef{ 512 key: k.Clone(), 513 value: append([]byte(nil), v...), 514 fileNum: fileNum, 515 }) 516 return nil 517 }) 518 if err != nil { 519 return err 520 } 521 rangeDel = rangeDelIter.Next() 522 } 523 foundRef = true 524 } 525 526 if foundRef { 527 f.tableRefs[fileNum] = true 528 } 529 return nil 530 }() 531 } 532 return refs 533 } 534 535 // Determine the provenance of the specified table. We search the version edits 536 // for the first edit which created the table, and then analyze the edit to 537 // determine if it was a compaction, flush, or ingestion. Returns an empty 538 // string if the provenance of a table cannot be determined. 539 func (f *findT) tableProvenance(fileNum base.FileNum) string { 540 editRefs := f.editRefs[fileNum] 541 for len(editRefs) > 0 { 542 ve := f.edits[editRefs[0]] 543 editRefs = editRefs[1:] 544 for _, nf := range ve.NewFiles { 545 if fileNum != nf.Meta.FileNum { 546 continue 547 } 548 549 var buf bytes.Buffer 550 switch { 551 case len(ve.DeletedFiles) > 0: 552 // A version edit with deleted files is a compaction. The deleted 553 // files are the inputs to the compaction. We're going to 554 // reconstruct the input files and display those inputs that 555 // contain the search key (i.e. are list in refs) and use an 556 // ellipsis to indicate when there were other inputs that have 557 // been elided. 558 var sourceLevels []int 559 levels := make(map[int][]base.FileNum) 560 for df := range ve.DeletedFiles { 561 files := levels[df.Level] 562 if len(files) == 0 { 563 sourceLevels = append(sourceLevels, df.Level) 564 } 565 levels[df.Level] = append(files, df.FileNum) 566 } 567 568 sort.Ints(sourceLevels) 569 if sourceLevels[len(sourceLevels)-1] != nf.Level { 570 sourceLevels = append(sourceLevels, nf.Level) 571 } 572 573 sep := " " 574 fmt.Fprintf(&buf, "compacted") 575 for _, level := range sourceLevels { 576 files := levels[level] 577 slices.Sort(files) 578 579 fmt.Fprintf(&buf, "%sL%d [", sep, level) 580 sep = "" 581 elided := false 582 for _, fileNum := range files { 583 if f.tableRefs[fileNum] { 584 fmt.Fprintf(&buf, "%s%s", sep, fileNum) 585 sep = " " 586 } else { 587 elided = true 588 } 589 } 590 if elided { 591 fmt.Fprintf(&buf, "%s...", sep) 592 } 593 fmt.Fprintf(&buf, "]") 594 sep = " + " 595 } 596 597 case ve.MinUnflushedLogNum != 0: 598 // A version edit with a min-unflushed log indicates a flush 599 // operation. 600 fmt.Fprintf(&buf, "flushed to L%d", nf.Level) 601 602 case nf.Meta.SmallestSeqNum == nf.Meta.LargestSeqNum: 603 // If the smallest and largest seqnum are the same, the file was 604 // ingested. Note that this can also occur for a flushed sstable 605 // that contains only a single key, though that would have 606 // already been captured above. 607 fmt.Fprintf(&buf, "ingested to L%d", nf.Level) 608 609 default: 610 // The provenance of the table is unclear. This is usually due to 611 // the MANIFEST rolling over and taking a snapshot of the LSM 612 // state. 613 fmt.Fprintf(&buf, "added to L%d", nf.Level) 614 } 615 616 // After a table is created, it can be moved to a different level via a 617 // move compaction. This is indicated by a version edit that deletes the 618 // table from one level and adds the same table to a different 619 // level. Loop over the remaining version edits for the table looking for 620 // such moves. 621 for len(editRefs) > 0 { 622 ve := f.edits[editRefs[0]] 623 editRefs = editRefs[1:] 624 for _, nf := range ve.NewFiles { 625 if fileNum == nf.Meta.FileNum { 626 for df := range ve.DeletedFiles { 627 if fileNum == df.FileNum { 628 fmt.Fprintf(&buf, ", moved to L%d", nf.Level) 629 break 630 } 631 } 632 break 633 } 634 } 635 } 636 637 return buf.String() 638 } 639 } 640 return "" 641 }