github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/tool/find.go (about) 1 // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package tool 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "sort" 12 13 "github.com/cockroachdb/pebble" 14 "github.com/cockroachdb/pebble/internal/base" 15 "github.com/cockroachdb/pebble/internal/keyspan" 16 "github.com/cockroachdb/pebble/internal/manifest" 17 "github.com/cockroachdb/pebble/internal/private" 18 "github.com/cockroachdb/pebble/internal/rangedel" 19 "github.com/cockroachdb/pebble/record" 20 "github.com/cockroachdb/pebble/sstable" 21 "github.com/spf13/cobra" 22 ) 23 24 type findRef struct { 25 key base.InternalKey 26 value []byte 27 fileNum base.FileNum 28 } 29 30 // findT implements the find tool. 31 // 32 // TODO(bananabrick): Add support for virtual sstables in this tool. Currently, 33 // the tool will work because we're parsing files from disk, so virtual sstables 34 // will never be added to findT.tables. The manifest could contain information 35 // about virtual sstables. This is fine because the manifest is only used to 36 // compute the findT.editRefs, and editRefs is only used if a file in 37 // findT.tables contains a key. Of course, the tool won't be completely 38 // accurate without dealing with virtual sstable case. 39 type findT struct { 40 Root *cobra.Command 41 42 // Configuration. 43 opts *pebble.Options 44 comparers sstable.Comparers 45 mergers sstable.Mergers 46 47 // Flags. 48 comparerName string 49 fmtKey keyFormatter 50 fmtValue valueFormatter 51 verbose bool 52 53 // Map from file num to path on disk. 54 files map[base.FileNum]string 55 // Map from file num to version edit index which references the file num. 56 editRefs map[base.FileNum][]int 57 // List of version edits. 58 edits []manifest.VersionEdit 59 // Sorted list of WAL file nums. 60 logs []base.FileNum 61 // Sorted list of manifest file nums. 62 manifests []base.FileNum 63 // Sorted list of table file nums. 64 tables []base.FileNum 65 // Set of tables that contains references to the search key. 66 tableRefs map[base.FileNum]bool 67 // Map from file num to table metadata. 68 tableMeta map[base.FileNum]*manifest.FileMetadata 69 // List of error messages for SSTables that could not be decoded. 70 errors []string 71 } 72 73 func newFind( 74 opts *pebble.Options, 75 comparers sstable.Comparers, 76 defaultComparer string, 77 mergers sstable.Mergers, 78 ) *findT { 79 f := &findT{ 80 opts: opts, 81 comparers: comparers, 82 mergers: mergers, 83 } 84 f.fmtKey.mustSet("quoted") 85 f.fmtValue.mustSet("[%x]") 86 87 f.Root = &cobra.Command{ 88 Use: "find <dir> <key>", 89 Short: "find references to the specified key", 90 Long: ` 91 Find references to the specified key and any range tombstones that contain the 92 key. This includes references to the key in WAL files and sstables, and the 93 provenance of the sstables (flushed, ingested, compacted). 94 `, 95 Args: cobra.ExactArgs(2), 96 Run: f.run, 97 } 98 99 f.Root.Flags().BoolVarP( 100 &f.verbose, "verbose", "v", false, "verbose output") 101 f.Root.Flags().StringVar( 102 &f.comparerName, "comparer", defaultComparer, "comparer name") 103 f.Root.Flags().Var( 104 &f.fmtKey, "key", "key formatter") 105 f.Root.Flags().Var( 106 &f.fmtValue, "value", "value formatter") 107 return f 108 } 109 110 func (f *findT) run(cmd *cobra.Command, args []string) { 111 stdout, stderr := cmd.OutOrStdout(), cmd.OutOrStderr() 112 var key key 113 if err := key.Set(args[1]); err != nil { 114 fmt.Fprintf(stdout, "%s\n", err) 115 return 116 } 117 118 if err := f.findFiles(stdout, stderr, args[0]); err != nil { 119 fmt.Fprintf(stdout, "%s\n", err) 120 return 121 } 122 f.readManifests(stdout) 123 124 f.opts.Comparer = f.comparers[f.comparerName] 125 if f.opts.Comparer == nil { 126 fmt.Fprintf(stderr, "unknown comparer %q", f.comparerName) 127 return 128 } 129 f.fmtKey.setForComparer(f.opts.Comparer.Name, f.comparers) 130 f.fmtValue.setForComparer(f.opts.Comparer.Name, f.comparers) 131 132 refs := f.search(stdout, key) 133 var lastFileNum base.FileNum 134 for i := range refs { 135 r := &refs[i] 136 if lastFileNum != r.fileNum { 137 lastFileNum = r.fileNum 138 fmt.Fprintf(stdout, "%s", f.opts.FS.PathBase(f.files[r.fileNum])) 139 if m := f.tableMeta[r.fileNum]; m != nil { 140 fmt.Fprintf(stdout, " ") 141 formatKeyRange(stdout, f.fmtKey, &m.Smallest, &m.Largest) 142 } 143 fmt.Fprintf(stdout, "\n") 144 if p := f.tableProvenance(r.fileNum); p != "" { 145 fmt.Fprintf(stdout, " (%s)\n", p) 146 } 147 } 148 fmt.Fprintf(stdout, " ") 149 formatKeyValue(stdout, f.fmtKey, f.fmtValue, &r.key, r.value) 150 } 151 152 for _, errorMsg := range f.errors { 153 fmt.Fprint(stdout, errorMsg) 154 } 155 } 156 157 // Find all of the manifests, logs, and tables in the specified directory. 158 func (f *findT) findFiles(stdout, stderr io.Writer, dir string) error { 159 f.files = make(map[base.FileNum]string) 160 f.editRefs = make(map[base.FileNum][]int) 161 f.logs = nil 162 f.manifests = nil 163 f.tables = nil 164 f.tableMeta = make(map[base.FileNum]*manifest.FileMetadata) 165 166 if _, err := f.opts.FS.Stat(dir); err != nil { 167 return err 168 } 169 170 walk(stderr, f.opts.FS, dir, func(path string) { 171 ft, fileNum, ok := base.ParseFilename(f.opts.FS, path) 172 if !ok { 173 return 174 } 175 switch ft { 176 case base.FileTypeLog: 177 f.logs = append(f.logs, fileNum.FileNum()) 178 case base.FileTypeManifest: 179 f.manifests = append(f.manifests, fileNum.FileNum()) 180 case base.FileTypeTable: 181 f.tables = append(f.tables, fileNum.FileNum()) 182 default: 183 return 184 } 185 f.files[fileNum.FileNum()] = path 186 }) 187 188 sort.Slice(f.logs, func(i, j int) bool { 189 return f.logs[i] < f.logs[j] 190 }) 191 sort.Slice(f.manifests, func(i, j int) bool { 192 return f.manifests[i] < f.manifests[j] 193 }) 194 sort.Slice(f.tables, func(i, j int) bool { 195 return f.tables[i] < f.tables[j] 196 }) 197 198 if f.verbose { 199 fmt.Fprintf(stdout, "%s\n", dir) 200 fmt.Fprintf(stdout, "%5d %s\n", len(f.manifests), makePlural("manifest", int64(len(f.manifests)))) 201 fmt.Fprintf(stdout, "%5d %s\n", len(f.logs), makePlural("log", int64(len(f.logs)))) 202 fmt.Fprintf(stdout, "%5d %s\n", len(f.tables), makePlural("sstable", int64(len(f.tables)))) 203 } 204 return nil 205 } 206 207 // Read the manifests and populate the editRefs map which is used to determine 208 // the provenance and metadata of tables. 209 func (f *findT) readManifests(stdout io.Writer) { 210 for _, fileNum := range f.manifests { 211 func() { 212 path := f.files[fileNum] 213 mf, err := f.opts.FS.Open(path) 214 if err != nil { 215 fmt.Fprintf(stdout, "%s\n", err) 216 return 217 } 218 defer mf.Close() 219 220 if f.verbose { 221 fmt.Fprintf(stdout, "%s\n", path) 222 } 223 224 rr := record.NewReader(mf, 0 /* logNum */) 225 for { 226 r, err := rr.Next() 227 if err != nil { 228 if err != io.EOF { 229 fmt.Fprintf(stdout, "%s: %s\n", path, err) 230 } 231 break 232 } 233 234 var ve manifest.VersionEdit 235 if err := ve.Decode(r); err != nil { 236 fmt.Fprintf(stdout, "%s: %s\n", path, err) 237 break 238 } 239 i := len(f.edits) 240 f.edits = append(f.edits, ve) 241 242 if ve.ComparerName != "" { 243 f.comparerName = ve.ComparerName 244 } 245 if num := ve.MinUnflushedLogNum; num != 0 { 246 f.editRefs[num] = append(f.editRefs[num], i) 247 } 248 for df := range ve.DeletedFiles { 249 f.editRefs[df.FileNum] = append(f.editRefs[df.FileNum], i) 250 } 251 for _, nf := range ve.NewFiles { 252 // The same file can be deleted and added in a single version edit 253 // which indicates a "move" compaction. Only add the edit to the list 254 // once. 255 refs := f.editRefs[nf.Meta.FileNum] 256 if n := len(refs); n == 0 || refs[n-1] != i { 257 f.editRefs[nf.Meta.FileNum] = append(refs, i) 258 } 259 if _, ok := f.tableMeta[nf.Meta.FileNum]; !ok { 260 f.tableMeta[nf.Meta.FileNum] = nf.Meta 261 } 262 } 263 } 264 }() 265 } 266 267 if f.verbose { 268 fmt.Fprintf(stdout, "%5d %s\n", len(f.edits), makePlural("edit", int64(len(f.edits)))) 269 } 270 } 271 272 // Search the logs and sstables for references to the specified key. 273 func (f *findT) search(stdout io.Writer, key []byte) []findRef { 274 refs := f.searchLogs(stdout, key, nil) 275 refs = f.searchTables(stdout, key, refs) 276 277 // For a given file (log or table) the references are already in the correct 278 // order. We simply want to order the references by fileNum using a stable 279 // sort. 280 // 281 // TODO(peter): I'm not sure if this is perfectly correct with regards to log 282 // files and ingested sstables, but it is close enough and doing something 283 // better is onerous. Revisit if this ever becomes problematic (e.g. if we 284 // allow finding more than one key at a time). 285 // 286 // An example of the problem with logs and ingestion (which can only occur 287 // with distinct keys). If I write key "a" to a log, I can then ingested key 288 // "b" without causing "a" to be flushed. Then I can write key "c" to the 289 // log. Ideally, we'd show the key "a" from the log, then the key "b" from 290 // the ingested sstable, then key "c" from the log. 291 sort.SliceStable(refs, func(i, j int) bool { 292 return refs[i].fileNum < refs[j].fileNum 293 }) 294 return refs 295 } 296 297 // Search the logs for references to the specified key. 298 func (f *findT) searchLogs(stdout io.Writer, searchKey []byte, refs []findRef) []findRef { 299 cmp := f.opts.Comparer.Compare 300 for _, fileNum := range f.logs { 301 _ = func() (err error) { 302 path := f.files[fileNum] 303 lf, err := f.opts.FS.Open(path) 304 if err != nil { 305 fmt.Fprintf(stdout, "%s\n", err) 306 return 307 } 308 defer lf.Close() 309 310 if f.verbose { 311 fmt.Fprintf(stdout, "%s", path) 312 defer fmt.Fprintf(stdout, "\n") 313 } 314 defer func() { 315 switch err { 316 case record.ErrZeroedChunk: 317 if f.verbose { 318 fmt.Fprintf(stdout, ": EOF [%s] (may be due to WAL preallocation)", err) 319 } 320 case record.ErrInvalidChunk: 321 if f.verbose { 322 fmt.Fprintf(stdout, ": EOF [%s] (may be due to WAL recycling)", err) 323 } 324 default: 325 if err != io.EOF { 326 if f.verbose { 327 fmt.Fprintf(stdout, ": %s", err) 328 } else { 329 fmt.Fprintf(stdout, "%s: %s\n", path, err) 330 } 331 } 332 } 333 }() 334 335 var b pebble.Batch 336 var buf bytes.Buffer 337 rr := record.NewReader(lf, fileNum) 338 for { 339 r, err := rr.Next() 340 if err == nil { 341 buf.Reset() 342 _, err = io.Copy(&buf, r) 343 } 344 if err != nil { 345 return err 346 } 347 348 b = pebble.Batch{} 349 if err := b.SetRepr(buf.Bytes()); err != nil { 350 fmt.Fprintf(stdout, "%s: corrupt log file: %v", path, err) 351 continue 352 } 353 seqNum := b.SeqNum() 354 for r := b.Reader(); ; seqNum++ { 355 kind, ukey, value, ok, err := r.Next() 356 if !ok { 357 if err != nil { 358 fmt.Fprintf(stdout, "%s: corrupt log file: %v", path, err) 359 break 360 } 361 break 362 } 363 ikey := base.MakeInternalKey(ukey, seqNum, kind) 364 switch kind { 365 case base.InternalKeyKindDelete, 366 base.InternalKeyKindDeleteSized, 367 base.InternalKeyKindSet, 368 base.InternalKeyKindMerge, 369 base.InternalKeyKindSingleDelete, 370 base.InternalKeyKindSetWithDelete: 371 if cmp(searchKey, ikey.UserKey) != 0 { 372 continue 373 } 374 case base.InternalKeyKindRangeDelete: 375 // Output tombstones that contain or end with the search key. 376 t := rangedel.Decode(ikey, value, nil) 377 if !t.Contains(cmp, searchKey) && cmp(t.End, searchKey) != 0 { 378 continue 379 } 380 default: 381 continue 382 } 383 384 refs = append(refs, findRef{ 385 key: ikey.Clone(), 386 value: append([]byte(nil), value...), 387 fileNum: fileNum, 388 }) 389 } 390 } 391 }() 392 } 393 return refs 394 } 395 396 // Search the tables for references to the specified key. 397 func (f *findT) searchTables(stdout io.Writer, searchKey []byte, refs []findRef) []findRef { 398 cache := pebble.NewCache(128 << 20 /* 128 MB */) 399 defer cache.Unref() 400 401 f.tableRefs = make(map[base.FileNum]bool) 402 for _, fileNum := range f.tables { 403 _ = func() (err error) { 404 path := f.files[fileNum] 405 tf, err := f.opts.FS.Open(path) 406 if err != nil { 407 fmt.Fprintf(stdout, "%s\n", err) 408 return 409 } 410 411 m := f.tableMeta[fileNum] 412 if f.verbose { 413 fmt.Fprintf(stdout, "%s", path) 414 if m != nil && m.SmallestSeqNum == m.LargestSeqNum { 415 fmt.Fprintf(stdout, ": global seqnum: %d", m.LargestSeqNum) 416 } 417 defer fmt.Fprintf(stdout, "\n") 418 } 419 defer func() { 420 switch { 421 case err != nil: 422 if f.verbose { 423 fmt.Fprintf(stdout, ": %v", err) 424 } else { 425 fmt.Fprintf(stdout, "%s: %v\n", path, err) 426 } 427 } 428 }() 429 430 opts := sstable.ReaderOptions{ 431 Cache: cache, 432 Comparer: f.opts.Comparer, 433 Filters: f.opts.Filters, 434 } 435 readable, err := sstable.NewSimpleReadable(tf) 436 if err != nil { 437 return err 438 } 439 r, err := sstable.NewReader(readable, opts, f.comparers, f.mergers, 440 private.SSTableRawTombstonesOpt.(sstable.ReaderOption)) 441 if err != nil { 442 f.errors = append(f.errors, fmt.Sprintf("Unable to decode sstable %s, %s", f.files[fileNum], err.Error())) 443 // Ensure the error only gets printed once. 444 err = nil 445 return 446 } 447 defer r.Close() 448 449 if m != nil && m.SmallestSeqNum == m.LargestSeqNum { 450 r.Properties.GlobalSeqNum = m.LargestSeqNum 451 } 452 453 iter, err := r.NewIter(nil, nil) 454 if err != nil { 455 return err 456 } 457 defer iter.Close() 458 key, value := iter.SeekGE(searchKey, base.SeekGEFlagsNone) 459 460 // We configured sstable.Reader to return raw tombstones which requires a 461 // bit more work here to put them in a form that can be iterated in 462 // parallel with the point records. 463 rangeDelIter, err := func() (keyspan.FragmentIterator, error) { 464 iter, err := r.NewRawRangeDelIter() 465 if err != nil { 466 return nil, err 467 } 468 if iter == nil { 469 return keyspan.NewIter(r.Compare, nil), nil 470 } 471 defer iter.Close() 472 473 var tombstones []keyspan.Span 474 for t := iter.First(); t != nil; t = iter.Next() { 475 if !t.Contains(r.Compare, searchKey) { 476 continue 477 } 478 tombstones = append(tombstones, t.ShallowClone()) 479 } 480 481 sort.Slice(tombstones, func(i, j int) bool { 482 return r.Compare(tombstones[i].Start, tombstones[j].Start) < 0 483 }) 484 return keyspan.NewIter(r.Compare, tombstones), nil 485 }() 486 if err != nil { 487 return err 488 } 489 490 defer rangeDelIter.Close() 491 rangeDel := rangeDelIter.First() 492 493 foundRef := false 494 for key != nil || rangeDel != nil { 495 if key != nil && 496 (rangeDel == nil || r.Compare(key.UserKey, rangeDel.Start) < 0) { 497 if r.Compare(searchKey, key.UserKey) != 0 { 498 key, value = nil, base.LazyValue{} 499 continue 500 } 501 v, _, err := value.Value(nil) 502 if err != nil { 503 return err 504 } 505 refs = append(refs, findRef{ 506 key: key.Clone(), 507 value: append([]byte(nil), v...), 508 fileNum: fileNum, 509 }) 510 key, value = iter.Next() 511 } else { 512 // Use rangedel.Encode to add a reference for each key 513 // within the span. 514 err := rangedel.Encode(rangeDel, func(k base.InternalKey, v []byte) error { 515 refs = append(refs, findRef{ 516 key: k.Clone(), 517 value: append([]byte(nil), v...), 518 fileNum: fileNum, 519 }) 520 return nil 521 }) 522 if err != nil { 523 return err 524 } 525 rangeDel = rangeDelIter.Next() 526 } 527 foundRef = true 528 } 529 530 if foundRef { 531 f.tableRefs[fileNum] = true 532 } 533 return nil 534 }() 535 } 536 return refs 537 } 538 539 // Determine the provenance of the specified table. We search the version edits 540 // for the first edit which created the table, and then analyze the edit to 541 // determine if it was a compaction, flush, or ingestion. Returns an empty 542 // string if the provenance of a table cannot be determined. 543 func (f *findT) tableProvenance(fileNum base.FileNum) string { 544 editRefs := f.editRefs[fileNum] 545 for len(editRefs) > 0 { 546 ve := f.edits[editRefs[0]] 547 editRefs = editRefs[1:] 548 for _, nf := range ve.NewFiles { 549 if fileNum != nf.Meta.FileNum { 550 continue 551 } 552 553 var buf bytes.Buffer 554 switch { 555 case len(ve.DeletedFiles) > 0: 556 // A version edit with deleted files is a compaction. The deleted 557 // files are the inputs to the compaction. We're going to 558 // reconstruct the input files and display those inputs that 559 // contain the search key (i.e. are list in refs) and use an 560 // ellipsis to indicate when there were other inputs that have 561 // been elided. 562 var sourceLevels []int 563 levels := make(map[int][]base.FileNum) 564 for df := range ve.DeletedFiles { 565 files := levels[df.Level] 566 if len(files) == 0 { 567 sourceLevels = append(sourceLevels, df.Level) 568 } 569 levels[df.Level] = append(files, df.FileNum) 570 } 571 572 sort.Ints(sourceLevels) 573 if sourceLevels[len(sourceLevels)-1] != nf.Level { 574 sourceLevels = append(sourceLevels, nf.Level) 575 } 576 577 sep := " " 578 fmt.Fprintf(&buf, "compacted") 579 for _, level := range sourceLevels { 580 files := levels[level] 581 sort.Slice(files, func(i, j int) bool { 582 return files[i] < files[j] 583 }) 584 585 fmt.Fprintf(&buf, "%sL%d [", sep, level) 586 sep = "" 587 elided := false 588 for _, fileNum := range files { 589 if f.tableRefs[fileNum] { 590 fmt.Fprintf(&buf, "%s%s", sep, fileNum) 591 sep = " " 592 } else { 593 elided = true 594 } 595 } 596 if elided { 597 fmt.Fprintf(&buf, "%s...", sep) 598 } 599 fmt.Fprintf(&buf, "]") 600 sep = " + " 601 } 602 603 case ve.MinUnflushedLogNum != 0: 604 // A version edit with a min-unflushed log indicates a flush 605 // operation. 606 fmt.Fprintf(&buf, "flushed to L%d", nf.Level) 607 608 case nf.Meta.SmallestSeqNum == nf.Meta.LargestSeqNum: 609 // If the smallest and largest seqnum are the same, the file was 610 // ingested. Note that this can also occur for a flushed sstable 611 // that contains only a single key, though that would have 612 // already been captured above. 613 fmt.Fprintf(&buf, "ingested to L%d", nf.Level) 614 615 default: 616 // The provenance of the table is unclear. This is usually due to 617 // the MANIFEST rolling over and taking a snapshot of the LSM 618 // state. 619 fmt.Fprintf(&buf, "added to L%d", nf.Level) 620 } 621 622 // After a table is created, it can be moved to a different level via a 623 // move compaction. This is indicated by a version edit that deletes the 624 // table from one level and adds the same table to a different 625 // level. Loop over the remaining version edits for the table looking for 626 // such moves. 627 for len(editRefs) > 0 { 628 ve := f.edits[editRefs[0]] 629 editRefs = editRefs[1:] 630 for _, nf := range ve.NewFiles { 631 if fileNum == nf.Meta.FileNum { 632 for df := range ve.DeletedFiles { 633 if fileNum == df.FileNum { 634 fmt.Fprintf(&buf, ", moved to L%d", nf.Level) 635 break 636 } 637 } 638 break 639 } 640 } 641 } 642 643 return buf.String() 644 } 645 } 646 return "" 647 }