github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/tool/manifest.go (about)

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package tool
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"sort"
    11  	"time"
    12  
    13  	"github.com/cockroachdb/pebble"
    14  	"github.com/cockroachdb/pebble/internal/base"
    15  	"github.com/cockroachdb/pebble/internal/humanize"
    16  	"github.com/cockroachdb/pebble/internal/manifest"
    17  	"github.com/cockroachdb/pebble/record"
    18  	"github.com/cockroachdb/pebble/sstable"
    19  	"github.com/spf13/cobra"
    20  )
    21  
    22  // manifestT implements manifest-level tools, including both configuration
    23  // state and the commands themselves.
    24  type manifestT struct {
    25  	Root      *cobra.Command
    26  	Dump      *cobra.Command
    27  	Summarize *cobra.Command
    28  	Check     *cobra.Command
    29  
    30  	opts      *pebble.Options
    31  	comparers sstable.Comparers
    32  	fmtKey    keyFormatter
    33  	verbose   bool
    34  
    35  	filterStart key
    36  	filterEnd   key
    37  
    38  	summarizeDur time.Duration
    39  }
    40  
    41  func newManifest(opts *pebble.Options, comparers sstable.Comparers) *manifestT {
    42  	m := &manifestT{
    43  		opts:         opts,
    44  		comparers:    comparers,
    45  		summarizeDur: time.Hour,
    46  	}
    47  	m.fmtKey.mustSet("quoted")
    48  
    49  	m.Root = &cobra.Command{
    50  		Use:   "manifest",
    51  		Short: "manifest introspection tools",
    52  	}
    53  
    54  	// Add dump command
    55  	m.Dump = &cobra.Command{
    56  		Use:   "dump <manifest-files>",
    57  		Short: "print manifest contents",
    58  		Long: `
    59  Print the contents of the MANIFEST files.
    60  `,
    61  		Args: cobra.MinimumNArgs(1),
    62  		Run:  m.runDump,
    63  	}
    64  	m.Dump.Flags().Var(&m.fmtKey, "key", "key formatter")
    65  	m.Dump.Flags().Var(&m.filterStart, "filter-start", "start key filters out all version edits that only reference sstables containing keys strictly before the given key")
    66  	m.Dump.Flags().Var(&m.filterEnd, "filter-end", "end key filters out all version edits that only reference sstables containing keys at or strictly after the given key")
    67  	m.Root.AddCommand(m.Dump)
    68  	m.Root.PersistentFlags().BoolVarP(&m.verbose, "verbose", "v", false, "verbose output")
    69  
    70  	// Add summarize command
    71  	m.Summarize = &cobra.Command{
    72  		Use:   "summarize <manifest-files>",
    73  		Short: "summarize manifest contents",
    74  		Long: `
    75  Summarize the edits to the MANIFEST files over time.
    76  `,
    77  		Args: cobra.MinimumNArgs(1),
    78  		Run:  m.runSummarize,
    79  	}
    80  	m.Root.AddCommand(m.Summarize)
    81  	m.Summarize.Flags().DurationVar(
    82  		&m.summarizeDur, "dur", time.Hour, "bucket duration as a Go duration string (eg, '1h', '15m')")
    83  
    84  	// Add check command
    85  	m.Check = &cobra.Command{
    86  		Use:   "check <manifest-files>",
    87  		Short: "check manifest contents",
    88  		Long: `
    89  Check the contents of the MANIFEST files.
    90  `,
    91  		Args: cobra.MinimumNArgs(1),
    92  		Run:  m.runCheck,
    93  	}
    94  	m.Root.AddCommand(m.Check)
    95  	m.Check.Flags().Var(
    96  		&m.fmtKey, "key", "key formatter")
    97  
    98  	return m
    99  }
   100  
   101  func (m *manifestT) printLevels(cmp base.Compare, stdout io.Writer, v *manifest.Version) {
   102  	for level := range v.Levels {
   103  		if level == 0 && len(v.L0SublevelFiles) > 0 && !v.Levels[level].Empty() {
   104  			for sublevel := len(v.L0SublevelFiles) - 1; sublevel >= 0; sublevel-- {
   105  				fmt.Fprintf(stdout, "--- L0.%d ---\n", sublevel)
   106  				v.L0SublevelFiles[sublevel].Each(func(f *manifest.FileMetadata) {
   107  					if !anyOverlapFile(cmp, f, m.filterStart, m.filterEnd) {
   108  						return
   109  					}
   110  					fmt.Fprintf(stdout, "  %s:%d", f.FileNum, f.Size)
   111  					formatSeqNumRange(stdout, f.SmallestSeqNum, f.LargestSeqNum)
   112  					formatKeyRange(stdout, m.fmtKey, &f.Smallest, &f.Largest)
   113  					fmt.Fprintf(stdout, "\n")
   114  				})
   115  			}
   116  			continue
   117  		}
   118  		fmt.Fprintf(stdout, "--- L%d ---\n", level)
   119  		iter := v.Levels[level].Iter()
   120  		for f := iter.First(); f != nil; f = iter.Next() {
   121  			if !anyOverlapFile(cmp, f, m.filterStart, m.filterEnd) {
   122  				continue
   123  			}
   124  			fmt.Fprintf(stdout, "  %s:%d", f.FileNum, f.Size)
   125  			formatSeqNumRange(stdout, f.SmallestSeqNum, f.LargestSeqNum)
   126  			formatKeyRange(stdout, m.fmtKey, &f.Smallest, &f.Largest)
   127  			fmt.Fprintf(stdout, "\n")
   128  		}
   129  	}
   130  }
   131  
   132  func (m *manifestT) runDump(cmd *cobra.Command, args []string) {
   133  	stdout, stderr := cmd.OutOrStdout(), cmd.OutOrStderr()
   134  	for _, arg := range args {
   135  		func() {
   136  			f, err := m.opts.FS.Open(arg)
   137  			if err != nil {
   138  				fmt.Fprintf(stderr, "%s\n", err)
   139  				return
   140  			}
   141  			defer f.Close()
   142  
   143  			fmt.Fprintf(stdout, "%s\n", arg)
   144  
   145  			var bve manifest.BulkVersionEdit
   146  			bve.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata)
   147  			var cmp *base.Comparer
   148  			var editIdx int
   149  			rr := record.NewReader(f, 0 /* logNum */)
   150  			for {
   151  				offset := rr.Offset()
   152  				r, err := rr.Next()
   153  				if err != nil {
   154  					fmt.Fprintf(stdout, "%s\n", err)
   155  					break
   156  				}
   157  
   158  				var ve manifest.VersionEdit
   159  				err = ve.Decode(r)
   160  				if err != nil {
   161  					fmt.Fprintf(stdout, "%s\n", err)
   162  					break
   163  				}
   164  				if err := bve.Accumulate(&ve); err != nil {
   165  					fmt.Fprintf(stdout, "%s\n", err)
   166  					break
   167  				}
   168  
   169  				if cmp != nil && !anyOverlap(cmp.Compare, &ve, m.filterStart, m.filterEnd) {
   170  					continue
   171  				}
   172  
   173  				empty := true
   174  				fmt.Fprintf(stdout, "%d/%d\n", offset, editIdx)
   175  				if ve.ComparerName != "" {
   176  					empty = false
   177  					fmt.Fprintf(stdout, "  comparer:     %s", ve.ComparerName)
   178  					cmp = m.comparers[ve.ComparerName]
   179  					if cmp == nil {
   180  						fmt.Fprintf(stdout, " (unknown)")
   181  					}
   182  					fmt.Fprintf(stdout, "\n")
   183  					m.fmtKey.setForComparer(ve.ComparerName, m.comparers)
   184  				}
   185  				if ve.MinUnflushedLogNum != 0 {
   186  					empty = false
   187  					fmt.Fprintf(stdout, "  log-num:       %d\n", ve.MinUnflushedLogNum)
   188  				}
   189  				if ve.ObsoletePrevLogNum != 0 {
   190  					empty = false
   191  					fmt.Fprintf(stdout, "  prev-log-num:  %d\n", ve.ObsoletePrevLogNum)
   192  				}
   193  				if ve.NextFileNum != 0 {
   194  					empty = false
   195  					fmt.Fprintf(stdout, "  next-file-num: %d\n", ve.NextFileNum)
   196  				}
   197  				if ve.LastSeqNum != 0 {
   198  					empty = false
   199  					fmt.Fprintf(stdout, "  last-seq-num:  %d\n", ve.LastSeqNum)
   200  				}
   201  				entries := make([]manifest.DeletedFileEntry, 0, len(ve.DeletedFiles))
   202  				for df := range ve.DeletedFiles {
   203  					empty = false
   204  					entries = append(entries, df)
   205  				}
   206  				sort.Slice(entries, func(i, j int) bool {
   207  					if entries[i].Level != entries[j].Level {
   208  						return entries[i].Level < entries[j].Level
   209  					}
   210  					return entries[i].FileNum < entries[j].FileNum
   211  				})
   212  				for _, df := range entries {
   213  					fmt.Fprintf(stdout, "  deleted:       L%d %s\n", df.Level, df.FileNum)
   214  				}
   215  				for _, nf := range ve.NewFiles {
   216  					empty = false
   217  					fmt.Fprintf(stdout, "  added:         L%d %s:%d",
   218  						nf.Level, nf.Meta.FileNum, nf.Meta.Size)
   219  					formatSeqNumRange(stdout, nf.Meta.SmallestSeqNum, nf.Meta.LargestSeqNum)
   220  					formatKeyRange(stdout, m.fmtKey, &nf.Meta.Smallest, &nf.Meta.Largest)
   221  					if nf.Meta.CreationTime != 0 {
   222  						fmt.Fprintf(stdout, " (%s)",
   223  							time.Unix(nf.Meta.CreationTime, 0).UTC().Format(time.RFC3339))
   224  					}
   225  					fmt.Fprintf(stdout, "\n")
   226  				}
   227  				if empty {
   228  					// NB: An empty version edit can happen if we log a version edit with
   229  					// a zero field. RocksDB does this with a version edit that contains
   230  					// `LogNum == 0`.
   231  					fmt.Fprintf(stdout, "  <empty>\n")
   232  				}
   233  				editIdx++
   234  			}
   235  
   236  			if cmp != nil {
   237  				v, err := bve.Apply(
   238  					nil /* version */, cmp.Compare, m.fmtKey.fn, 0,
   239  					m.opts.Experimental.ReadCompactionRate,
   240  					nil /* zombies */, manifest.AllowSplitUserKeys,
   241  				)
   242  				if err != nil {
   243  					fmt.Fprintf(stdout, "%s\n", err)
   244  					return
   245  				}
   246  				m.printLevels(cmp.Compare, stdout, v)
   247  			}
   248  		}()
   249  	}
   250  }
   251  
   252  func anyOverlap(cmp base.Compare, ve *manifest.VersionEdit, start, end key) bool {
   253  	if start == nil && end == nil {
   254  		return true
   255  	}
   256  	for _, df := range ve.DeletedFiles {
   257  		if anyOverlapFile(cmp, df, start, end) {
   258  			return true
   259  		}
   260  	}
   261  	for _, nf := range ve.NewFiles {
   262  		if anyOverlapFile(cmp, nf.Meta, start, end) {
   263  			return true
   264  		}
   265  	}
   266  	return false
   267  }
   268  
   269  func anyOverlapFile(cmp base.Compare, f *manifest.FileMetadata, start, end key) bool {
   270  	if f == nil {
   271  		return true
   272  	}
   273  	if start != nil {
   274  		if v := cmp(f.Largest.UserKey, start); v < 0 {
   275  			return false
   276  		} else if f.Largest.IsExclusiveSentinel() && v == 0 {
   277  			return false
   278  		}
   279  	}
   280  	if end != nil && cmp(f.Smallest.UserKey, end) >= 0 {
   281  		return false
   282  	}
   283  	return true
   284  }
   285  
   286  func (m *manifestT) runSummarize(cmd *cobra.Command, args []string) {
   287  	for _, arg := range args {
   288  		err := m.runSummarizeOne(cmd.OutOrStdout(), arg)
   289  		if err != nil {
   290  			fmt.Fprintf(cmd.OutOrStderr(), "%s\n", err)
   291  		}
   292  	}
   293  }
   294  
   295  func (m *manifestT) runSummarizeOne(stdout io.Writer, arg string) error {
   296  	f, err := m.opts.FS.Open(arg)
   297  	if err != nil {
   298  		return err
   299  	}
   300  	defer f.Close()
   301  	fmt.Fprintf(stdout, "%s\n", arg)
   302  
   303  	type summaryBucket struct {
   304  		bytesAdded      [manifest.NumLevels]uint64
   305  		bytesCompactOut [manifest.NumLevels]uint64
   306  	}
   307  	var (
   308  		bve           manifest.BulkVersionEdit
   309  		newestOverall time.Time
   310  		oldestOverall time.Time // oldest after initial version edit
   311  		buckets       = map[time.Time]*summaryBucket{}
   312  		metadatas     = map[base.FileNum]*manifest.FileMetadata{}
   313  	)
   314  	bve.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata)
   315  	rr := record.NewReader(f, 0 /* logNum */)
   316  	for i := 0; ; i++ {
   317  		r, err := rr.Next()
   318  		if err == io.EOF {
   319  			break
   320  		} else if err != nil {
   321  			return err
   322  		}
   323  
   324  		var ve manifest.VersionEdit
   325  		err = ve.Decode(r)
   326  		if err != nil {
   327  			return err
   328  		}
   329  		if err := bve.Accumulate(&ve); err != nil {
   330  			return err
   331  		}
   332  
   333  		veNewest, veOldest := newestOverall, newestOverall
   334  		for _, nf := range ve.NewFiles {
   335  			_, seen := metadatas[nf.Meta.FileNum]
   336  			metadatas[nf.Meta.FileNum] = nf.Meta
   337  			if nf.Meta.CreationTime == 0 {
   338  				continue
   339  			}
   340  
   341  			t := time.Unix(nf.Meta.CreationTime, 0).UTC()
   342  			if veNewest.Before(t) {
   343  				veNewest = t
   344  			}
   345  			// Only update the oldest if we haven't already seen this
   346  			// file; it might've been moved in which case the sstable's
   347  			// creation time is from when it was originally created.
   348  			if veOldest.After(t) && !seen {
   349  				veOldest = t
   350  			}
   351  		}
   352  		// Ratchet up the most recent timestamp we've seen.
   353  		if newestOverall.Before(veNewest) {
   354  			newestOverall = veNewest
   355  		}
   356  
   357  		if i == 0 || newestOverall.IsZero() {
   358  			continue
   359  		}
   360  		// Update oldestOverall once, when we encounter the first version edit
   361  		// at index >= 1. It should be approximately the start time of the
   362  		// manifest.
   363  		if !newestOverall.IsZero() && oldestOverall.IsZero() {
   364  			oldestOverall = newestOverall
   365  		}
   366  
   367  		bucketKey := newestOverall.Truncate(m.summarizeDur)
   368  		b := buckets[bucketKey]
   369  		if b == nil {
   370  			b = &summaryBucket{}
   371  			buckets[bucketKey] = b
   372  		}
   373  
   374  		// Increase `bytesAdded` for any version edits that only add files.
   375  		// These are either flushes or ingests.
   376  		if len(ve.NewFiles) > 0 && len(ve.DeletedFiles) == 0 {
   377  			for _, nf := range ve.NewFiles {
   378  				b.bytesAdded[nf.Level] += nf.Meta.Size
   379  			}
   380  			continue
   381  		}
   382  
   383  		// Increase `bytesCompactOut` for the input level of any compactions
   384  		// that remove bytes from a level (excluding intra-L0 compactions).
   385  		// compactions.
   386  		destLevel := -1
   387  		if len(ve.NewFiles) > 0 {
   388  			destLevel = ve.NewFiles[0].Level
   389  		}
   390  		for dfe := range ve.DeletedFiles {
   391  			if dfe.Level != destLevel {
   392  				b.bytesCompactOut[dfe.Level] += metadatas[dfe.FileNum].Size
   393  			}
   394  		}
   395  	}
   396  
   397  	formatUint64 := func(v uint64, _ time.Duration) string {
   398  		if v == 0 {
   399  			return "."
   400  		}
   401  		return humanize.Bytes.Uint64(v).String()
   402  	}
   403  	formatRate := func(v uint64, dur time.Duration) string {
   404  		if v == 0 {
   405  			return "."
   406  		}
   407  		secs := dur.Seconds()
   408  		if secs == 0 {
   409  			secs = 1
   410  		}
   411  		return humanize.Bytes.Uint64(uint64(float64(v)/secs)).String() + "/s"
   412  	}
   413  
   414  	if newestOverall.IsZero() {
   415  		fmt.Fprintf(stdout, "(no timestamps)\n")
   416  	} else {
   417  		// NB: bt begins unaligned with the bucket duration (m.summarizeDur),
   418  		// but after the first bucket will always be aligned.
   419  		for bi, bt := 0, oldestOverall; !bt.After(newestOverall); bi, bt = bi+1, bt.Truncate(m.summarizeDur).Add(m.summarizeDur) {
   420  			// Truncate the start time to calculate the bucket key, and
   421  			// retrieve the appropriate bucket.
   422  			bk := bt.Truncate(m.summarizeDur)
   423  			var bucket summaryBucket
   424  			if buckets[bk] != nil {
   425  				bucket = *buckets[bk]
   426  			}
   427  
   428  			if bi%10 == 0 {
   429  				fmt.Fprintf(stdout, "                     ")
   430  				fmt.Fprintf(stdout, "_______L0_______L1_______L2_______L3_______L4_______L5_______L6_____TOTAL\n")
   431  			}
   432  			fmt.Fprintf(stdout, "%s\n", bt.Format(time.RFC3339))
   433  
   434  			// Compute the bucket duration. It may < `m.summarizeDur` if this is
   435  			// the first or last bucket.
   436  			bucketEnd := bt.Truncate(m.summarizeDur).Add(m.summarizeDur)
   437  			if bucketEnd.After(newestOverall) {
   438  				bucketEnd = newestOverall
   439  			}
   440  			dur := bucketEnd.Sub(bt)
   441  
   442  			stats := []struct {
   443  				label  string
   444  				format func(uint64, time.Duration) string
   445  				vals   [manifest.NumLevels]uint64
   446  			}{
   447  				{"Ingest+Flush", formatUint64, bucket.bytesAdded},
   448  				{"Ingest+Flush", formatRate, bucket.bytesAdded},
   449  				{"Compact (out)", formatUint64, bucket.bytesCompactOut},
   450  				{"Compact (out)", formatRate, bucket.bytesCompactOut},
   451  			}
   452  			for _, stat := range stats {
   453  				var sum uint64
   454  				for _, v := range stat.vals {
   455  					sum += v
   456  				}
   457  				fmt.Fprintf(stdout, "%20s   %8s %8s %8s %8s %8s %8s %8s %8s\n",
   458  					stat.label,
   459  					stat.format(stat.vals[0], dur),
   460  					stat.format(stat.vals[1], dur),
   461  					stat.format(stat.vals[2], dur),
   462  					stat.format(stat.vals[3], dur),
   463  					stat.format(stat.vals[4], dur),
   464  					stat.format(stat.vals[5], dur),
   465  					stat.format(stat.vals[6], dur),
   466  					stat.format(sum, dur))
   467  			}
   468  		}
   469  		fmt.Fprintf(stdout, "%s\n", newestOverall.Format(time.RFC3339))
   470  	}
   471  
   472  	dur := newestOverall.Sub(oldestOverall)
   473  	fmt.Fprintf(stdout, "---\n")
   474  	fmt.Fprintf(stdout, "Estimated start time: %s\n", oldestOverall.Format(time.RFC3339))
   475  	fmt.Fprintf(stdout, "Estimated end time:   %s\n", newestOverall.Format(time.RFC3339))
   476  	fmt.Fprintf(stdout, "Estimated duration:   %s\n", dur.String())
   477  
   478  	return nil
   479  }
   480  
   481  func (m *manifestT) runCheck(cmd *cobra.Command, args []string) {
   482  	stdout, stderr := cmd.OutOrStdout(), cmd.OutOrStderr()
   483  	ok := true
   484  	for _, arg := range args {
   485  		func() {
   486  			f, err := m.opts.FS.Open(arg)
   487  			if err != nil {
   488  				fmt.Fprintf(stderr, "%s\n", err)
   489  				ok = false
   490  				return
   491  			}
   492  			defer f.Close()
   493  
   494  			var v *manifest.Version
   495  			var cmp *base.Comparer
   496  			rr := record.NewReader(f, 0 /* logNum */)
   497  			// Contains the FileMetadata needed by BulkVersionEdit.Apply.
   498  			// It accumulates the additions since later edits contain
   499  			// deletions of earlier added files.
   500  			addedByFileNum := make(map[base.FileNum]*manifest.FileMetadata)
   501  			for {
   502  				offset := rr.Offset()
   503  				r, err := rr.Next()
   504  				if err != nil {
   505  					if err == io.EOF {
   506  						break
   507  					}
   508  					fmt.Fprintf(stdout, "%s: offset: %d err: %s\n", arg, offset, err)
   509  					ok = false
   510  					break
   511  				}
   512  
   513  				var ve manifest.VersionEdit
   514  				err = ve.Decode(r)
   515  				if err != nil {
   516  					fmt.Fprintf(stdout, "%s: offset: %d err: %s\n", arg, offset, err)
   517  					ok = false
   518  					break
   519  				}
   520  				var bve manifest.BulkVersionEdit
   521  				bve.AddedByFileNum = addedByFileNum
   522  				if err := bve.Accumulate(&ve); err != nil {
   523  					fmt.Fprintf(stderr, "%s\n", err)
   524  					ok = false
   525  					return
   526  				}
   527  
   528  				empty := true
   529  				if ve.ComparerName != "" {
   530  					empty = false
   531  					cmp = m.comparers[ve.ComparerName]
   532  					if cmp == nil {
   533  						fmt.Fprintf(stdout, "%s: offset: %d comparer %s not found",
   534  							arg, offset, ve.ComparerName)
   535  						ok = false
   536  						break
   537  					}
   538  					m.fmtKey.setForComparer(ve.ComparerName, m.comparers)
   539  				}
   540  				empty = empty && ve.MinUnflushedLogNum == 0 && ve.ObsoletePrevLogNum == 0 &&
   541  					ve.LastSeqNum == 0 && len(ve.DeletedFiles) == 0 &&
   542  					len(ve.NewFiles) == 0
   543  				if empty {
   544  					continue
   545  				}
   546  				// TODO(sbhola): add option to Apply that reports all errors instead of
   547  				// one error.
   548  				newv, err := bve.Apply(v, cmp.Compare, m.fmtKey.fn, 0, m.opts.Experimental.ReadCompactionRate, nil /* zombies */, manifest.AllowSplitUserKeys)
   549  				if err != nil {
   550  					fmt.Fprintf(stdout, "%s: offset: %d err: %s\n",
   551  						arg, offset, err)
   552  					fmt.Fprintf(stdout, "Version state before failed Apply\n")
   553  					m.printLevels(cmp.Compare, stdout, v)
   554  					fmt.Fprintf(stdout, "Version edit that failed\n")
   555  					for df := range ve.DeletedFiles {
   556  						fmt.Fprintf(stdout, "  deleted: L%d %s\n", df.Level, df.FileNum)
   557  					}
   558  					for _, nf := range ve.NewFiles {
   559  						fmt.Fprintf(stdout, "  added: L%d %s:%d",
   560  							nf.Level, nf.Meta.FileNum, nf.Meta.Size)
   561  						formatSeqNumRange(stdout, nf.Meta.SmallestSeqNum, nf.Meta.LargestSeqNum)
   562  						formatKeyRange(stdout, m.fmtKey, &nf.Meta.Smallest, &nf.Meta.Largest)
   563  						fmt.Fprintf(stdout, "\n")
   564  					}
   565  					ok = false
   566  					break
   567  				}
   568  				v = newv
   569  			}
   570  		}()
   571  	}
   572  	if ok {
   573  		fmt.Fprintf(stdout, "OK\n")
   574  	}
   575  }