github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/tool/lsm.go (about)

     1  // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package tool
     6  
     7  import (
     8  	"encoding/json"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"math"
    13  	"slices"
    14  
    15  	"github.com/cockroachdb/errors"
    16  	"github.com/cockroachdb/pebble"
    17  	"github.com/cockroachdb/pebble/internal/base"
    18  	"github.com/cockroachdb/pebble/internal/manifest"
    19  	"github.com/cockroachdb/pebble/record"
    20  	"github.com/cockroachdb/pebble/sstable"
    21  	"github.com/spf13/cobra"
    22  )
    23  
    24  //go:generate ./make_lsm_data.sh
    25  
// lsmFileMetadata is the per-file record stored in lsmState.Files and emitted
// as JSON for the visualization. buildEdits fills it in from the metadata of
// each file added by a version edit. Smallest and Largest are not keys
// themselves but indexes into lsmState.Keys, as returned by findKey.
type lsmFileMetadata struct {
	Size           uint64
	Smallest       int // ID of smallest key
	Largest        int // ID of largest key
	SmallestSeqNum uint64
	LargestSeqNum  uint64
	Virtual        bool
}
    34  
// lsmVersionEdit is the JSON form of one step of the visualization. It records
// the files a single manifest version edit added to and deleted from each
// level, together with the L0 sublevel assignments that buildEdits computes
// after applying the edit.
type lsmVersionEdit struct {
	// Reason for the edit: flushed, ingested, compacted, added.
	Reason string
	// Map from level to files added to the level.
	Added map[int][]base.FileNum `json:",omitempty"`
	// Map from level to files deleted from the level.
	Deleted map[int][]base.FileNum `json:",omitempty"`
	// L0 sublevels for any files with changed sublevels so far.
	Sublevels map[base.FileNum]int `json:",omitempty"`
}
    45  
// lsmKey describes one distinct sstable boundary key. It is used in two ways:
// in lsmState.Keys, Pretty holds the formatted user key; as the lookup key of
// lsmT.keyMap, Pretty holds the raw user key bytes instead. The field order
// must not change, because buildKeys constructs the map key with an unkeyed
// composite literal.
type lsmKey struct {
	Pretty string
	SeqNum uint64
	Kind   int
}
    51  
// lsmState is the complete data set handed to the visualization. runLSM
// marshals it to JSON and assigns it to the "data" variable of the generated
// HTML page. Manifest is the path passed on the command line, Edits, Files and
// Keys are populated by buildEdits and buildKeys, and StartEdit echoes the
// --start-edit flag.
type lsmState struct {
	Manifest  string
	Edits     []lsmVersionEdit                 `json:",omitempty"`
	Files     map[base.FileNum]lsmFileMetadata `json:",omitempty"`
	Keys      []lsmKey                         `json:",omitempty"`
	StartEdit int64
}
    59  
// lsmT implements the "lsm" command. It holds the cobra command, the values
// bound to its flags, and the state accumulated while turning a MANIFEST into
// the data for the visualization: the comparer picked up from the manifest
// (cmp), the JSON payload (state), and the map from boundary keys to their
// index in state.Keys (keyMap).
type lsmT struct {
	Root *cobra.Command

	// Configuration.
	opts      *pebble.Options
	comparers sstable.Comparers

	fmtKey    keyFormatter
	embed     bool
	pretty    bool
	startEdit int64
	endEdit   int64
	editCount int64

	cmp    *base.Comparer
	state  lsmState
	keyMap map[lsmKey]int
}
    78  
    79  func newLSM(opts *pebble.Options, comparers sstable.Comparers) *lsmT {
    80  	l := &lsmT{
    81  		opts:      opts,
    82  		comparers: comparers,
    83  	}
    84  	l.fmtKey.mustSet("quoted")
    85  
    86  	l.Root = &cobra.Command{
    87  		Use:   "lsm <manifest>",
    88  		Short: "LSM visualization tool",
    89  		Long: `
    90  Visualize the evolution of an LSM from the version edits in a MANIFEST.
    91  
    92  Given an input MANIFEST, output an HTML file containing a visualization showing
    93  the evolution of the LSM. Each version edit in the MANIFEST becomes a single
    94  step in the visualization. The 7 levels of the LSM are depicted with each
    95  sstable represented as a 1-pixel wide rectangle. The height of the rectangle is
    96  proportional to the size (in bytes) of the sstable. The sstables are displayed
    97  in the same order as they occur in the LSM. Note that the sstables from
    98  different levels are NOT aligned according to their start and end keys (doing so
    99  is also interesting, but it works against using the area of the rectangle to
   100  indicate size).
   101  `,
   102  		Args: cobra.ExactArgs(1),
   103  		RunE: l.runLSM,
   104  	}
   105  
   106  	l.Root.Flags().Var(&l.fmtKey, "key", "key formatter")
   107  	l.Root.Flags().BoolVar(&l.embed, "embed", true, "embed javascript in HTML (disable for development)")
   108  	l.Root.Flags().BoolVar(&l.pretty, "pretty", false, "pretty JSON output")
   109  	l.Root.Flags().Int64Var(&l.startEdit, "start-edit", 0, "starting edit # to include in visualization")
   110  	l.Root.Flags().Int64Var(&l.endEdit, "end-edit", math.MaxInt64, "ending edit # to include in visualization")
   111  	l.Root.Flags().Int64Var(&l.editCount, "edit-count", math.MaxInt64, "count of edits to include in visualization")
   112  	return l
   113  }
   114  
   115  func (l *lsmT) isFlagSet(name string) bool {
   116  	return l.Root.Flags().Changed(name)
   117  }
   118  
   119  func (l *lsmT) validateFlags() error {
   120  	if l.isFlagSet("edit-count") {
   121  		if l.isFlagSet("start-edit") && l.isFlagSet("end-edit") {
   122  			return errors.Errorf("edit-count cannot be provided with both start-edit and end-edit")
   123  		} else if l.isFlagSet("end-edit") {
   124  			return errors.Errorf("cannot use edit-count with end-edit, use start-edit and end-edit instead")
   125  		}
   126  	}
   127  
   128  	if l.startEdit > l.endEdit {
   129  		return errors.Errorf("start-edit cannot be after end-edit")
   130  	}
   131  
   132  	return nil
   133  }
   134  
   135  func (l *lsmT) runLSM(cmd *cobra.Command, args []string) error {
   136  	err := l.validateFlags()
   137  	if err != nil {
   138  		return err
   139  	}
   140  
   141  	edits := l.readManifest(args[0])
   142  	if edits == nil {
   143  		return nil
   144  	}
   145  
   146  	if l.startEdit > 0 {
   147  		edits, err = l.coalesceEdits(edits)
   148  		if err != nil {
   149  			return err
   150  		}
   151  	}
   152  	if l.endEdit < int64(len(edits)) {
   153  		edits = edits[:l.endEdit-l.startEdit+1]
   154  	}
   155  	if l.editCount < int64(len(edits)) {
   156  		edits = edits[:l.editCount]
   157  	}
   158  
   159  	l.buildKeys(edits)
   160  	err = l.buildEdits(edits)
   161  	if err != nil {
   162  		return err
   163  	}
   164  
   165  	w := l.Root.OutOrStdout()
   166  
   167  	fmt.Fprintf(w, `<!DOCTYPE html>
   168  <html>
   169  <head>
   170  <meta charset="utf-8">
   171  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
   172  `)
   173  	if l.embed {
   174  		fmt.Fprintf(w, "<style>%s</style>\n", lsmDataCSS)
   175  	} else {
   176  		fmt.Fprintf(w, "<link rel=\"stylesheet\" href=\"data/lsm.css\">\n")
   177  	}
   178  	fmt.Fprintf(w, "</head>\n<body>\n")
   179  	if l.embed {
   180  		fmt.Fprintf(w, "<script src=\"https://d3js.org/d3.v5.min.js\"></script>\n")
   181  	} else {
   182  		fmt.Fprintf(w, "<script src=\"data/d3.v5.min.js\"></script>\n")
   183  	}
   184  	fmt.Fprintf(w, "<script type=\"text/javascript\">\n")
   185  	fmt.Fprintf(w, "data = %s\n", l.formatJSON(l.state))
   186  	fmt.Fprintf(w, "</script>\n")
   187  	if l.embed {
   188  		fmt.Fprintf(w, "<script type=\"text/javascript\">%s</script>\n", lsmDataJS)
   189  	} else {
   190  		fmt.Fprintf(w, "<script src=\"data/lsm.js\"></script>\n")
   191  	}
   192  	fmt.Fprintf(w, "</body>\n</html>\n")
   193  
   194  	return nil
   195  }
   196  
   197  func (l *lsmT) readManifest(path string) []*manifest.VersionEdit {
   198  	f, err := l.opts.FS.Open(path)
   199  	if err != nil {
   200  		fmt.Fprintf(l.Root.OutOrStderr(), "%s\n", err)
   201  		return nil
   202  	}
   203  	defer f.Close()
   204  
   205  	l.state.Manifest = path
   206  
   207  	var edits []*manifest.VersionEdit
   208  	w := l.Root.OutOrStdout()
   209  	rr := record.NewReader(f, 0 /* logNum */)
   210  	for i := 0; ; i++ {
   211  		r, err := rr.Next()
   212  		if err != nil {
   213  			if err != io.EOF {
   214  				fmt.Fprintf(w, "%s\n", err)
   215  			}
   216  			break
   217  		}
   218  
   219  		ve := &manifest.VersionEdit{}
   220  		err = ve.Decode(r)
   221  		if err != nil {
   222  			fmt.Fprintf(w, "%s\n", err)
   223  			break
   224  		}
   225  		edits = append(edits, ve)
   226  
   227  		if ve.ComparerName != "" {
   228  			l.cmp = l.comparers[ve.ComparerName]
   229  			if l.cmp == nil {
   230  				fmt.Fprintf(w, "%d: unknown comparer %q\n", i, ve.ComparerName)
   231  				return nil
   232  			}
   233  			l.fmtKey.setForComparer(ve.ComparerName, l.comparers)
   234  		} else if l.cmp == nil {
   235  			l.cmp = base.DefaultComparer
   236  		}
   237  	}
   238  	return edits
   239  }
   240  
   241  func (l *lsmT) buildKeys(edits []*manifest.VersionEdit) {
   242  	var keys []base.InternalKey
   243  	for _, ve := range edits {
   244  		for i := range ve.NewFiles {
   245  			nf := &ve.NewFiles[i]
   246  			keys = append(keys, nf.Meta.Smallest)
   247  			keys = append(keys, nf.Meta.Largest)
   248  		}
   249  	}
   250  
   251  	l.keyMap = make(map[lsmKey]int)
   252  
   253  	slices.SortFunc(keys, func(a, b base.InternalKey) int {
   254  		return base.InternalCompare(l.cmp.Compare, a, b)
   255  	})
   256  
   257  	for i := range keys {
   258  		k := &keys[i]
   259  		if i > 0 && base.InternalCompare(l.cmp.Compare, keys[i-1], keys[i]) == 0 {
   260  			continue
   261  		}
   262  		j := len(l.state.Keys)
   263  		l.state.Keys = append(l.state.Keys, lsmKey{
   264  			Pretty: fmt.Sprint(l.fmtKey.fn(k.UserKey)),
   265  			SeqNum: k.SeqNum(),
   266  			Kind:   int(k.Kind()),
   267  		})
   268  		l.keyMap[lsmKey{string(k.UserKey), k.SeqNum(), int(k.Kind())}] = j
   269  	}
   270  }
   271  
   272  func (l *lsmT) buildEdits(edits []*manifest.VersionEdit) error {
   273  	l.state.Edits = nil
   274  	l.state.StartEdit = l.startEdit
   275  	l.state.Files = make(map[base.FileNum]lsmFileMetadata)
   276  	var currentFiles [manifest.NumLevels][]*manifest.FileMetadata
   277  
   278  	backings := make(map[base.DiskFileNum]*manifest.FileBacking)
   279  
   280  	for _, ve := range edits {
   281  		for _, i := range ve.CreatedBackingTables {
   282  			backings[i.DiskFileNum] = i
   283  		}
   284  		if len(ve.DeletedFiles) == 0 && len(ve.NewFiles) == 0 {
   285  			continue
   286  		}
   287  
   288  		edit := lsmVersionEdit{
   289  			Reason:  l.reason(ve),
   290  			Added:   make(map[int][]base.FileNum),
   291  			Deleted: make(map[int][]base.FileNum),
   292  		}
   293  
   294  		for j := range ve.NewFiles {
   295  			nf := &ve.NewFiles[j]
   296  			if b, ok := backings[nf.BackingFileNum]; ok && nf.Meta.Virtual {
   297  				nf.Meta.FileBacking = b
   298  			}
   299  			if _, ok := l.state.Files[nf.Meta.FileNum]; !ok {
   300  				l.state.Files[nf.Meta.FileNum] = lsmFileMetadata{
   301  					Size:           nf.Meta.Size,
   302  					Smallest:       l.findKey(nf.Meta.Smallest),
   303  					Largest:        l.findKey(nf.Meta.Largest),
   304  					SmallestSeqNum: nf.Meta.SmallestSeqNum,
   305  					LargestSeqNum:  nf.Meta.LargestSeqNum,
   306  					Virtual:        nf.Meta.Virtual,
   307  				}
   308  			}
   309  			edit.Added[nf.Level] = append(edit.Added[nf.Level], nf.Meta.FileNum)
   310  			currentFiles[nf.Level] = append(currentFiles[nf.Level], nf.Meta)
   311  		}
   312  
   313  		for df := range ve.DeletedFiles {
   314  			edit.Deleted[df.Level] = append(edit.Deleted[df.Level], df.FileNum)
   315  			for j, f := range currentFiles[df.Level] {
   316  				if f.FileNum == df.FileNum {
   317  					copy(currentFiles[df.Level][j:], currentFiles[df.Level][j+1:])
   318  					currentFiles[df.Level] = currentFiles[df.Level][:len(currentFiles[df.Level])-1]
   319  				}
   320  			}
   321  		}
   322  
   323  		v := manifest.NewVersion(l.cmp.Compare, l.fmtKey.fn, 0, currentFiles)
   324  		edit.Sublevels = make(map[base.FileNum]int)
   325  		for sublevel, files := range v.L0SublevelFiles {
   326  			iter := files.Iter()
   327  			for f := iter.First(); f != nil; f = iter.Next() {
   328  				if len(l.state.Edits) > 0 {
   329  					lastEdit := l.state.Edits[len(l.state.Edits)-1]
   330  					if sublevel2, ok := lastEdit.Sublevels[f.FileNum]; ok && sublevel == sublevel2 {
   331  						continue
   332  					}
   333  				}
   334  				edit.Sublevels[f.FileNum] = sublevel
   335  			}
   336  		}
   337  		l.state.Edits = append(l.state.Edits, edit)
   338  	}
   339  
   340  	if l.state.Edits == nil {
   341  		return errors.Errorf("there are no edits in [start-edit, end-edit], which add or delete files")
   342  	}
   343  	return nil
   344  }
   345  
   346  func (l *lsmT) coalesceEdits(edits []*manifest.VersionEdit) ([]*manifest.VersionEdit, error) {
   347  	if l.startEdit >= int64(len(edits)) {
   348  		return nil, errors.Errorf("start-edit is more than the number of edits, %d", len(edits))
   349  	}
   350  
   351  	be := manifest.BulkVersionEdit{}
   352  	be.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata)
   353  
   354  	// Coalesce all edits from [0, l.startEdit) into a BulkVersionEdit.
   355  	for _, ve := range edits[:l.startEdit] {
   356  		err := be.Accumulate(ve)
   357  		if err != nil {
   358  			return nil, err
   359  		}
   360  	}
   361  
   362  	startingEdit := edits[l.startEdit]
   363  	var beNewFiles []manifest.NewFileEntry
   364  	beDeletedFiles := make(map[manifest.DeletedFileEntry]*manifest.FileMetadata)
   365  
   366  	for level, deletedFiles := range be.Deleted {
   367  		for _, file := range deletedFiles {
   368  			dfe := manifest.DeletedFileEntry{
   369  				Level:   level,
   370  				FileNum: file.FileNum,
   371  			}
   372  			beDeletedFiles[dfe] = file
   373  		}
   374  	}
   375  
   376  	// Filter out added files that were also deleted in the BulkVersionEdit.
   377  	for level, newFiles := range be.Added {
   378  		for _, file := range newFiles {
   379  			dfe := manifest.DeletedFileEntry{
   380  				Level:   level,
   381  				FileNum: file.FileNum,
   382  			}
   383  
   384  			if _, ok := beDeletedFiles[dfe]; !ok {
   385  				beNewFiles = append(beNewFiles, manifest.NewFileEntry{
   386  					Level: level,
   387  					Meta:  file,
   388  				})
   389  			}
   390  		}
   391  	}
   392  	startingEdit.NewFiles = append(beNewFiles, startingEdit.NewFiles...)
   393  
   394  	edits = edits[l.startEdit:]
   395  	return edits, nil
   396  }
   397  
   398  func (l *lsmT) findKey(key base.InternalKey) int {
   399  	return l.keyMap[lsmKey{string(key.UserKey), key.SeqNum(), int(key.Kind())}]
   400  }
   401  
   402  func (l *lsmT) reason(ve *manifest.VersionEdit) string {
   403  	if len(ve.DeletedFiles) > 0 {
   404  		return "compacted"
   405  	}
   406  	if ve.MinUnflushedLogNum != 0 {
   407  		return "flushed"
   408  	}
   409  	for i := range ve.NewFiles {
   410  		nf := &ve.NewFiles[i]
   411  		if nf.Meta.SmallestSeqNum == nf.Meta.LargestSeqNum {
   412  			return "ingested"
   413  		}
   414  	}
   415  	return "added"
   416  }
   417  
   418  func (l *lsmT) formatJSON(v interface{}) string {
   419  	if l.pretty {
   420  		return l.prettyJSON(v)
   421  	}
   422  	return l.uglyJSON(v)
   423  }
   424  
   425  func (l *lsmT) uglyJSON(v interface{}) string {
   426  	data, err := json.Marshal(v)
   427  	if err != nil {
   428  		log.Fatal(err)
   429  	}
   430  	return string(data)
   431  }
   432  
   433  func (l *lsmT) prettyJSON(v interface{}) string {
   434  	data, err := json.MarshalIndent(v, "", "\t")
   435  	if err != nil {
   436  		log.Fatal(err)
   437  	}
   438  	return string(data)
   439  }