github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/tool/lsm.go (about)

     1  // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package tool
     6  
     7  import (
     8  	"encoding/json"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"math"
    13  	"sort"
    14  
    15  	"github.com/cockroachdb/errors"
    16  	"github.com/cockroachdb/pebble"
    17  	"github.com/cockroachdb/pebble/internal/base"
    18  	"github.com/cockroachdb/pebble/internal/manifest"
    19  	"github.com/cockroachdb/pebble/record"
    20  	"github.com/cockroachdb/pebble/sstable"
    21  	"github.com/spf13/cobra"
    22  )
    23  
    24  //go:generate ./make_lsm_data.sh
    25  
// lsmFileMetadata is the per-sstable record stored in lsmState.Files and
// serialized to JSON for the visualization. The boundary keys are not stored
// inline: Smallest and Largest are indexes into lsmState.Keys, as assigned by
// buildKeys and looked up by findKey.
type lsmFileMetadata struct {
	Size           uint64
	Smallest       int // ID of smallest key
	Largest        int // ID of largest key
	SmallestSeqNum uint64
	LargestSeqNum  uint64
}
    33  
// lsmVersionEdit is the JSON form of one step of the visualization. buildEdits
// produces one for every manifest version edit that adds or deletes at least
// one file; version edits that do neither are skipped.
type lsmVersionEdit struct {
	// Reason for the edit: flushed, ingested, compacted, added.
	Reason string
	// Map from level to files added to the level.
	Added map[int][]base.FileNum `json:",omitempty"`
	// Map from level to files deleted from the level.
	Deleted map[int][]base.FileNum `json:",omitempty"`
	// L0 sublevels for any files with changed sublevels so far.
	Sublevels map[base.FileNum]int `json:",omitempty"`
}
    44  
// lsmKey identifies one sstable boundary key by user key, sequence number and
// kind. It is used in two ways: entries of lsmState.Keys carry the formatted
// user key in Pretty, whereas keys of lsmT.keyMap carry the raw user key bytes
// in Pretty so that lookups do not depend on the formatter.
type lsmKey struct {
	Pretty string
	SeqNum uint64
	Kind   int
}
    50  
// lsmState is the complete data set handed to the visualization. runLSM
// marshals it to JSON and emits it as the "data" variable of the generated
// HTML page. Manifest is the path passed on the command line, Edits, Files and
// Keys are filled in by buildEdits and buildKeys, and StartEdit records the
// value of the start-edit flag.
type lsmState struct {
	Manifest  string
	Edits     []lsmVersionEdit                 `json:",omitempty"`
	Files     map[base.FileNum]lsmFileMetadata `json:",omitempty"`
	Keys      []lsmKey                         `json:",omitempty"`
	StartEdit int64
}
    58  
// lsmT implements the "lsm" command, which renders the version edits of a
// MANIFEST as an HTML visualization.
type lsmT struct {
	// Root is the cobra command; its RunE handler is runLSM.
	Root *cobra.Command

	// Configuration.
	opts      *pebble.Options
	comparers sstable.Comparers

	// Values bound to command-line flags in newLSM.
	fmtKey    keyFormatter
	embed     bool
	pretty    bool
	startEdit int64
	endEdit   int64
	editCount int64

	// State populated while processing a manifest: cmp is chosen by
	// readManifest, state is the data emitted as JSON, and keyMap maps a
	// boundary key (with the raw user key in Pretty) to its index in
	// state.Keys.
	cmp    *base.Comparer
	state  lsmState
	keyMap map[lsmKey]int
}
    77  
    78  func newLSM(opts *pebble.Options, comparers sstable.Comparers) *lsmT {
    79  	l := &lsmT{
    80  		opts:      opts,
    81  		comparers: comparers,
    82  	}
    83  	l.fmtKey.mustSet("quoted")
    84  
    85  	l.Root = &cobra.Command{
    86  		Use:   "lsm <manifest>",
    87  		Short: "LSM visualization tool",
    88  		Long: `
    89  Visualize the evolution of an LSM from the version edits in a MANIFEST.
    90  
    91  Given an input MANIFEST, output an HTML file containing a visualization showing
    92  the evolution of the LSM. Each version edit in the MANIFEST becomes a single
    93  step in the visualization. The 7 levels of the LSM are depicted with each
    94  sstable represented as a 1-pixel wide rectangle. The height of the rectangle is
    95  proportional to the size (in bytes) of the sstable. The sstables are displayed
    96  in the same order as they occur in the LSM. Note that the sstables from
    97  different levels are NOT aligned according to their start and end keys (doing so
    98  is also interesting, but it works against using the area of the rectangle to
    99  indicate size).
   100  `,
   101  		Args: cobra.ExactArgs(1),
   102  		RunE: l.runLSM,
   103  	}
   104  
   105  	l.Root.Flags().Var(&l.fmtKey, "key", "key formatter")
   106  	l.Root.Flags().BoolVar(&l.embed, "embed", true, "embed javascript in HTML (disable for development)")
   107  	l.Root.Flags().BoolVar(&l.pretty, "pretty", false, "pretty JSON output")
   108  	l.Root.Flags().Int64Var(&l.startEdit, "start-edit", 0, "starting edit # to include in visualization")
   109  	l.Root.Flags().Int64Var(&l.endEdit, "end-edit", math.MaxInt64, "ending edit # to include in visualization")
   110  	l.Root.Flags().Int64Var(&l.editCount, "edit-count", math.MaxInt64, "count of edits to include in visualization")
   111  	return l
   112  }
   113  
   114  func (l *lsmT) isFlagSet(name string) bool {
   115  	return l.Root.Flags().Changed(name)
   116  }
   117  
   118  func (l *lsmT) validateFlags() error {
   119  	if l.isFlagSet("edit-count") {
   120  		if l.isFlagSet("start-edit") && l.isFlagSet("end-edit") {
   121  			return errors.Errorf("edit-count cannot be provided with both start-edit and end-edit")
   122  		} else if l.isFlagSet("end-edit") {
   123  			return errors.Errorf("cannot use edit-count with end-edit, use start-edit and end-edit instead")
   124  		}
   125  	}
   126  
   127  	if l.startEdit > l.endEdit {
   128  		return errors.Errorf("start-edit cannot be after end-edit")
   129  	}
   130  
   131  	return nil
   132  }
   133  
   134  func (l *lsmT) runLSM(cmd *cobra.Command, args []string) error {
   135  	err := l.validateFlags()
   136  	if err != nil {
   137  		return err
   138  	}
   139  
   140  	edits := l.readManifest(args[0])
   141  	if edits == nil {
   142  		return nil
   143  	}
   144  
   145  	if l.startEdit > 0 {
   146  		edits, err = l.coalesceEdits(edits)
   147  		if err != nil {
   148  			return err
   149  		}
   150  	}
   151  	if l.endEdit < int64(len(edits)) {
   152  		edits = edits[:l.endEdit-l.startEdit+1]
   153  	}
   154  	if l.editCount < int64(len(edits)) {
   155  		edits = edits[:l.editCount]
   156  	}
   157  
   158  	l.buildKeys(edits)
   159  	err = l.buildEdits(edits)
   160  	if err != nil {
   161  		return err
   162  	}
   163  
   164  	w := l.Root.OutOrStdout()
   165  
   166  	fmt.Fprintf(w, `<!DOCTYPE html>
   167  <html>
   168  <head>
   169  <meta charset="utf-8">
   170  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
   171  `)
   172  	if l.embed {
   173  		fmt.Fprintf(w, "<style>%s</style>\n", lsmDataCSS)
   174  	} else {
   175  		fmt.Fprintf(w, "<link rel=\"stylesheet\" href=\"data/lsm.css\">\n")
   176  	}
   177  	fmt.Fprintf(w, "</head>\n<body>\n")
   178  	if l.embed {
   179  		fmt.Fprintf(w, "<script src=\"https://d3js.org/d3.v5.min.js\"></script>\n")
   180  	} else {
   181  		fmt.Fprintf(w, "<script src=\"data/d3.v5.min.js\"></script>\n")
   182  	}
   183  	fmt.Fprintf(w, "<script type=\"text/javascript\">\n")
   184  	fmt.Fprintf(w, "data = %s\n", l.formatJSON(l.state))
   185  	fmt.Fprintf(w, "</script>\n")
   186  	if l.embed {
   187  		fmt.Fprintf(w, "<script type=\"text/javascript\">%s</script>\n", lsmDataJS)
   188  	} else {
   189  		fmt.Fprintf(w, "<script src=\"data/lsm.js\"></script>\n")
   190  	}
   191  	fmt.Fprintf(w, "</body>\n</html>\n")
   192  
   193  	return nil
   194  }
   195  
   196  func (l *lsmT) readManifest(path string) []*manifest.VersionEdit {
   197  	f, err := l.opts.FS.Open(path)
   198  	if err != nil {
   199  		fmt.Fprintf(l.Root.OutOrStderr(), "%s\n", err)
   200  		return nil
   201  	}
   202  	defer f.Close()
   203  
   204  	l.state.Manifest = path
   205  
   206  	var edits []*manifest.VersionEdit
   207  	w := l.Root.OutOrStdout()
   208  	rr := record.NewReader(f, 0 /* logNum */)
   209  	for i := 0; ; i++ {
   210  		r, err := rr.Next()
   211  		if err != nil {
   212  			if err != io.EOF {
   213  				fmt.Fprintf(w, "%s\n", err)
   214  			}
   215  			break
   216  		}
   217  
   218  		ve := &manifest.VersionEdit{}
   219  		err = ve.Decode(r)
   220  		if err != nil {
   221  			fmt.Fprintf(w, "%s\n", err)
   222  			break
   223  		}
   224  		edits = append(edits, ve)
   225  
   226  		if ve.ComparerName != "" {
   227  			l.cmp = l.comparers[ve.ComparerName]
   228  			if l.cmp == nil {
   229  				fmt.Fprintf(w, "%d: unknown comparer %q\n", i, ve.ComparerName)
   230  				return nil
   231  			}
   232  			l.fmtKey.setForComparer(ve.ComparerName, l.comparers)
   233  		} else if l.cmp == nil {
   234  			l.cmp = base.DefaultComparer
   235  		}
   236  	}
   237  	return edits
   238  }
   239  
   240  func (l *lsmT) buildKeys(edits []*manifest.VersionEdit) {
   241  	var keys []base.InternalKey
   242  	for _, ve := range edits {
   243  		for i := range ve.NewFiles {
   244  			nf := &ve.NewFiles[i]
   245  			keys = append(keys, nf.Meta.Smallest)
   246  			keys = append(keys, nf.Meta.Largest)
   247  		}
   248  	}
   249  
   250  	l.keyMap = make(map[lsmKey]int)
   251  
   252  	sort.Slice(keys, func(i, j int) bool {
   253  		return base.InternalCompare(l.cmp.Compare, keys[i], keys[j]) < 0
   254  	})
   255  
   256  	for i := range keys {
   257  		k := &keys[i]
   258  		if i > 0 && base.InternalCompare(l.cmp.Compare, keys[i-1], keys[i]) == 0 {
   259  			continue
   260  		}
   261  		j := len(l.state.Keys)
   262  		l.state.Keys = append(l.state.Keys, lsmKey{
   263  			Pretty: fmt.Sprint(l.fmtKey.fn(k.UserKey)),
   264  			SeqNum: k.SeqNum(),
   265  			Kind:   int(k.Kind()),
   266  		})
   267  		l.keyMap[lsmKey{string(k.UserKey), k.SeqNum(), int(k.Kind())}] = j
   268  	}
   269  }
   270  
   271  func (l *lsmT) buildEdits(edits []*manifest.VersionEdit) error {
   272  	l.state.Edits = nil
   273  	l.state.StartEdit = l.startEdit
   274  	l.state.Files = make(map[base.FileNum]lsmFileMetadata)
   275  	var currentFiles [manifest.NumLevels][]*manifest.FileMetadata
   276  
   277  	for _, ve := range edits {
   278  		if len(ve.DeletedFiles) == 0 && len(ve.NewFiles) == 0 {
   279  			continue
   280  		}
   281  
   282  		edit := lsmVersionEdit{
   283  			Reason:  l.reason(ve),
   284  			Added:   make(map[int][]base.FileNum),
   285  			Deleted: make(map[int][]base.FileNum),
   286  		}
   287  
   288  		for j := range ve.NewFiles {
   289  			nf := &ve.NewFiles[j]
   290  			if _, ok := l.state.Files[nf.Meta.FileNum]; !ok {
   291  				l.state.Files[nf.Meta.FileNum] = lsmFileMetadata{
   292  					Size:           nf.Meta.Size,
   293  					Smallest:       l.findKey(nf.Meta.Smallest),
   294  					Largest:        l.findKey(nf.Meta.Largest),
   295  					SmallestSeqNum: nf.Meta.SmallestSeqNum,
   296  					LargestSeqNum:  nf.Meta.LargestSeqNum,
   297  				}
   298  			}
   299  			edit.Added[nf.Level] = append(edit.Added[nf.Level], nf.Meta.FileNum)
   300  			currentFiles[nf.Level] = append(currentFiles[nf.Level], nf.Meta)
   301  		}
   302  
   303  		for df := range ve.DeletedFiles {
   304  			edit.Deleted[df.Level] = append(edit.Deleted[df.Level], df.FileNum)
   305  			for j, f := range currentFiles[df.Level] {
   306  				if f.FileNum == df.FileNum {
   307  					copy(currentFiles[df.Level][j:], currentFiles[df.Level][j+1:])
   308  					currentFiles[df.Level] = currentFiles[df.Level][:len(currentFiles[df.Level])-1]
   309  				}
   310  			}
   311  		}
   312  
   313  		v := manifest.NewVersion(l.cmp.Compare, l.fmtKey.fn, 0, currentFiles)
   314  		edit.Sublevels = make(map[base.FileNum]int)
   315  		for sublevel, files := range v.L0SublevelFiles {
   316  			iter := files.Iter()
   317  			for f := iter.First(); f != nil; f = iter.Next() {
   318  				if len(l.state.Edits) > 0 {
   319  					lastEdit := l.state.Edits[len(l.state.Edits)-1]
   320  					if sublevel2, ok := lastEdit.Sublevels[f.FileNum]; ok && sublevel == sublevel2 {
   321  						continue
   322  					}
   323  				}
   324  				edit.Sublevels[f.FileNum] = sublevel
   325  			}
   326  		}
   327  		l.state.Edits = append(l.state.Edits, edit)
   328  	}
   329  
   330  	if l.state.Edits == nil {
   331  		return errors.Errorf("there are no edits in [start-edit, end-edit], which add or delete files")
   332  	}
   333  	return nil
   334  }
   335  
   336  func (l *lsmT) coalesceEdits(edits []*manifest.VersionEdit) ([]*manifest.VersionEdit, error) {
   337  	if l.startEdit >= int64(len(edits)) {
   338  		return nil, errors.Errorf("start-edit is more than the number of edits, %d", len(edits))
   339  	}
   340  
   341  	be := manifest.BulkVersionEdit{}
   342  	be.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata)
   343  
   344  	// Coalesce all edits from [0, l.startEdit) into a BulkVersionEdit.
   345  	for _, ve := range edits[:l.startEdit] {
   346  		err := be.Accumulate(ve)
   347  		if err != nil {
   348  			return nil, err
   349  		}
   350  	}
   351  
   352  	startingEdit := edits[l.startEdit]
   353  	var beNewFiles []manifest.NewFileEntry
   354  	beDeletedFiles := make(map[manifest.DeletedFileEntry]*manifest.FileMetadata)
   355  
   356  	for level, deletedFiles := range be.Deleted {
   357  		for _, file := range deletedFiles {
   358  			dfe := manifest.DeletedFileEntry{
   359  				Level:   level,
   360  				FileNum: file.FileNum,
   361  			}
   362  			beDeletedFiles[dfe] = file
   363  		}
   364  	}
   365  
   366  	// Filter out added files that were also deleted in the BulkVersionEdit.
   367  	for level, newFiles := range be.Added {
   368  		for _, file := range newFiles {
   369  			dfe := manifest.DeletedFileEntry{
   370  				Level:   level,
   371  				FileNum: file.FileNum,
   372  			}
   373  
   374  			if _, ok := beDeletedFiles[dfe]; !ok {
   375  				beNewFiles = append(beNewFiles, manifest.NewFileEntry{
   376  					Level: level,
   377  					Meta:  file,
   378  				})
   379  			}
   380  		}
   381  	}
   382  	startingEdit.NewFiles = append(beNewFiles, startingEdit.NewFiles...)
   383  
   384  	edits = edits[l.startEdit:]
   385  	return edits, nil
   386  }
   387  
   388  func (l *lsmT) findKey(key base.InternalKey) int {
   389  	return l.keyMap[lsmKey{string(key.UserKey), key.SeqNum(), int(key.Kind())}]
   390  }
   391  
   392  func (l *lsmT) reason(ve *manifest.VersionEdit) string {
   393  	if len(ve.DeletedFiles) > 0 {
   394  		return "compacted"
   395  	}
   396  	if ve.MinUnflushedLogNum != 0 {
   397  		return "flushed"
   398  	}
   399  	for i := range ve.NewFiles {
   400  		nf := &ve.NewFiles[i]
   401  		if nf.Meta.SmallestSeqNum == nf.Meta.LargestSeqNum {
   402  			return "ingested"
   403  		}
   404  	}
   405  	return "added"
   406  }
   407  
   408  func (l *lsmT) formatJSON(v interface{}) string {
   409  	if l.pretty {
   410  		return l.prettyJSON(v)
   411  	}
   412  	return l.uglyJSON(v)
   413  }
   414  
   415  func (l *lsmT) uglyJSON(v interface{}) string {
   416  	data, err := json.Marshal(v)
   417  	if err != nil {
   418  		log.Fatal(err)
   419  	}
   420  	return string(data)
   421  }
   422  
   423  func (l *lsmT) prettyJSON(v interface{}) string {
   424  	data, err := json.MarshalIndent(v, "", "\t")
   425  	if err != nil {
   426  		log.Fatal(err)
   427  	}
   428  	return string(data)
   429  }