github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/tool/lsm.go (about) 1 // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package tool 6 7 import ( 8 "encoding/json" 9 "fmt" 10 "io" 11 "log" 12 "math" 13 "sort" 14 15 "github.com/cockroachdb/errors" 16 "github.com/cockroachdb/pebble" 17 "github.com/cockroachdb/pebble/internal/base" 18 "github.com/cockroachdb/pebble/internal/manifest" 19 "github.com/cockroachdb/pebble/record" 20 "github.com/cockroachdb/pebble/sstable" 21 "github.com/spf13/cobra" 22 ) 23 24 //go:generate ./make_lsm_data.sh 25 26 type lsmFileMetadata struct { 27 Size uint64 28 Smallest int // ID of smallest key 29 Largest int // ID of largest key 30 SmallestSeqNum uint64 31 LargestSeqNum uint64 32 } 33 34 type lsmVersionEdit struct { 35 // Reason for the edit: flushed, ingested, compacted, added. 36 Reason string 37 // Map from level to files added to the level. 38 Added map[int][]base.FileNum `json:",omitempty"` 39 // Map from level to files deleted from the level. 40 Deleted map[int][]base.FileNum `json:",omitempty"` 41 // L0 sublevels for any files with changed sublevels so far. 42 Sublevels map[base.FileNum]int `json:",omitempty"` 43 } 44 45 type lsmKey struct { 46 Pretty string 47 SeqNum uint64 48 Kind int 49 } 50 51 type lsmState struct { 52 Manifest string 53 Edits []lsmVersionEdit `json:",omitempty"` 54 Files map[base.FileNum]lsmFileMetadata `json:",omitempty"` 55 Keys []lsmKey `json:",omitempty"` 56 StartEdit int64 57 } 58 59 type lsmT struct { 60 Root *cobra.Command 61 62 // Configuration. 63 opts *pebble.Options 64 comparers sstable.Comparers 65 66 fmtKey keyFormatter 67 embed bool 68 pretty bool 69 startEdit int64 70 endEdit int64 71 editCount int64 72 73 cmp *base.Comparer 74 state lsmState 75 keyMap map[lsmKey]int 76 } 77 78 func newLSM(opts *pebble.Options, comparers sstable.Comparers) *lsmT { 79 l := &lsmT{ 80 opts: opts, 81 comparers: comparers, 82 } 83 l.fmtKey.mustSet("quoted") 84 85 l.Root = &cobra.Command{ 86 Use: "lsm <manifest>", 87 Short: "LSM visualization tool", 88 Long: ` 89 Visualize the evolution of an LSM from the version edits in a MANIFEST. 90 91 Given an input MANIFEST, output an HTML file containing a visualization showing 92 the evolution of the LSM. Each version edit in the MANIFEST becomes a single 93 step in the visualization. The 7 levels of the LSM are depicted with each 94 sstable represented as a 1-pixel wide rectangle. The height of the rectangle is 95 proportional to the size (in bytes) of the sstable. The sstables are displayed 96 in the same order as they occur in the LSM. Note that the sstables from 97 different levels are NOT aligned according to their start and end keys (doing so 98 is also interesting, but it works against using the area of the rectangle to 99 indicate size). 100 `, 101 Args: cobra.ExactArgs(1), 102 RunE: l.runLSM, 103 } 104 105 l.Root.Flags().Var(&l.fmtKey, "key", "key formatter") 106 l.Root.Flags().BoolVar(&l.embed, "embed", true, "embed javascript in HTML (disable for development)") 107 l.Root.Flags().BoolVar(&l.pretty, "pretty", false, "pretty JSON output") 108 l.Root.Flags().Int64Var(&l.startEdit, "start-edit", 0, "starting edit # to include in visualization") 109 l.Root.Flags().Int64Var(&l.endEdit, "end-edit", math.MaxInt64, "ending edit # to include in visualization") 110 l.Root.Flags().Int64Var(&l.editCount, "edit-count", math.MaxInt64, "count of edits to include in visualization") 111 return l 112 } 113 114 func (l *lsmT) isFlagSet(name string) bool { 115 return l.Root.Flags().Changed(name) 116 } 117 118 func (l *lsmT) validateFlags() error { 119 if l.isFlagSet("edit-count") { 120 if l.isFlagSet("start-edit") && l.isFlagSet("end-edit") { 121 return errors.Errorf("edit-count cannot be provided with both start-edit and end-edit") 122 } else if l.isFlagSet("end-edit") { 123 return errors.Errorf("cannot use edit-count with end-edit, use start-edit and end-edit instead") 124 } 125 } 126 127 if l.startEdit > l.endEdit { 128 return errors.Errorf("start-edit cannot be after end-edit") 129 } 130 131 return nil 132 } 133 134 func (l *lsmT) runLSM(cmd *cobra.Command, args []string) error { 135 err := l.validateFlags() 136 if err != nil { 137 return err 138 } 139 140 edits := l.readManifest(args[0]) 141 if edits == nil { 142 return nil 143 } 144 145 if l.startEdit > 0 { 146 edits, err = l.coalesceEdits(edits) 147 if err != nil { 148 return err 149 } 150 } 151 if l.endEdit < int64(len(edits)) { 152 edits = edits[:l.endEdit-l.startEdit+1] 153 } 154 if l.editCount < int64(len(edits)) { 155 edits = edits[:l.editCount] 156 } 157 158 l.buildKeys(edits) 159 err = l.buildEdits(edits) 160 if err != nil { 161 return err 162 } 163 164 w := l.Root.OutOrStdout() 165 166 fmt.Fprintf(w, `<!DOCTYPE html> 167 <html> 168 <head> 169 <meta charset="utf-8"> 170 <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> 171 `) 172 if l.embed { 173 fmt.Fprintf(w, "<style>%s</style>\n", lsmDataCSS) 174 } else { 175 fmt.Fprintf(w, "<link rel=\"stylesheet\" href=\"data/lsm.css\">\n") 176 } 177 fmt.Fprintf(w, "</head>\n<body>\n") 178 if l.embed { 179 fmt.Fprintf(w, "<script src=\"https://d3js.org/d3.v5.min.js\"></script>\n") 180 } else { 181 fmt.Fprintf(w, "<script src=\"data/d3.v5.min.js\"></script>\n") 182 } 183 fmt.Fprintf(w, "<script type=\"text/javascript\">\n") 184 fmt.Fprintf(w, "data = %s\n", l.formatJSON(l.state)) 185 fmt.Fprintf(w, "</script>\n") 186 if l.embed { 187 fmt.Fprintf(w, "<script type=\"text/javascript\">%s</script>\n", lsmDataJS) 188 } else { 189 fmt.Fprintf(w, "<script src=\"data/lsm.js\"></script>\n") 190 } 191 fmt.Fprintf(w, "</body>\n</html>\n") 192 193 return nil 194 } 195 196 func (l *lsmT) readManifest(path string) []*manifest.VersionEdit { 197 f, err := l.opts.FS.Open(path) 198 if err != nil { 199 fmt.Fprintf(l.Root.OutOrStderr(), "%s\n", err) 200 return nil 201 } 202 defer f.Close() 203 204 l.state.Manifest = path 205 206 var edits []*manifest.VersionEdit 207 w := l.Root.OutOrStdout() 208 rr := record.NewReader(f, 0 /* logNum */) 209 for i := 0; ; i++ { 210 r, err := rr.Next() 211 if err != nil { 212 if err != io.EOF { 213 fmt.Fprintf(w, "%s\n", err) 214 } 215 break 216 } 217 218 ve := &manifest.VersionEdit{} 219 err = ve.Decode(r) 220 if err != nil { 221 fmt.Fprintf(w, "%s\n", err) 222 break 223 } 224 edits = append(edits, ve) 225 226 if ve.ComparerName != "" { 227 l.cmp = l.comparers[ve.ComparerName] 228 if l.cmp == nil { 229 fmt.Fprintf(w, "%d: unknown comparer %q\n", i, ve.ComparerName) 230 return nil 231 } 232 l.fmtKey.setForComparer(ve.ComparerName, l.comparers) 233 } else if l.cmp == nil { 234 l.cmp = base.DefaultComparer 235 } 236 } 237 return edits 238 } 239 240 func (l *lsmT) buildKeys(edits []*manifest.VersionEdit) { 241 var keys []base.InternalKey 242 for _, ve := range edits { 243 for i := range ve.NewFiles { 244 nf := &ve.NewFiles[i] 245 keys = append(keys, nf.Meta.Smallest) 246 keys = append(keys, nf.Meta.Largest) 247 } 248 } 249 250 l.keyMap = make(map[lsmKey]int) 251 252 sort.Slice(keys, func(i, j int) bool { 253 return base.InternalCompare(l.cmp.Compare, keys[i], keys[j]) < 0 254 }) 255 256 for i := range keys { 257 k := &keys[i] 258 if i > 0 && base.InternalCompare(l.cmp.Compare, keys[i-1], keys[i]) == 0 { 259 continue 260 } 261 j := len(l.state.Keys) 262 l.state.Keys = append(l.state.Keys, lsmKey{ 263 Pretty: fmt.Sprint(l.fmtKey.fn(k.UserKey)), 264 SeqNum: k.SeqNum(), 265 Kind: int(k.Kind()), 266 }) 267 l.keyMap[lsmKey{string(k.UserKey), k.SeqNum(), int(k.Kind())}] = j 268 } 269 } 270 271 func (l *lsmT) buildEdits(edits []*manifest.VersionEdit) error { 272 l.state.Edits = nil 273 l.state.StartEdit = l.startEdit 274 l.state.Files = make(map[base.FileNum]lsmFileMetadata) 275 var currentFiles [manifest.NumLevels][]*manifest.FileMetadata 276 277 for _, ve := range edits { 278 if len(ve.DeletedFiles) == 0 && len(ve.NewFiles) == 0 { 279 continue 280 } 281 282 edit := lsmVersionEdit{ 283 Reason: l.reason(ve), 284 Added: make(map[int][]base.FileNum), 285 Deleted: make(map[int][]base.FileNum), 286 } 287 288 for j := range ve.NewFiles { 289 nf := &ve.NewFiles[j] 290 if _, ok := l.state.Files[nf.Meta.FileNum]; !ok { 291 l.state.Files[nf.Meta.FileNum] = lsmFileMetadata{ 292 Size: nf.Meta.Size, 293 Smallest: l.findKey(nf.Meta.Smallest), 294 Largest: l.findKey(nf.Meta.Largest), 295 SmallestSeqNum: nf.Meta.SmallestSeqNum, 296 LargestSeqNum: nf.Meta.LargestSeqNum, 297 } 298 } 299 edit.Added[nf.Level] = append(edit.Added[nf.Level], nf.Meta.FileNum) 300 currentFiles[nf.Level] = append(currentFiles[nf.Level], nf.Meta) 301 } 302 303 for df := range ve.DeletedFiles { 304 edit.Deleted[df.Level] = append(edit.Deleted[df.Level], df.FileNum) 305 for j, f := range currentFiles[df.Level] { 306 if f.FileNum == df.FileNum { 307 copy(currentFiles[df.Level][j:], currentFiles[df.Level][j+1:]) 308 currentFiles[df.Level] = currentFiles[df.Level][:len(currentFiles[df.Level])-1] 309 } 310 } 311 } 312 313 v := manifest.NewVersion(l.cmp.Compare, l.fmtKey.fn, 0, currentFiles) 314 edit.Sublevels = make(map[base.FileNum]int) 315 for sublevel, files := range v.L0SublevelFiles { 316 iter := files.Iter() 317 for f := iter.First(); f != nil; f = iter.Next() { 318 if len(l.state.Edits) > 0 { 319 lastEdit := l.state.Edits[len(l.state.Edits)-1] 320 if sublevel2, ok := lastEdit.Sublevels[f.FileNum]; ok && sublevel == sublevel2 { 321 continue 322 } 323 } 324 edit.Sublevels[f.FileNum] = sublevel 325 } 326 } 327 l.state.Edits = append(l.state.Edits, edit) 328 } 329 330 if l.state.Edits == nil { 331 return errors.Errorf("there are no edits in [start-edit, end-edit], which add or delete files") 332 } 333 return nil 334 } 335 336 func (l *lsmT) coalesceEdits(edits []*manifest.VersionEdit) ([]*manifest.VersionEdit, error) { 337 if l.startEdit >= int64(len(edits)) { 338 return nil, errors.Errorf("start-edit is more than the number of edits, %d", len(edits)) 339 } 340 341 be := manifest.BulkVersionEdit{} 342 be.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata) 343 344 // Coalesce all edits from [0, l.startEdit) into a BulkVersionEdit. 345 for _, ve := range edits[:l.startEdit] { 346 err := be.Accumulate(ve) 347 if err != nil { 348 return nil, err 349 } 350 } 351 352 startingEdit := edits[l.startEdit] 353 var beNewFiles []manifest.NewFileEntry 354 beDeletedFiles := make(map[manifest.DeletedFileEntry]*manifest.FileMetadata) 355 356 for level, deletedFiles := range be.Deleted { 357 for _, file := range deletedFiles { 358 dfe := manifest.DeletedFileEntry{ 359 Level: level, 360 FileNum: file.FileNum, 361 } 362 beDeletedFiles[dfe] = file 363 } 364 } 365 366 // Filter out added files that were also deleted in the BulkVersionEdit. 367 for level, newFiles := range be.Added { 368 for _, file := range newFiles { 369 dfe := manifest.DeletedFileEntry{ 370 Level: level, 371 FileNum: file.FileNum, 372 } 373 374 if _, ok := beDeletedFiles[dfe]; !ok { 375 beNewFiles = append(beNewFiles, manifest.NewFileEntry{ 376 Level: level, 377 Meta: file, 378 }) 379 } 380 } 381 } 382 startingEdit.NewFiles = append(beNewFiles, startingEdit.NewFiles...) 383 384 edits = edits[l.startEdit:] 385 return edits, nil 386 } 387 388 func (l *lsmT) findKey(key base.InternalKey) int { 389 return l.keyMap[lsmKey{string(key.UserKey), key.SeqNum(), int(key.Kind())}] 390 } 391 392 func (l *lsmT) reason(ve *manifest.VersionEdit) string { 393 if len(ve.DeletedFiles) > 0 { 394 return "compacted" 395 } 396 if ve.MinUnflushedLogNum != 0 { 397 return "flushed" 398 } 399 for i := range ve.NewFiles { 400 nf := &ve.NewFiles[i] 401 if nf.Meta.SmallestSeqNum == nf.Meta.LargestSeqNum { 402 return "ingested" 403 } 404 } 405 return "added" 406 } 407 408 func (l *lsmT) formatJSON(v interface{}) string { 409 if l.pretty { 410 return l.prettyJSON(v) 411 } 412 return l.uglyJSON(v) 413 } 414 415 func (l *lsmT) uglyJSON(v interface{}) string { 416 data, err := json.Marshal(v) 417 if err != nil { 418 log.Fatal(err) 419 } 420 return string(data) 421 } 422 423 func (l *lsmT) prettyJSON(v interface{}) string { 424 data, err := json.MarshalIndent(v, "", "\t") 425 if err != nil { 426 log.Fatal(err) 427 } 428 return string(data) 429 }