github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/tool/lsm.go (about) 1 // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package tool 6 7 import ( 8 "encoding/json" 9 "fmt" 10 "io" 11 "log" 12 "math" 13 "slices" 14 15 "github.com/cockroachdb/errors" 16 "github.com/cockroachdb/pebble" 17 "github.com/cockroachdb/pebble/internal/base" 18 "github.com/cockroachdb/pebble/internal/manifest" 19 "github.com/cockroachdb/pebble/record" 20 "github.com/cockroachdb/pebble/sstable" 21 "github.com/spf13/cobra" 22 ) 23 24 //go:generate ./make_lsm_data.sh 25 26 type lsmFileMetadata struct { 27 Size uint64 28 Smallest int // ID of smallest key 29 Largest int // ID of largest key 30 SmallestSeqNum uint64 31 LargestSeqNum uint64 32 Virtual bool 33 } 34 35 type lsmVersionEdit struct { 36 // Reason for the edit: flushed, ingested, compacted, added. 37 Reason string 38 // Map from level to files added to the level. 39 Added map[int][]base.FileNum `json:",omitempty"` 40 // Map from level to files deleted from the level. 41 Deleted map[int][]base.FileNum `json:",omitempty"` 42 // L0 sublevels for any files with changed sublevels so far. 43 Sublevels map[base.FileNum]int `json:",omitempty"` 44 } 45 46 type lsmKey struct { 47 Pretty string 48 SeqNum uint64 49 Kind int 50 } 51 52 type lsmState struct { 53 Manifest string 54 Edits []lsmVersionEdit `json:",omitempty"` 55 Files map[base.FileNum]lsmFileMetadata `json:",omitempty"` 56 Keys []lsmKey `json:",omitempty"` 57 StartEdit int64 58 } 59 60 type lsmT struct { 61 Root *cobra.Command 62 63 // Configuration. 64 opts *pebble.Options 65 comparers sstable.Comparers 66 67 fmtKey keyFormatter 68 embed bool 69 pretty bool 70 startEdit int64 71 endEdit int64 72 editCount int64 73 74 cmp *base.Comparer 75 state lsmState 76 keyMap map[lsmKey]int 77 } 78 79 func newLSM(opts *pebble.Options, comparers sstable.Comparers) *lsmT { 80 l := &lsmT{ 81 opts: opts, 82 comparers: comparers, 83 } 84 l.fmtKey.mustSet("quoted") 85 86 l.Root = &cobra.Command{ 87 Use: "lsm <manifest>", 88 Short: "LSM visualization tool", 89 Long: ` 90 Visualize the evolution of an LSM from the version edits in a MANIFEST. 91 92 Given an input MANIFEST, output an HTML file containing a visualization showing 93 the evolution of the LSM. Each version edit in the MANIFEST becomes a single 94 step in the visualization. The 7 levels of the LSM are depicted with each 95 sstable represented as a 1-pixel wide rectangle. The height of the rectangle is 96 proportional to the size (in bytes) of the sstable. The sstables are displayed 97 in the same order as they occur in the LSM. Note that the sstables from 98 different levels are NOT aligned according to their start and end keys (doing so 99 is also interesting, but it works against using the area of the rectangle to 100 indicate size). 101 `, 102 Args: cobra.ExactArgs(1), 103 RunE: l.runLSM, 104 } 105 106 l.Root.Flags().Var(&l.fmtKey, "key", "key formatter") 107 l.Root.Flags().BoolVar(&l.embed, "embed", true, "embed javascript in HTML (disable for development)") 108 l.Root.Flags().BoolVar(&l.pretty, "pretty", false, "pretty JSON output") 109 l.Root.Flags().Int64Var(&l.startEdit, "start-edit", 0, "starting edit # to include in visualization") 110 l.Root.Flags().Int64Var(&l.endEdit, "end-edit", math.MaxInt64, "ending edit # to include in visualization") 111 l.Root.Flags().Int64Var(&l.editCount, "edit-count", math.MaxInt64, "count of edits to include in visualization") 112 return l 113 } 114 115 func (l *lsmT) isFlagSet(name string) bool { 116 return l.Root.Flags().Changed(name) 117 } 118 119 func (l *lsmT) validateFlags() error { 120 if l.isFlagSet("edit-count") { 121 if l.isFlagSet("start-edit") && l.isFlagSet("end-edit") { 122 return errors.Errorf("edit-count cannot be provided with both start-edit and end-edit") 123 } else if l.isFlagSet("end-edit") { 124 return errors.Errorf("cannot use edit-count with end-edit, use start-edit and end-edit instead") 125 } 126 } 127 128 if l.startEdit > l.endEdit { 129 return errors.Errorf("start-edit cannot be after end-edit") 130 } 131 132 return nil 133 } 134 135 func (l *lsmT) runLSM(cmd *cobra.Command, args []string) error { 136 err := l.validateFlags() 137 if err != nil { 138 return err 139 } 140 141 edits := l.readManifest(args[0]) 142 if edits == nil { 143 return nil 144 } 145 146 if l.startEdit > 0 { 147 edits, err = l.coalesceEdits(edits) 148 if err != nil { 149 return err 150 } 151 } 152 if l.endEdit < int64(len(edits)) { 153 edits = edits[:l.endEdit-l.startEdit+1] 154 } 155 if l.editCount < int64(len(edits)) { 156 edits = edits[:l.editCount] 157 } 158 159 l.buildKeys(edits) 160 err = l.buildEdits(edits) 161 if err != nil { 162 return err 163 } 164 165 w := l.Root.OutOrStdout() 166 167 fmt.Fprintf(w, `<!DOCTYPE html> 168 <html> 169 <head> 170 <meta charset="utf-8"> 171 <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> 172 `) 173 if l.embed { 174 fmt.Fprintf(w, "<style>%s</style>\n", lsmDataCSS) 175 } else { 176 fmt.Fprintf(w, "<link rel=\"stylesheet\" href=\"data/lsm.css\">\n") 177 } 178 fmt.Fprintf(w, "</head>\n<body>\n") 179 if l.embed { 180 fmt.Fprintf(w, "<script src=\"https://d3js.org/d3.v5.min.js\"></script>\n") 181 } else { 182 fmt.Fprintf(w, "<script src=\"data/d3.v5.min.js\"></script>\n") 183 } 184 fmt.Fprintf(w, "<script type=\"text/javascript\">\n") 185 fmt.Fprintf(w, "data = %s\n", l.formatJSON(l.state)) 186 fmt.Fprintf(w, "</script>\n") 187 if l.embed { 188 fmt.Fprintf(w, "<script type=\"text/javascript\">%s</script>\n", lsmDataJS) 189 } else { 190 fmt.Fprintf(w, "<script src=\"data/lsm.js\"></script>\n") 191 } 192 fmt.Fprintf(w, "</body>\n</html>\n") 193 194 return nil 195 } 196 197 func (l *lsmT) readManifest(path string) []*manifest.VersionEdit { 198 f, err := l.opts.FS.Open(path) 199 if err != nil { 200 fmt.Fprintf(l.Root.OutOrStderr(), "%s\n", err) 201 return nil 202 } 203 defer f.Close() 204 205 l.state.Manifest = path 206 207 var edits []*manifest.VersionEdit 208 w := l.Root.OutOrStdout() 209 rr := record.NewReader(f, 0 /* logNum */) 210 for i := 0; ; i++ { 211 r, err := rr.Next() 212 if err != nil { 213 if err != io.EOF { 214 fmt.Fprintf(w, "%s\n", err) 215 } 216 break 217 } 218 219 ve := &manifest.VersionEdit{} 220 err = ve.Decode(r) 221 if err != nil { 222 fmt.Fprintf(w, "%s\n", err) 223 break 224 } 225 edits = append(edits, ve) 226 227 if ve.ComparerName != "" { 228 l.cmp = l.comparers[ve.ComparerName] 229 if l.cmp == nil { 230 fmt.Fprintf(w, "%d: unknown comparer %q\n", i, ve.ComparerName) 231 return nil 232 } 233 l.fmtKey.setForComparer(ve.ComparerName, l.comparers) 234 } else if l.cmp == nil { 235 l.cmp = base.DefaultComparer 236 } 237 } 238 return edits 239 } 240 241 func (l *lsmT) buildKeys(edits []*manifest.VersionEdit) { 242 var keys []base.InternalKey 243 for _, ve := range edits { 244 for i := range ve.NewFiles { 245 nf := &ve.NewFiles[i] 246 keys = append(keys, nf.Meta.Smallest) 247 keys = append(keys, nf.Meta.Largest) 248 } 249 } 250 251 l.keyMap = make(map[lsmKey]int) 252 253 slices.SortFunc(keys, func(a, b base.InternalKey) int { 254 return base.InternalCompare(l.cmp.Compare, a, b) 255 }) 256 257 for i := range keys { 258 k := &keys[i] 259 if i > 0 && base.InternalCompare(l.cmp.Compare, keys[i-1], keys[i]) == 0 { 260 continue 261 } 262 j := len(l.state.Keys) 263 l.state.Keys = append(l.state.Keys, lsmKey{ 264 Pretty: fmt.Sprint(l.fmtKey.fn(k.UserKey)), 265 SeqNum: k.SeqNum(), 266 Kind: int(k.Kind()), 267 }) 268 l.keyMap[lsmKey{string(k.UserKey), k.SeqNum(), int(k.Kind())}] = j 269 } 270 } 271 272 func (l *lsmT) buildEdits(edits []*manifest.VersionEdit) error { 273 l.state.Edits = nil 274 l.state.StartEdit = l.startEdit 275 l.state.Files = make(map[base.FileNum]lsmFileMetadata) 276 var currentFiles [manifest.NumLevels][]*manifest.FileMetadata 277 278 backings := make(map[base.DiskFileNum]*manifest.FileBacking) 279 280 for _, ve := range edits { 281 for _, i := range ve.CreatedBackingTables { 282 backings[i.DiskFileNum] = i 283 } 284 if len(ve.DeletedFiles) == 0 && len(ve.NewFiles) == 0 { 285 continue 286 } 287 288 edit := lsmVersionEdit{ 289 Reason: l.reason(ve), 290 Added: make(map[int][]base.FileNum), 291 Deleted: make(map[int][]base.FileNum), 292 } 293 294 for j := range ve.NewFiles { 295 nf := &ve.NewFiles[j] 296 if b, ok := backings[nf.BackingFileNum]; ok && nf.Meta.Virtual { 297 nf.Meta.FileBacking = b 298 } 299 if _, ok := l.state.Files[nf.Meta.FileNum]; !ok { 300 l.state.Files[nf.Meta.FileNum] = lsmFileMetadata{ 301 Size: nf.Meta.Size, 302 Smallest: l.findKey(nf.Meta.Smallest), 303 Largest: l.findKey(nf.Meta.Largest), 304 SmallestSeqNum: nf.Meta.SmallestSeqNum, 305 LargestSeqNum: nf.Meta.LargestSeqNum, 306 Virtual: nf.Meta.Virtual, 307 } 308 } 309 edit.Added[nf.Level] = append(edit.Added[nf.Level], nf.Meta.FileNum) 310 currentFiles[nf.Level] = append(currentFiles[nf.Level], nf.Meta) 311 } 312 313 for df := range ve.DeletedFiles { 314 edit.Deleted[df.Level] = append(edit.Deleted[df.Level], df.FileNum) 315 for j, f := range currentFiles[df.Level] { 316 if f.FileNum == df.FileNum { 317 copy(currentFiles[df.Level][j:], currentFiles[df.Level][j+1:]) 318 currentFiles[df.Level] = currentFiles[df.Level][:len(currentFiles[df.Level])-1] 319 } 320 } 321 } 322 323 v := manifest.NewVersion(l.cmp.Compare, l.fmtKey.fn, 0, currentFiles) 324 edit.Sublevels = make(map[base.FileNum]int) 325 for sublevel, files := range v.L0SublevelFiles { 326 iter := files.Iter() 327 for f := iter.First(); f != nil; f = iter.Next() { 328 if len(l.state.Edits) > 0 { 329 lastEdit := l.state.Edits[len(l.state.Edits)-1] 330 if sublevel2, ok := lastEdit.Sublevels[f.FileNum]; ok && sublevel == sublevel2 { 331 continue 332 } 333 } 334 edit.Sublevels[f.FileNum] = sublevel 335 } 336 } 337 l.state.Edits = append(l.state.Edits, edit) 338 } 339 340 if l.state.Edits == nil { 341 return errors.Errorf("there are no edits in [start-edit, end-edit], which add or delete files") 342 } 343 return nil 344 } 345 346 func (l *lsmT) coalesceEdits(edits []*manifest.VersionEdit) ([]*manifest.VersionEdit, error) { 347 if l.startEdit >= int64(len(edits)) { 348 return nil, errors.Errorf("start-edit is more than the number of edits, %d", len(edits)) 349 } 350 351 be := manifest.BulkVersionEdit{} 352 be.AddedByFileNum = make(map[base.FileNum]*manifest.FileMetadata) 353 354 // Coalesce all edits from [0, l.startEdit) into a BulkVersionEdit. 355 for _, ve := range edits[:l.startEdit] { 356 err := be.Accumulate(ve) 357 if err != nil { 358 return nil, err 359 } 360 } 361 362 startingEdit := edits[l.startEdit] 363 var beNewFiles []manifest.NewFileEntry 364 beDeletedFiles := make(map[manifest.DeletedFileEntry]*manifest.FileMetadata) 365 366 for level, deletedFiles := range be.Deleted { 367 for _, file := range deletedFiles { 368 dfe := manifest.DeletedFileEntry{ 369 Level: level, 370 FileNum: file.FileNum, 371 } 372 beDeletedFiles[dfe] = file 373 } 374 } 375 376 // Filter out added files that were also deleted in the BulkVersionEdit. 377 for level, newFiles := range be.Added { 378 for _, file := range newFiles { 379 dfe := manifest.DeletedFileEntry{ 380 Level: level, 381 FileNum: file.FileNum, 382 } 383 384 if _, ok := beDeletedFiles[dfe]; !ok { 385 beNewFiles = append(beNewFiles, manifest.NewFileEntry{ 386 Level: level, 387 Meta: file, 388 }) 389 } 390 } 391 } 392 startingEdit.NewFiles = append(beNewFiles, startingEdit.NewFiles...) 393 394 edits = edits[l.startEdit:] 395 return edits, nil 396 } 397 398 func (l *lsmT) findKey(key base.InternalKey) int { 399 return l.keyMap[lsmKey{string(key.UserKey), key.SeqNum(), int(key.Kind())}] 400 } 401 402 func (l *lsmT) reason(ve *manifest.VersionEdit) string { 403 if len(ve.DeletedFiles) > 0 { 404 return "compacted" 405 } 406 if ve.MinUnflushedLogNum != 0 { 407 return "flushed" 408 } 409 for i := range ve.NewFiles { 410 nf := &ve.NewFiles[i] 411 if nf.Meta.SmallestSeqNum == nf.Meta.LargestSeqNum { 412 return "ingested" 413 } 414 } 415 return "added" 416 } 417 418 func (l *lsmT) formatJSON(v interface{}) string { 419 if l.pretty { 420 return l.prettyJSON(v) 421 } 422 return l.uglyJSON(v) 423 } 424 425 func (l *lsmT) uglyJSON(v interface{}) string { 426 data, err := json.Marshal(v) 427 if err != nil { 428 log.Fatal(err) 429 } 430 return string(data) 431 } 432 433 func (l *lsmT) prettyJSON(v interface{}) string { 434 data, err := json.MarshalIndent(v, "", "\t") 435 if err != nil { 436 log.Fatal(err) 437 } 438 return string(data) 439 }