github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/mkbench/write.go

package main

import (
	"bufio"
	"bytes"
	"compress/bzip2"
	"compress/gzip"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/cockroachdb/errors/oserror"
	"github.com/spf13/cobra"
)

// A note to the reader on nomenclature used in this command.
//
// The write-throughput benchmark is generated by a roachtest with a number of
// independent worker VMs running the same benchmark (to allow for an average
// value to be recorded).
//
// An instance of the roachtest on a given day, for a given workload type (e.g.
// values of size 1024B, values of size 64B, etc.) is modelled as a `writeRun`.
// Each worker VM in a `writeRun` produces data modelled as a `rawWriteRun`.
// Each `rawWriteRun` contains the raw data points emitted periodically by the
// VM; these are modelled as `writePoint`s.
//
// A `writeWorkload` (i.e. singular) models all data for a particular type of
// benchmark run (e.g. values of size 1024B), across all days. It is a mapping
// of day to `writeRun`, which is a collection of `rawWriteRun`s.
//
// The `writeWorkloads` (i.e. plural) is a mapping from workload name to its
// `writeWorkload`.
//
// The data can be thought of as being modelled as follows:
//
//	`writeWorkloads`-----------------------------------------\
//	  - workload-name-A: `writeWorkload`------------------\  |
//	    - day-1: `writeRun`----------------------------\  |  |
//	      - VM-1: `rawWriteRun`---------------------\  |  |  |
//	        [ ... raw data point ... ] `writePoint` x  |  |  |
//	        ...                                        |  |  |
//	      - VM-N:                                      |  |  |
//	        [ ... raw data point ... ]                 x  |  |
//	        ...                                           |  |
//	    - day-N:                                          |  |
//	      - VM-1:                                         |  |
//	        [ ... raw data point ... ]                    |  |
//	        ...                                           |  |
//	      - VM-N:                                         |  |
//	        [ ... raw data point ... ]                    x  |
//	        ...                                              |
//	  - workload-name-Z:                                     |
//	    - day-1:                                             |
//	      - VM-1:                                            |
//	        [ ... raw data point ... ]                       |
//	        ...                                              |
//	      - VM-N:                                            |
//	        [ ... raw data point ... ]                       |
//	        ...                                              |
//	    - day-N:                                             |
//	      - VM-1:                                            |
//	        [ ... raw data point ... ]                       |
//	        ...                                              |
//	      - VM-N:                                            |
//	        [ ... raw data point ... ]                       x

const (
	// summaryFilename is the filename for the top-level summary output.
	summaryFilename = "summary.json"

	// rawRunFmt is the format string for raw benchmark data.
	rawRunFmt = "BenchmarkRaw%s %d ops/sec %v pass %s elapsed %d bytes %d levels %f writeAmp"
)

func getWriteCommand() *cobra.Command {
	c := &cobra.Command{
		Use:   "write",
		Short: "parse write throughput benchmark data",
		Long: `
Parses write-throughput benchmark data into two sets of JSON "summary" files:

1. A top-level summary.json file. Data in this file is reported per-day, per
workload (i.e. values=1024, etc.), and is responsible for the top-level
write-throughput visualizations on the Pebble benchmarks page.

Each data-point for a time-series contains an ops/sec figure (measured as a
simple average over all data points for that workload run), and a relative path
to a per-run summary JSON file, containing the raw data for the run.

2. A per-run *-summary.json file. Data in this file contains the raw data for
each of the benchmark instances participating in the workload run on the given
day. Each key in the file is the relative path to the original raw data file.
Each data point contains the calculated optimal ops/sec for the instance of the
run (see split.go for more detail on the algorithm), in addition to the raw data
in CSV format.

This command can be run without flags at the root of the directory containing
the raw data. By default the raw data will be pulled from "data", and the
resulting top-level and per-run summary files are written to "write-throughput".
Both locations can be overridden with the --data-dir and --summary-dir flags,
respectively.
`,
		RunE: func(cmd *cobra.Command, args []string) error {
			dataDir, err := cmd.Flags().GetString("data-dir")
			if err != nil {
				return err
			}

			summaryDir, err := cmd.Flags().GetString("summary-dir")
			if err != nil {
				return err
			}

			return parseWrite(dataDir, summaryDir)
		},
	}

	c.Flags().String("data-dir", "data", "path to the raw data directory")
	c.Flags().String("summary-dir", "write-throughput", "output directory containing the summary files")
	c.SilenceUsage = true

	return c
}

// writePoint is a raw datapoint from an individual write-throughput benchmark
// run.
type writePoint struct {
	elapsedSecs int
	opsSec      int
	passed      bool
	size        uint64
	levels      int
	writeAmp    float64
}

// formatCSV returns a comma-separated string representation of the datapoint.
func (p writePoint) formatCSV() string {
	return fmt.Sprintf(
		"%d,%d,%v,%d,%d,%.2f",
		p.elapsedSecs, p.opsSec, p.passed, p.size, p.levels, p.writeAmp)
}

// rawWriteRun is a collection of datapoints from a single instance of a
// benchmark run (i.e. datapoints comprising a single roachtest instance of a
// write-throughput benchmark).
type rawWriteRun struct {
	points []writePoint
	split  int // memoized
}

// opsPerSecSplit returns an optimal-split point that divides the passes and
// fails from the datapoints in a rawWriteRun.
func (r *rawWriteRun) opsPerSecSplit() int {
	if r.split > 0 {
		return r.split
	}

	// Pre-process by partitioning the datapoints into passes and fails.
	var passes, fails []int
	for _, p := range r.points {
		if p.passed {
			passes = append(passes, p.opsSec)
		} else {
			fails = append(fails, p.opsSec)
		}
	}

	// Compute and cache the split point, as we only need to calculate it once.
	split := findOptimalSplit(passes, fails)
	r.split = split

	return split
}

// writeAmp returns the value of the write-amplification at the end of the run.
func (r *rawWriteRun) writeAmp() float64 {
	return r.points[len(r.points)-1].writeAmp
}

// formatCSV returns a comma-separated string representation of the rawWriteRun.
// The value is a newline-delimited string comprised of the CSV representations
// of the individual writePoints.
func (r rawWriteRun) formatCSV() string {
	var b bytes.Buffer
	for _, p := range r.points {
		_, _ = fmt.Fprintf(&b, "%s\n", p.formatCSV())
	}
	return b.String()
}

// writeRunSummary represents a single summary datapoint across all rawWriteRuns
// that comprise a writeRun. The datapoint contains a summary ops-per-second
// value, in addition to a path to the summary.json file with the combined data
// for the run.
type writeRunSummary struct {
	Name        string  `json:"name"`
	Date        string  `json:"date"`
	OpsSec      int     `json:"opsSec"`
	WriteAmp    float64 `json:"writeAmp"`
	SummaryPath string  `json:"summaryPath"`
}

// writeWorkloadSummary is an alias for a slice of writeRunSummaries.
type writeWorkloadSummary []writeRunSummary

// writeRun is a collection of one or more rawWriteRuns (i.e. the union of all
// rawWriteRuns from each worker participating in the roachtest cluster used for
// running the write-throughput benchmarks).
type writeRun struct {
	// name is the benchmark workload name (i.e. "values=1024").
	name string

	// date is the date on which the writeRun took place.
	date string

	// dir is the path to the directory containing the raw data. The path is
	// relative to the data-dir.
	dir string

	// rawRuns is a map from input data filename to its rawWriteRun data.
	rawRuns map[string]rawWriteRun
}

// summaryFilename returns the filename to be used for storing the summary
// output for the writeRun. The filename preserves the original data source path
// for ease of debugging / data-provenance.
func (r writeRun) summaryFilename() string {
	parts := strings.Split(r.dir, string(os.PathSeparator))
	parts = append(parts, summaryFilename)
	return strings.Join(parts, "-")
}

// summarize computes a writeRunSummary datapoint for the writeRun.
func (r writeRun) summarize() writeRunSummary {
	var (
		sumOpsSec   int
		sumWriteAmp float64
	)
	for _, rr := range r.rawRuns {
		sumOpsSec += rr.opsPerSecSplit()
		sumWriteAmp += rr.writeAmp()
	}
	l := len(r.rawRuns)

	return writeRunSummary{
		Name:        r.name,
		Date:        r.date,
		SummaryPath: r.summaryFilename(),
		// Calculate an average across all raw runs in this run.
		// TODO(travers): test how this works in practice, after we have
		// gathered enough data.
		OpsSec:   sumOpsSec / l,
		WriteAmp: math.Round(100*sumWriteAmp/float64(l)) / 100, // round to 2dp.
	}
}

// cookedWriteRun is a representation of a previously parsed (or "cooked")
// writeRun.
type cookedWriteRun struct {
	OpsSec int    `json:"opsSec"`
	Raw    string `json:"rawData"`
}

// formatSummaryJSON returns a JSON representation of the combined raw data from
// all rawWriteRuns that comprise the writeRun. It has the form:
//
//	{
//	  "original-raw-write-run-log-file-1.gz": {
//	    "opsSec": ...,
//	    "rawData": ...,
//	  },
//	  ...
//	  "original-raw-write-run-log-file-N.gz": {
//	    "opsSec": ...,
//	    "rawData": ...,
//	  },
//	}
func (r writeRun) formatSummaryJSON() ([]byte, error) {
	m := make(map[string]cookedWriteRun)
	for name, data := range r.rawRuns {
		m[name] = cookedWriteRun{
			OpsSec: data.opsPerSecSplit(),
			Raw:    data.formatCSV(),
		}
	}
	return prettyJSON(&m), nil
}

// writeWorkload is a map from "day" to the corresponding writeRun, for a given
// write-throughput benchmark workload (i.e. values=1024).
type writeWorkload struct {
	days map[string]*writeRun // map from day to runs for the given workload
}

// writeWorkloads is an alias for a map from workload name to its corresponding
// map from day to writeRun.
type writeWorkloads map[string]*writeWorkload

// nameDay is a (name, day) tuple, used as a map key.
type nameDay struct {
	name, day string
}

type writeLoader struct {
	// dataDir is the path to the root directory containing the raw data.
	dataDir string

	// summaryDir is the path to the directory containing the summary files.
	summaryDir string

	// workloads is a map from workload name to its corresponding data.
	workloads writeWorkloads

	// cooked is a "set" of (workload, day) tuples representing whether
	// previously parsed data was present for the (workload, day).
	cooked map[nameDay]bool

	// cookedSummaries is a map from workload name to previously generated data
	// for the workload. This data is "mixed-in" with new data when the summary
	// files are written out.
	cookedSummaries map[string]writeWorkloadSummary
}

// newWriteLoader returns a new writeLoader that can be used to generate the
// summary files for write-throughput benchmarking data.
func newWriteLoader(dataDir, summaryDir string) *writeLoader {
	return &writeLoader{
		dataDir:         dataDir,
		summaryDir:      summaryDir,
		workloads:       make(writeWorkloads),
		cooked:          make(map[nameDay]bool),
		cookedSummaries: make(map[string]writeWorkloadSummary),
	}
}

// loadCooked loads previously summarized write-throughput benchmark data.
func (l *writeLoader) loadCooked() error {
	b, err := os.ReadFile(filepath.Join(l.summaryDir, summaryFilename))
	if err != nil {
		// The first ever run will not find the summary file. Return early in
		// this case, and we'll start afresh.
		if oserror.IsNotExist(err) {
			return nil
		}
		return err
	}

	// Reconstruct the summary.
	summaries := make(map[string]writeWorkloadSummary)
	err = json.Unmarshal(b, &summaries)
	if err != nil {
		return err
	}

	// Populate the cooked map.
	l.cookedSummaries = summaries

	// Populate the set used for determining whether we can skip a raw file.
	for name, workloadSummary := range summaries {
		for _, runSummary := range workloadSummary {
			l.cooked[nameDay{name, runSummary.Date}] = true
		}
	}

	return nil
}

// loadRaw loads the raw data from the root data directory.
func (l *writeLoader) loadRaw() error {
	walkFn := func(path, pathRel string, info os.FileInfo) error {
		// The relative directory structure is of the form:
		//   $day/pebble/write/$name/$run/$file
		parts := strings.Split(pathRel, string(os.PathSeparator))
		if len(parts) < 6 {
			return nil // stumble forward on invalid paths
		}

		// Filter out files that aren't in write benchmark directories.
		if parts[2] != "write" {
			return nil
		}
		day := parts[0]

		f, err := os.Open(path)
		if err != nil {
			_, _ = fmt.Fprintf(os.Stderr, "%+v\n", err)
			return nil // stumble forward on error
		}
		defer func() { _ = f.Close() }()

		rd := io.Reader(f)
		if strings.HasSuffix(path, ".bz2") {
			rd = bzip2.NewReader(f)
		} else if strings.HasSuffix(path, ".gz") {
			var err error
			rd, err = gzip.NewReader(f)
			if err != nil {
				_, _ = fmt.Fprintf(os.Stderr, "%+v\n", err)
				return nil // stumble forward on error
			}
		}

		// Parse the data for this file and add to the appropriate workload.
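		// Each raw line is expected to match rawRunFmt. A line of the
		// following shape would parse successfully; the workload name and
		// the values shown here are illustrative only, not taken from real
		// data:
		//
		//   BenchmarkRawvalues=1024 2500 ops/sec true pass 16m40s elapsed 1073741824 bytes 6 levels 1.25 writeAmp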
		s := bufio.NewScanner(rd)
		r := rawWriteRun{}
		var name string
		for s.Scan() {
			line := s.Text()
			if !strings.HasPrefix(line, "BenchmarkRaw") {
				continue
			}

			var p writePoint
			var nameInner, elapsed string
			n, err := fmt.Sscanf(line, rawRunFmt,
				&nameInner, &p.opsSec, &p.passed, &elapsed, &p.size, &p.levels, &p.writeAmp)
			if err != nil || n != 7 {
				// Stumble forward on error.
				_, _ = fmt.Fprintf(os.Stderr, "%s: %v\n", s.Text(), err)
				continue
			}

			// The benchmark name in the first datapoint we see in the file is
			// assumed to be the same for all datapoints in the file.
			if name == "" {
				name = nameInner

				// Skip files for (workload, day) pairs that have been parsed
				// previously. Note that this relies on loadCooked having been
				// called previously to seed the map with cooked data.
				if ok := l.cooked[nameDay{name, day}]; ok {
					_, _ = fmt.Fprintf(os.Stderr,
						"skipping previously cooked data in file %s (workload=%q, day=%q)\n",
						pathRel, name, day)
					return nil
				}
			} else if name != nameInner {
				_, _ = fmt.Fprintf(os.Stderr,
					"WARN: benchmark name %q differs from previously seen name %q: %s\n",
					nameInner, name, s.Text())
			}

			// Convert the elapsed time into seconds.
			secs, err := time.ParseDuration(elapsed)
			if err != nil {
				// Stumble forward on error.
				_, _ = fmt.Fprintf(os.Stderr, "%s: %v\n", s.Text(), err)
				continue
			}
			p.elapsedSecs = int(secs.Seconds())

			// Add this data point to the collection of points for this run.
			r.points = append(r.points, p)
		}

		// Add the raw run to the map.
		l.addRawRun(name, day, pathRel, r)

		return nil
	}
	return walkDir(l.dataDir, walkFn)
}

// addRawRun adds a rawWriteRun to the corresponding datastructures by looking
// up the workload name (i.e. "values=1024"), then appending the rawWriteRun to
// the corresponding slice of all rawWriteRuns.
func (l *writeLoader) addRawRun(name, day, path string, raw rawWriteRun) {
	// Skip files with no points (i.e. files that couldn't be parsed).
	if len(raw.points) == 0 {
		return
	}

	_, _ = fmt.Fprintf(
		os.Stderr, "adding raw run: (workload=%q, day=%q); nPoints=%d; file=%s\n",
		name, day, len(raw.points), path)

	w := l.workloads[name]
	if w == nil {
		w = &writeWorkload{days: make(map[string]*writeRun)}
		l.workloads[name] = w
	}

	r := w.days[day]
	if r == nil {
		r = &writeRun{
			name:    name,
			date:    day,
			dir:     filepath.Dir(path),
			rawRuns: make(map[string]rawWriteRun),
		}
		w.days[day] = r
	}
	r.rawRuns[path] = raw
}

// cookSummary writes out the data in the loader to the summary file (new or
// existing).
func (l *writeLoader) cookSummary() error {
	summary := make(map[string]writeWorkloadSummary)
	for name, w := range l.workloads {
		summary[name] = cookWriteSummary(w)
	}

	// Mix in the previously cooked values.
	for name, cooked := range l.cookedSummaries {
		existing, ok := summary[name]
		if !ok {
			summary[name] = cooked
		} else {
			// We must merge and re-sort by date.
			existing = append(existing, cooked...)
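			// Note: sorting on the Date string assumes the $day directory
			// names compare lexicographically in chronological order (e.g. a
			// YYYYMMDD-style layout); the exact layout comes from the raw
			// data directory structure.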
			sort.Slice(existing, func(i, j int) bool {
				return existing[i].Date < existing[j].Date
			})
			summary[name] = existing
		}
	}
	b := prettyJSON(&summary)
	b = append(b, '\n')

	outputPath := filepath.Join(l.summaryDir, summaryFilename)
	err := os.WriteFile(outputPath, b, 0644)
	if err != nil {
		return err
	}

	return nil
}

// cookWriteSummary is a helper that generates the summary for a write workload
// by computing the per-day summaries across all runs.
func cookWriteSummary(w *writeWorkload) writeWorkloadSummary {
	days := make([]string, 0, len(w.days))
	for day := range w.days {
		days = append(days, day)
	}
	sort.Strings(days)

	var summary writeWorkloadSummary
	for _, day := range days {
		r := w.days[day]
		summary = append(summary, r.summarize())
	}

	return summary
}

// cookWriteRunSummaries writes out the per-run summary files.
func (l *writeLoader) cookWriteRunSummaries() error {
	for _, w := range l.workloads {
		for _, r := range w.days {
			// Write out files preserving the original directory structure for
			// ease of understanding / debugging.
			outputPath := filepath.Join(l.summaryDir, r.summaryFilename())
			if err := outputWriteRunSummary(r, outputPath); err != nil {
				return err
			}
		}
	}
	return nil
}

// outputWriteRunSummary is a helper that generates the summary JSON for the
// writeRun and writes it to the given output path.
func outputWriteRunSummary(r *writeRun, outputPath string) error {
	f, err := os.OpenFile(outputPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		return err
	}
	defer func() { _ = f.Close() }()

	b, err := r.formatSummaryJSON()
	if err != nil {
		return err
	}
	b = append(b, '\n')

	_, err = f.Write(b)
	return err
}

// parseWrite parses the raw write-throughput benchmark data and writes out the
// summary files.
func parseWrite(dataDir, summaryDir string) error {
	l := newWriteLoader(dataDir, summaryDir)
	if err := l.loadCooked(); err != nil {
		return err
	}

	if err := l.loadRaw(); err != nil {
		return err
	}

	if err := l.cookSummary(); err != nil {
		return err
	}

	return l.cookWriteRunSummaries()
}
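
// For reference, the top-level summary.json written by cookSummary maps each
// workload name to its writeWorkloadSummary (a slice of per-day summaries). A
// file of roughly the following shape would be produced; the workload name,
// date format, and values below are illustrative only, not taken from real
// data:
//
//	{
//	  "values=1024": [
//	    {
//	      "name": "values=1024",
//	      "date": "20220101",
//	      "opsSec": 1200,
//	      "writeAmp": 2.5,
//	      "summaryPath": "20220101-pebble-write-values=1024-1-summary.json"
//	    }
//	  ]
//	}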