
     1  package main
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"cmp"
     7  	"compress/bzip2"
     8  	"compress/gzip"
     9  	"encoding/json"
    10  	"fmt"
    11  	"io"
    12  	"math"
    13  	"os"
    14  	"path/filepath"
    15  	"slices"
    16  	"sort"
    17  	"strings"
    18  	"time"
    20  	""
    21  	""
    22  )
    24  // A note to the reader on nomenclature used in this command.
    25  //
    26  // The write-throughput benchmark is generated by a roachtest with a number of
    27  // independent worker VMs running the same benchmark (to allow for an average
    28  // value to be recorded).
    29  //
    30  // An instance of the roachtest on a given day, for a given workload type (e.g.
    31  // values of size 1024B, values of size 64B, etc.) is modelled as a `writeRun`.
    32  // Each worker VM in a `writeRun` produces data modelled as a `rawWriteRun`.
// Each `rawWriteRun` contains the raw data points emitted periodically by the
// VM, each of which is modelled as a `writePoint`.
    35  //
    36  // A `writeWorkload` (i.e. singular) models all data for a particular type of
    37  // benchmark run (e.g. values of size 1024B), across all days. It is a mapping
    38  // of day to `writeRun`, which is a collection of `rawWriteRun`s.
    39  //
    40  // The `writeWorkloads` (i.e. plural) is a mapping from workload name to its
    41  // `writeWorkload`.
    42  //
// The data can be thought of as being modelled as follows:
    44  //
    45  //                                     `writeWorkloads`---------\
    46  // - workload-name-A:                  `writeWorkload`-------\  |
    47  //   - day-1:                          `writeRun`---------\  |  |
    48  //      - VM-1:                        `rawWriteRun`----\ |  |  |
    49  //        [ ... raw data point ... ]   `writePoint`     x |  |  |
    50  //        ...                                             |  |  |
    51  //      - VM-N:                                           |  |  |
    52  //     	  [ ... raw data point ... ]                      x  |  |
    53  //     ...                                                   |  |
    54  //   - day-N:                                                |  |
    55  //      - VM-1:                                              |  |
    56  //     	  [ ... raw data point ... ]                         |  |
    57  //        ...                                                |  |
    58  //      - VM-N:                                              |  |
    59  //     	  [ ... raw data point ... ]                         x  |
    60  //   ...                                                        |
    61  // - workload-name-Z:                                           |
    62  //   - day-1:                                                   |
    63  //      - VM-1:                                                 |
    64  //     	  [ ... raw data point ... ]                            |
    65  //        ...                                                   |
    66  //      - VM-N:                                                 |
    67  //     	  [ ... raw data point ... ]                            |
    68  //     ...                                                      |
    69  //   - day-N:                                                   |
    70  //      - VM-1:                                                 |
    71  //     	  [ ... raw data point ... ]                            |
    72  //        ...                                                   |
    73  //      - VM-N:                                                 |
    74  //     	  [ ... raw data point ... ]                            x
const (
	// summaryFilename is the filename for the top-level summary output. It is
	// also used as the trailing component of every per-run summary filename
	// (see writeRun.summaryFilename).
	summaryFilename = "summary.json"

	// rawRunFmt is the format string for raw benchmark data. It is used with
	// fmt.Sscanf to parse a single "BenchmarkRaw..." line; in order, the verbs
	// capture the workload name, the ops/sec figure, whether the datapoint
	// passed, the elapsed time (a duration string), the size in bytes, the
	// number of levels and the write-amplification.
	rawRunFmt = "BenchmarkRaw%s %d ops/sec %v pass %s elapsed %d bytes %d levels %f writeAmp"
)
    84  func getWriteCommand() *cobra.Command {
    85  	c := &cobra.Command{
    86  		Use:   "write",
    87  		Short: "parse write throughput benchmark data",
    88  		Long: `
    89  Parses write-throughput benchmark data into two sets of JSON "summary" files:
    91  1. A top-level summary.json file. Data in this file is reported per-day, per
    92  workload (i.e. values=1024, etc.), and is responsible for the top-level
    93  write-throughput visualizations on the Pebble benchmarks page.
    95  Each data-point for a time-series contains an ops/sec figure (measured as a
    96  simple average over all data points for that workload run), and a relative path
    97  to a per-run summary JSON file, containing the raw data for the run.
    99  2. A per-run *-summary.json file. Data in this file contains the raw data for
   100  each of the benchmark instances participating in the workload run on the given
   101  day. Each key in the file is the relative path to the original raw data file.
   102  Each data point contains the calculated optimal ops/sec for the instance of the
   103  run (see split.go for more detail on the algorithm), in addition to the raw data
   104  in CSV format.
   106  This command can be run without flags at the root of the directory containing
   107  the raw data. By default the raw data will be pulled from "data", and the
   108  resulting top-level and per-run summary files are written to "write-throughput".
   109  Both locations can be overridden with the --data-dir and --summary-dir flags,
   110  respectively.
   111  `,
   112  		RunE: func(cmd *cobra.Command, args []string) error {
   113  			dataDir, err := cmd.Flags().GetString("data-dir")
   114  			if err != nil {
   115  				return err
   116  			}
   118  			summaryDir, err := cmd.Flags().GetString("summary-dir")
   119  			if err != nil {
   120  				return err
   121  			}
   123  			return parseWrite(dataDir, summaryDir)
   124  		},
   125  	}
   127  	c.Flags().String("data-dir", "data", "path to the raw data directory")
   128  	c.Flags().String("summary-dir", "write-throughput", "output directory containing the summary files")
   129  	c.SilenceUsage = true
   131  	return c
   132  }
   134  // writePoint is a raw datapoint from an individual write-throughput benchmark
   135  // run.
   136  type writePoint struct {
   137  	elapsedSecs int
   138  	opsSec      int
   139  	passed      bool
   140  	size        uint64
   141  	levels      int
   142  	writeAmp    float64
   143  }
   145  // formatCSV returns a comma-separated string representation of the datapoint.
   146  func (p writePoint) formatCSV() string {
   147  	return fmt.Sprintf(
   148  		"%d,%d,%v,%d,%d,%.2f",
   149  		p.elapsedSecs, p.opsSec, p.passed, p.size, p.levels, p.writeAmp)
   150  }
// rawWriteRun is a collection of datapoints from a single instance of a
// benchmark run (i.e. datapoints comprising a single roachtest instance of a
// write-throughput benchmark).
type rawWriteRun struct {
	// points holds the datapoints of the run. The last element is treated as
	// the end-of-run sample (see writeAmp).
	points []writePoint
	// split caches the result of opsPerSecSplit. Only values greater than zero
	// are treated as "already computed".
	split int // memoized
}
   160  // opsPerSecSplit returns an optimal-split point that divides the passes and
   161  // fails from the datapoints in a rawWriteRun.
   162  func (r *rawWriteRun) opsPerSecSplit() int {
   163  	if r.split > 0 {
   164  		return r.split
   165  	}
   167  	// Pre-process by partitioning the datapoint into passes and fails.
   168  	var passes, fails []int
   169  	for _, p := range r.points {
   170  		if p.passed {
   171  			passes = append(passes, p.opsSec)
   172  		} else {
   173  			fails = append(fails, p.opsSec)
   174  		}
   175  	}
   177  	// Compute and cache the split point as we only need to calculate it once.
   178  	split := findOptimalSplit(passes, fails)
   179  	r.split = split
   181  	return split
   182  }
   184  // writeAmp returns the value of the write-amplification at the end of the run.
   185  func (r *rawWriteRun) writeAmp() float64 {
   186  	return r.points[len(r.points)-1].writeAmp
   187  }
   189  // formatCSV returns a comma-separated string representation of the rawWriteRun.
   190  // The value itself is a newline-delimited string value comprised of the CSV
   191  // representation of the individual writePoints.
   192  func (r rawWriteRun) formatCSV() string {
   193  	var b bytes.Buffer
   194  	for _, p := range r.points {
   195  		_, _ = fmt.Fprintf(&b, "%s\n", p.formatCSV())
   196  	}
   197  	return b.String()
   198  }
// writeRunSummary represents a single summary datapoint across all rawWriteRuns
// that comprise a writeRun. The datapoint contains a summary ops-per-second
// value and write-amplification (both averaged over the run's rawWriteRuns, see
// writeRun.summarize), in addition to the name of the per-run summary file
// holding the combined data for the run.
type writeRunSummary struct {
	Name        string  `json:"name"`
	Date        string  `json:"date"`
	OpsSec      int     `json:"opsSec"`
	WriteAmp    float64 `json:"writeAmp"`
	SummaryPath string  `json:"summaryPath"`
}

// writeWorkloadSummary is a slice of writeRunSummaries, i.e. the series of
// per-day summary datapoints for a single workload.
type writeWorkloadSummary []writeRunSummary
// writeRun is a collection of one or more rawWriteRuns (i.e. the union of all
// rawWriteRuns from each worker participating in the roachtest cluster used for
// running the write-throughput benchmarks), for one workload on one day.
type writeRun struct {
	// name is the benchmark workload name (e.g. "values=1024").
	name string

	// date is the date on which the writeRun took place.
	date string

	// dir is the path to the directory containing the raw data. The path is
	// relative to the data-dir.
	dir string

	// rawRuns is a map from input data filename (its path relative to the
	// data-dir) to its rawWriteRun data.
	rawRuns map[string]rawWriteRun
}
   233  // summaryFilename returns the filename to be used for storing the summary
   234  // output for the writeRun. The filename preserves the original data source path
   235  // for ease of debugging / data-provenance.
   236  func (r writeRun) summaryFilename() string {
   237  	parts := strings.Split(r.dir, string(os.PathSeparator))
   238  	parts = append(parts, summaryFilename)
   239  	return strings.Join(parts, "-")
   240  }
   242  // summarize computes a writeRunSummary datapoint for the writeRun.
   243  func (r writeRun) summarize() writeRunSummary {
   244  	var (
   245  		sumOpsSec   int
   246  		sumWriteAmp float64
   247  	)
   248  	for _, rr := range r.rawRuns {
   249  		sumOpsSec += rr.opsPerSecSplit()
   250  		sumWriteAmp += rr.writeAmp()
   251  	}
   252  	l := len(r.rawRuns)
   254  	return writeRunSummary{
   255  		Name:,
   256  		Date:,
   257  		SummaryPath: r.summaryFilename(),
   258  		// Calculate an average across all raw runs in this run.
   259  		// TODO(travers): test how this works in practice, after we have
   260  		// gathered enough data.
   261  		OpsSec:   sumOpsSec / l,
   262  		WriteAmp: math.Round(100*sumWriteAmp/float64(l)) / 100, // round to 2dp.
   263  	}
   264  }
// cookedWriteRun is the parsed (or "cooked") representation of a single
// rawWriteRun, as stored in a per-run summary file. OpsSec is the run's optimal
// ops/sec split and Raw (JSON key "rawData") holds the run's datapoints as
// newline-delimited CSV.
type cookedWriteRun struct {
	OpsSec int    `json:"opsSec"`
	Raw    string `json:"rawData"`
}
   273  // formatSummaryJSON returns a JSON representation of the combined raw data from
   274  // all rawWriteRuns that comprise the writeRun. It has the form:
   275  //
   276  //	{
   277  //	  "original-raw-write-run-log-file-1.gz": {
   278  //	    "opsSec": ...,
   279  //	    "raw": ...,
   280  //	  },
   281  //	   ...
   282  //	  "original-raw-write-run-log-file-N.gz": {
   283  //	    "opsSec": ...,
   284  //	    "raw": ...,
   285  //	  },
   286  //	}
   287  func (r writeRun) formatSummaryJSON() ([]byte, error) {
   288  	m := make(map[string]cookedWriteRun)
   289  	for name, data := range r.rawRuns {
   290  		m[name] = cookedWriteRun{
   291  			OpsSec: data.opsPerSecSplit(),
   292  			Raw:    data.formatCSV(),
   293  		}
   294  	}
   295  	return prettyJSON(&m), nil
   296  }
// writeWorkload is a map from "day" to the corresponding writeRun, for a given
// write-throughput benchmark workload (e.g. values=1024).
type writeWorkload struct {
	days map[string]*writeRun // map from day to runs for the given workload
}

// writeWorkloads is a map from workload name to its corresponding
// writeWorkload (i.e. its map from day to writeRun).
type writeWorkloads map[string]*writeWorkload

// nameDay is a (name, day) tuple, used as a map key.
type nameDay struct {
	name, day string
}
// writeLoader accumulates write-throughput benchmark data, both freshly parsed
// raw data and previously summarized ("cooked") data, and writes out the
// summary files.
type writeLoader struct {
	// dataDir is the path to the root directory containing the raw data.
	dataDir string

	// summaryDir is the path to the directory containing the summary files.
	summaryDir string

	// workloads is a map from workload name to its corresponding data.
	workloads writeWorkloads

	// cooked is a "set" of (workload, day) tuples representing whether
	// previously parsed data was present for the (workload, day).
	cooked map[nameDay]bool

	// cookedSummaries is a map from workload name to previously generated data
	// for the workload. This data is "mixed-in" with new data when the summary
	// files are written out.
	cookedSummaries map[string]writeWorkloadSummary
}
   333  // newWriteLoader returns a new writeLoader that can be used to generate the
   334  // summary files for write-throughput benchmarking data.
   335  func newWriteLoader(dataDir, summaryDir string) *writeLoader {
   336  	return &writeLoader{
   337  		dataDir:         dataDir,
   338  		summaryDir:      summaryDir,
   339  		workloads:       make(writeWorkloads),
   340  		cooked:          make(map[nameDay]bool),
   341  		cookedSummaries: make(map[string]writeWorkloadSummary),
   342  	}
   343  }
   345  // loadCooked loads previously summarized write throughput benchmark data.
   346  func (l *writeLoader) loadCooked() error {
   347  	b, err := os.ReadFile(filepath.Join(l.summaryDir, summaryFilename))
   348  	if err != nil {
   349  		// The first ever run will not find the summary file. Return early in
   350  		// this case, and we'll start afresh.
   351  		if oserror.IsNotExist(err) {
   352  			return nil
   353  		}
   354  		return err
   355  	}
   357  	// Reconstruct the summary.
   358  	summaries := make(map[string]writeWorkloadSummary)
   359  	err = json.Unmarshal(b, &summaries)
   360  	if err != nil {
   361  		return err
   362  	}
   364  	// Populate the cooked map.
   365  	l.cookedSummaries = summaries
   367  	// Populate the set used for determining whether we can skip a raw file.
   368  	for name, workloadSummary := range summaries {
   369  		for _, runSummary := range workloadSummary {
   370  			l.cooked[nameDay{name, runSummary.Date}] = true
   371  		}
   372  	}
   374  	return nil
   375  }
   377  // loadRaw loads the raw data from the root data directory.
   378  func (l *writeLoader) loadRaw() error {
   379  	walkFn := func(path, pathRel string, info os.FileInfo) error {
   380  		// The relative directory structure is of the form:
   381  		//   $day/pebble/write/$name/$run/$file
   382  		parts := strings.Split(pathRel, string(os.PathSeparator))
   383  		if len(parts) < 6 {
   384  			return nil // stumble forward on invalid paths
   385  		}
   387  		// Filter out files that aren't in write benchmark directories.
   388  		if parts[2] != "write" {
   389  			return nil
   390  		}
   391  		day := parts[0]
   393  		f, err := os.Open(path)
   394  		if err != nil {
   395  			_, _ = fmt.Fprintf(os.Stderr, "%+v\n", err)
   396  			return nil // stumble forward on error
   397  		}
   398  		defer func() { _ = f.Close() }()
   400  		rd := io.Reader(f)
   401  		if strings.HasSuffix(path, ".bz2") {
   402  			rd = bzip2.NewReader(f)
   403  		} else if strings.HasSuffix(path, ".gz") {
   404  			var err error
   405  			rd, err = gzip.NewReader(f)
   406  			if err != nil {
   407  				_, _ = fmt.Fprintf(os.Stderr, "%+v\n", err)
   408  				return nil // stumble forward on error
   409  			}
   410  		}
   412  		// Parse the data for this file and add to the appropriate workload.
   413  		s := bufio.NewScanner(rd)
   414  		r := rawWriteRun{}
   415  		var name string
   416  		for s.Scan() {
   417  			line := s.Text()
   418  			if !strings.HasPrefix(line, "BenchmarkRaw") {
   419  				continue
   420  			}
   422  			var p writePoint
   423  			var nameInner, elapsed string
   424  			n, err := fmt.Sscanf(line, rawRunFmt,
   425  				&nameInner, &p.opsSec, &p.passed, &elapsed, &p.size, &p.levels, &p.writeAmp)
   426  			if err != nil || n != 7 {
   427  				// Stumble forward on error.
   428  				_, _ = fmt.Fprintf(os.Stderr, "%s: %v\n", s.Text(), err)
   429  				continue
   430  			}
   432  			// The first datapoint we see in the file is assumed to be the same
   433  			// for all datapoints.
   434  			if name == "" {
   435  				name = nameInner
   437  				// Skip files for (workload, day) pairs that have been parsed
   438  				// previously. Note that this relies on loadCooked having been
   439  				// called previously to seed the map with cooked data.
   440  				if ok := l.cooked[nameDay{name, day}]; ok {
   441  					_, _ = fmt.Fprintf(os.Stderr,
   442  						"skipping previously cooked data in file %s (workload=%q, day=%q)\n",
   443  						pathRel, name, day)
   444  					return nil
   445  				}
   446  			} else if name != nameInner {
   447  				_, _ = fmt.Fprintf(os.Stderr,
   448  					"WARN: benchmark name %q differs from previously seen name %q: %s",
   449  					nameInner, name, s.Text())
   450  			}
   452  			// Convert the elapsed time into seconds.
   453  			secs, err := time.ParseDuration(elapsed)
   454  			if err != nil {
   455  				// Stumble forward on error.
   456  				_, _ = fmt.Fprintf(os.Stderr, "%s: %v\n", s.Text(), err)
   457  				continue
   458  			}
   459  			p.elapsedSecs = int(secs.Seconds())
   461  			// Add this data point to the collection of points for this run.
   462  			r.points = append(r.points, p)
   463  		}
   465  		// Add the raw run to the map.
   466  		l.addRawRun(name, day, pathRel, r)
   468  		return nil
   469  	}
   470  	return walkDir(l.dataDir, walkFn)
   471  }
   473  // addRawRun adds a rawWriteRun to the corresponding datastructures by looking
   474  // up the workload name (i.e. "values=1024"), then appending the rawWriteRun to
   475  // the corresponding slice of all rawWriteRuns.
   476  func (l *writeLoader) addRawRun(name, day, path string, raw rawWriteRun) {
   477  	// Skip files with no points (i.e. files that couldn't be parsed).
   478  	if len(raw.points) == 0 {
   479  		return
   480  	}
   482  	_, _ = fmt.Fprintf(
   483  		os.Stderr, "adding raw run: (workload=%q, day=%q); nPoints=%d; file=%s\n",
   484  		name, day, len(raw.points), path)
   486  	w := l.workloads[name]
   487  	if w == nil {
   488  		w = &writeWorkload{days: make(map[string]*writeRun)}
   489  		l.workloads[name] = w
   490  	}
   492  	r := w.days[day]
   493  	if r == nil {
   494  		r = &writeRun{
   495  			name:    name,
   496  			date:    day,
   497  			dir:     filepath.Dir(path),
   498  			rawRuns: make(map[string]rawWriteRun),
   499  		}
   500  		w.days[day] = r
   501  	}
   502  	r.rawRuns[path] = raw
   503  }
   505  // cookSummary writes out the data in the loader to the summary file (new or
   506  // existing).
   507  func (l *writeLoader) cookSummary() error {
   508  	summary := make(map[string]writeWorkloadSummary)
   509  	for name, w := range l.workloads {
   510  		summary[name] = cookWriteSummary(w)
   511  	}
   513  	// Mix in the previously cooked values.
   514  	for name, cooked := range l.cookedSummaries {
   515  		existing, ok := summary[name]
   516  		if !ok {
   517  			summary[name] = cooked
   518  		} else {
   519  			// We must merge and re-sort by date.
   520  			existing = append(existing, cooked...)
   521  			slices.SortFunc(existing, func(a, b writeRunSummary) int {
   522  				return cmp.Compare(a.Date, b.Date)
   523  			})
   524  			summary[name] = existing
   525  		}
   526  	}
   527  	b := prettyJSON(&summary)
   528  	b = append(b, '\n')
   530  	outputPath := filepath.Join(l.summaryDir, summaryFilename)
   531  	err := os.WriteFile(outputPath, b, 0644)
   532  	if err != nil {
   533  		return err
   534  	}
   536  	return nil
   537  }
   539  // cookWriteSummary is a helper that generates the summary for a write workload
   540  // by computing the per-day summaries across all runs.
   541  func cookWriteSummary(w *writeWorkload) writeWorkloadSummary {
   542  	days := make([]string, 0, len(w.days))
   543  	for day := range w.days {
   544  		days = append(days, day)
   545  	}
   546  	sort.Strings(days)
   548  	var summary writeWorkloadSummary
   549  	for _, day := range days {
   550  		r := w.days[day]
   551  		summary = append(summary, r.summarize())
   552  	}
   554  	return summary
   555  }
   557  // cookWriteRunSummaries writes out the per-run summary files.
   558  func (l *writeLoader) cookWriteRunSummaries() error {
   559  	for _, w := range l.workloads {
   560  		for _, r := range w.days {
   561  			// Write out files preserving the original directory structure for
   562  			// ease of understanding / debugging.
   563  			outputPath := filepath.Join(l.summaryDir, r.summaryFilename())
   564  			if err := outputWriteRunSummary(r, outputPath); err != nil {
   565  				return err
   566  			}
   567  		}
   568  	}
   569  	return nil
   570  }
   572  // outputWriteRunSummary is a helper that generates the summary JSON for the
   573  // writeRun and writes it to the given output path.
   574  func outputWriteRunSummary(r *writeRun, outputPath string) error {
   575  	f, err := os.OpenFile(outputPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
   576  	if err != nil {
   577  		return err
   578  	}
   579  	defer func() { _ = f.Close() }()
   581  	b, err := r.formatSummaryJSON()
   582  	if err != nil {
   583  		return err
   584  	}
   585  	b = append(b, '\n')
   587  	_, err = f.Write(b)
   588  	return err
   589  }
   591  // parseWrite parses the raw write-throughput benchmark data and writes out the
   592  // summary files.
   593  func parseWrite(dataDir, summaryDir string) error {
   594  	l := newWriteLoader(dataDir, summaryDir)
   595  	if err := l.loadCooked(); err != nil {
   596  		return err
   597  	}
   599  	if err := l.loadRaw(); err != nil {
   600  		return err
   601  	}
   603  	if err := l.cookSummary(); err != nil {
   604  		return err
   605  	}
   607  	return l.cookWriteRunSummaries()
   608  }