gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/tools/bigquery/bigquery.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package bigquery defines a BigQuery schema for benchmarks.
    16  //
    17  // This package contains a schema for BigQuery and methods for publishing
    18  // benchmark data into tables.
    19  package bigquery
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"regexp"
    25  	"sort"
    26  	"strconv"
    27  	"strings"
    28  	"time"
    29  
    30  	bq "cloud.google.com/go/bigquery"
    31  	"google.golang.org/api/option"
    32  )
    33  
// Suite is the top level structure for a benchmark run. BigQuery
// will infer the schema from this.
type Suite struct {
	// Name is the name of the benchmark suite.
	Name       string       `bq:"name"`
	// Conditions are the suite-level qualifiers that apply to the whole run.
	Conditions []*Condition `bq:"conditions"`
	// Benchmarks are the individual benchmarks that make up the suite.
	Benchmarks []*Benchmark `bq:"benchmarks"`
	// Official marks whether the data is official; the debug output carries
	// a warning note when it is false.
	Official   bool         `bq:"official"`
	// Timestamp is the time associated with the suite; NewSuite sets it to
	// the current time in UTC.
	Timestamp  time.Time    `bq:"timestamp"`
}
    43  
    44  func (s *Suite) String() string {
    45  	var sb strings.Builder
    46  	s.debugString(&sb, "")
    47  	return sb.String()
    48  }
    49  
    50  // writeLine writes a line of text to the given string builder with a prefix.
    51  func writeLine(sb *strings.Builder, prefix string, format string, values ...any) {
    52  	if prefix != "" {
    53  		sb.WriteString(prefix)
    54  	}
    55  	sb.WriteString(fmt.Sprintf(format, values...))
    56  	sb.WriteString("\n")
    57  }
    58  
    59  // debugString writes debug information to the given string builder with the
    60  // given prefix.
    61  func (s *Suite) debugString(sb *strings.Builder, prefix string) {
    62  	writeLine(sb, prefix, "Benchmark suite %s:", s.Name)
    63  	writeLine(sb, prefix, "Timestamp: %v", s.Timestamp)
    64  	if !s.Official {
    65  		writeLine(sb, prefix, " **** NOTE: Data is not official. **** ")
    66  	}
    67  	if numConditions := len(s.Conditions); numConditions == 0 {
    68  		writeLine(sb, prefix, "Conditions: None.")
    69  	} else {
    70  		writeLine(sb, prefix, "Conditions (%d):", numConditions)
    71  		for _, condition := range s.Conditions {
    72  			condition.debugString(sb, prefix+"  ")
    73  		}
    74  	}
    75  	if numBenchmarks := len(s.Benchmarks); numBenchmarks == 0 {
    76  		writeLine(sb, prefix, "Benchmarks: None.")
    77  	} else {
    78  		writeLine(sb, prefix, "Benchmarks (%d):", numBenchmarks)
    79  		for _, benchmark := range s.Benchmarks {
    80  			benchmark.debugString(sb, prefix+"  ")
    81  		}
    82  	}
    83  	sb.WriteString(fmt.Sprintf("End of data for benchmark suite %s.", s.Name))
    84  }
    85  
    86  // Benchstat returns a benchstat-formatted output string.
    87  // See https://pkg.go.dev/golang.org/x/perf/cmd/benchstat
    88  // `includeConditions` contains names of `Condition`s that should be included
    89  // as part of the benchmark name.
    90  func (s *Suite) Benchstat(includeConditions []string) string {
    91  	var sb strings.Builder
    92  	benchmarkNames := make([]string, 0, len(s.Benchmarks))
    93  	benchmarks := make(map[string]*Benchmark, len(s.Benchmarks))
    94  	for _, bm := range s.Benchmarks {
    95  		if _, found := benchmarks[bm.Name]; !found {
    96  			benchmarkNames = append(benchmarkNames, bm.Name)
    97  			benchmarks[bm.Name] = bm
    98  		}
    99  	}
   100  	sort.Strings(benchmarkNames)
   101  	includeConditionsMap := make(map[string]bool, len(includeConditions))
   102  	for _, condName := range includeConditions {
   103  		includeConditionsMap[condName] = true
   104  	}
   105  	for _, bmName := range benchmarkNames {
   106  		benchmarks[bmName].benchstat(&sb, s.Name, includeConditionsMap, s.Conditions)
   107  	}
   108  	return sb.String()
   109  }
   110  
// Benchmark represents an individual benchmark in a suite.
type Benchmark struct {
	// Name is the name of the benchmark.
	Name      string       `bq:"name"`
	// Condition holds the benchmark-level qualifiers; NewBenchmark seeds it
	// with an "iterations" condition.
	Condition []*Condition `bq:"cond"`
	// Metric holds the measurements recorded for this benchmark.
	Metric    []*Metric    `bq:"metric"`
}
   117  
   118  // String implements the String method for Benchmark
   119  func (bm *Benchmark) String() string {
   120  	var sb strings.Builder
   121  	bm.debugString(&sb, "")
   122  	return sb.String()
   123  }
   124  
   125  // debugString writes debug information to the given string builder with the
   126  // given prefix.
   127  func (bm *Benchmark) debugString(sb *strings.Builder, prefix string) {
   128  	writeLine(sb, prefix, "Benchmark: %s", bm.Name)
   129  	if numConditions := len(bm.Condition); numConditions == 0 {
   130  		writeLine(sb, prefix, "  Conditions: None.")
   131  	} else {
   132  		writeLine(sb, prefix, "  Conditions (%d):", numConditions)
   133  		for _, condition := range bm.Condition {
   134  			condition.debugString(sb, prefix+"    ")
   135  		}
   136  	}
   137  	if numMetrics := len(bm.Metric); numMetrics == 0 {
   138  		writeLine(sb, prefix, "  Metrics: None.")
   139  	} else {
   140  		writeLine(sb, prefix, "  Metrics (%d):", numMetrics)
   141  		for _, metric := range bm.Metric {
   142  			metric.debugString(sb, prefix+"    ")
   143  		}
   144  	}
   145  }
   146  
   147  // noSpaceRe is used to remove whitespace characters in `noSpace`.
   148  var noSpaceRe = regexp.MustCompile("\\s+")
   149  
   150  // noSpace replaces whitespace characters from `s` with "_".
   151  func noSpace(s string) string {
   152  	return noSpaceRe.ReplaceAllString(s, "_")
   153  }
   154  
   155  // benchstat produces benchmark-formatted output for this Benchmark.
   156  func (bm *Benchmark) benchstat(sb *strings.Builder, suiteName string, includeConditions map[string]bool, suiteConditions []*Condition) {
   157  	var conditionsStr string
   158  	conditionNames := make([]string, 0, len(suiteConditions)+len(bm.Condition))
   159  	conditionMap := make(map[string]string, len(suiteConditions)+len(bm.Condition))
   160  	for _, c := range suiteConditions {
   161  		cName := noSpace(c.Name)
   162  		if _, found := conditionMap[cName]; !found && includeConditions[cName] {
   163  			conditionNames = append(conditionNames, cName)
   164  			conditionMap[cName] = noSpace(c.Value)
   165  		}
   166  	}
   167  	for _, c := range bm.Condition {
   168  		cName := noSpace(c.Name)
   169  		if _, found := conditionMap[cName]; !found && includeConditions[cName] {
   170  			conditionNames = append(conditionNames, cName)
   171  			conditionMap[cName] = noSpace(c.Value)
   172  		}
   173  	}
   174  	sort.Strings(conditionNames)
   175  	var conditionsBuilder strings.Builder
   176  	if len(conditionNames) > 0 {
   177  		conditionsBuilder.WriteByte('{')
   178  		for i, condName := range conditionNames {
   179  			if i != 0 {
   180  				conditionsBuilder.WriteByte(',')
   181  			}
   182  			conditionsBuilder.WriteString(condName)
   183  			conditionsBuilder.WriteByte('=')
   184  			conditionsBuilder.WriteString(conditionMap[condName])
   185  		}
   186  		conditionsBuilder.WriteByte('}')
   187  	}
   188  	conditionsStr = conditionsBuilder.String()
   189  	for _, m := range bm.Metric {
   190  		if !strings.HasPrefix(suiteName, "Benchmark") {
   191  			// benchstat format requires all benchmark names to start with "Benchmark".
   192  			sb.WriteString("Benchmark")
   193  		}
   194  		sb.WriteString(noSpace(suiteName))
   195  		if suiteName != bm.Name {
   196  			sb.WriteByte('/')
   197  			sb.WriteString(noSpace(bm.Name))
   198  		}
   199  		sb.WriteString(conditionsStr)
   200  		sb.WriteByte('/')
   201  		sb.WriteString(noSpace(m.Name))
   202  		sb.WriteString(" 1 ") // 1 sample
   203  		sb.WriteString(fmt.Sprintf("%f", m.Sample))
   204  		sb.WriteByte(' ')
   205  		sb.WriteString(noSpace(m.Unit))
   206  		sb.WriteByte('\n')
   207  	}
   208  }
   209  
   210  // AddMetric adds a metric to an existing Benchmark.
   211  func (bm *Benchmark) AddMetric(metricName, unit string, sample float64) {
   212  	m := &Metric{
   213  		Name:   metricName,
   214  		Unit:   unit,
   215  		Sample: sample,
   216  	}
   217  	bm.Metric = append(bm.Metric, m)
   218  }
   219  
   220  // AddCondition adds a condition to an existing Benchmark.
   221  func (bm *Benchmark) AddCondition(name, value string) {
   222  	bm.Condition = append(bm.Condition, NewCondition(name, value))
   223  }
   224  
   225  // NewBenchmark initializes a new benchmark.
   226  func NewBenchmark(name string, iters int) *Benchmark {
   227  	return &Benchmark{
   228  		Name:   name,
   229  		Metric: make([]*Metric, 0),
   230  		Condition: []*Condition{
   231  			{
   232  				Name:  "iterations",
   233  				Value: strconv.Itoa(iters),
   234  			},
   235  		},
   236  	}
   237  }
   238  
// Condition represents qualifiers for the benchmark or suite. For example:
// Get_Pid/1/real_time would have Benchmark Name "Get_Pid" with "1"
// and "real_time" parameters as conditions. Suite conditions include
// information such as the CL number and platform name.
type Condition struct {
	// Name identifies the qualifier.
	Name  string `bq:"name"`
	// Value is the qualifier's value, stored as a string.
	Value string `bq:"value"`
}
   247  
   248  // NewCondition returns a new Condition with the given name and value.
   249  func NewCondition(name, value string) *Condition {
   250  	return &Condition{
   251  		Name:  name,
   252  		Value: value,
   253  	}
   254  }
   255  
   256  func (c *Condition) String() string {
   257  	var sb strings.Builder
   258  	c.debugString(&sb, "")
   259  	return sb.String()
   260  }
   261  
   262  // debugString writes debug information to the given string builder with the
   263  // given prefix.
   264  func (c *Condition) debugString(sb *strings.Builder, prefix string) {
   265  	writeLine(sb, prefix, "Condition: %s = %s", c.Name, c.Value)
   266  }
   267  
// Metric holds the actual metric data and unit information for this benchmark.
type Metric struct {
	// Name is the name of the metric.
	Name   string  `bq:"name"`
	// Unit is the unit in which Sample is expressed.
	Unit   string  `bq:"unit"`
	// Sample is the measured value.
	Sample float64 `bq:"sample"`
}
   274  
   275  func (m *Metric) String() string {
   276  	var sb strings.Builder
   277  	m.debugString(&sb, "")
   278  	return sb.String()
   279  }
   280  
   281  // debugString writes debug information to the given string builder with the
   282  // given prefix.
   283  func (m *Metric) debugString(sb *strings.Builder, prefix string) {
   284  	writeLine(sb, prefix, "Metric %s: %f %s", m.Name, m.Sample, m.Unit)
   285  }
   286  
   287  // InitBigQuery initializes a BigQuery dataset/table in the project. If the dataset/table already exists, it is not duplicated.
   288  func InitBigQuery(ctx context.Context, projectID, datasetID, tableID string, opts []option.ClientOption) error {
   289  	client, err := bq.NewClient(ctx, projectID, opts...)
   290  	if err != nil {
   291  		return fmt.Errorf("failed to initialize client on project %s: %v", projectID, err)
   292  	}
   293  	defer client.Close()
   294  
   295  	dataset := client.Dataset(datasetID)
   296  	if err := dataset.Create(ctx, nil); err != nil && !checkDuplicateError(err) {
   297  		return fmt.Errorf("failed to create dataset: %s: %v", datasetID, err)
   298  	}
   299  
   300  	table := dataset.Table(tableID)
   301  	schema, err := bq.InferSchema(Suite{})
   302  	if err != nil {
   303  		return fmt.Errorf("failed to infer schema: %v", err)
   304  	}
   305  
   306  	if err := table.Create(ctx, &bq.TableMetadata{Schema: schema}); err != nil && !checkDuplicateError(err) {
   307  		return fmt.Errorf("failed to create table: %s: %v", tableID, err)
   308  	}
   309  	return nil
   310  }
   311  
   312  // NewBenchmarkWithMetric creates a new sending to BigQuery, initialized with a
   313  // single iteration and single metric.
   314  func NewBenchmarkWithMetric(name, metric, unit string, value float64) *Benchmark {
   315  	b := NewBenchmark(name, 1)
   316  	b.AddMetric(metric, unit, value)
   317  	return b
   318  }
   319  
   320  // NewSuite initializes a new Suite.
   321  func NewSuite(name string, official bool) *Suite {
   322  	return &Suite{
   323  		Name:       name,
   324  		Timestamp:  time.Now().UTC(),
   325  		Benchmarks: make([]*Benchmark, 0),
   326  		Conditions: make([]*Condition, 0),
   327  		Official:   official,
   328  	}
   329  }
   330  
   331  // SendBenchmarks sends the slice of benchmarks to the BigQuery dataset/table.
   332  func SendBenchmarks(ctx context.Context, suite *Suite, projectID, datasetID, tableID string, opts []option.ClientOption) error {
   333  	client, err := bq.NewClient(ctx, projectID, opts...)
   334  	if err != nil {
   335  		return fmt.Errorf("failed to initialize client on project: %s: %v", projectID, err)
   336  	}
   337  	defer client.Close()
   338  
   339  	uploader := client.Dataset(datasetID).Table(tableID).Uploader()
   340  	if err = uploader.Put(ctx, suite); err != nil {
   341  		return fmt.Errorf("failed to upload benchmarks %s to project %s, table %s.%s: %v", suite.Name, projectID, datasetID, tableID, err)
   342  	}
   343  
   344  	return nil
   345  }
   346  
   347  // BigQuery will error "409" for duplicate tables and datasets.
   348  func checkDuplicateError(err error) bool {
   349  	return strings.Contains(err.Error(), "googleapi: Error 409: Already Exists")
   350  }