github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/experiments/internal/pkg/tabulator/tabulator.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package tabulator stores experiment results in a BigQuery table.
    16  package tabulator
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"strconv"
    24  	"strings"
    25  
    26  	"github.com/bazelbuild/reclient/experiments/internal/pkg/gcs"
    27  
    28  	"cloud.google.com/go/bigquery"
    29  	"google.golang.org/protobuf/encoding/prototext"
    30  
    31  	spb "github.com/bazelbuild/reclient/api/stats"
    32  	epb "github.com/bazelbuild/reclient/experiments/api/experiment"
    33  
    34  	log "github.com/golang/glog"
    35  )
    36  
    37  const (
    38  	dataset = "results" // BigQuery dataset that holds the results table.
    39  	table   = "results" // BigQuery table within dataset that experiment results are inserted into.
    40  )
    41  
    42  // RunExperimentResultCollector stores an experiment's results into a BigQuery table for
    43  // easy querying.
    44  func RunExperimentResultCollector(resBucket string, expName string,
    45  	gcpProject string) error {
    46  	ctx := context.Background()
    47  	configs, err := gcs.List(ctx, fmt.Sprintf("gs://%v/%v", resBucket, expName))
    48  	if err != nil {
    49  		return fmt.Errorf("Failed to find directory for experiment %v: %v", expName, err)
    50  	}
    51  	exp := &epb.Results{Name: expName}
    52  	for _, c := range configs {
    53  		if strings.HasSuffix(c, "textproto") {
    54  			exp.ConfigUrl = c
    55  			continue
    56  		}
    57  		exp.ConfigResults = append(exp.ConfigResults, getConfig(ctx, c))
    58  	}
    59  	client, err := bigquery.NewClient(ctx, gcpProject)
    60  	if err != nil {
    61  		return fmt.Errorf("Failed to create bigquery client: %v", err)
    62  	}
    63  	schema, err := bigquery.InferSchema(epb.Results{})
    64  	if err != nil {
    65  		return fmt.Errorf("Failed to generate schema: %v", err)
    66  	}
    67  	schema = schema.Relax()
    68  	t := client.Dataset(dataset).Table(table)
    69  	if _, err := t.Metadata(ctx); err != nil {
    70  		if err := t.Create(ctx, &bigquery.TableMetadata{Schema: schema}); err != nil {
    71  			return fmt.Errorf("Failed to create table: %v", err)
    72  		}
    73  	}
    74  	if err := t.Uploader().Put(ctx, exp); err != nil {
    75  		return fmt.Errorf("Failed to insert experiment: %v", err)
    76  		if multiError, ok := err.(bigquery.PutMultiError); ok {
    77  			for _, err1 := range multiError {
    78  				for _, err2 := range err1.Errors {
    79  					return fmt.Errorf("Failed to insert: %v", err2)
    80  				}
    81  			}
    82  		} else {
    83  			return fmt.Errorf("Failed to insert: %v", err)
    84  		}
    85  	}
    86  	fmt.Printf("Results uploaded to bigquery. You can query bigquery as follows: \n"+
    87  		"SELECT * FROM `%v.%v.%v` WHERE name='%v'\n"+
    88  		"Or in plx as follows:\n"+
    89  		"SET bigquery_billing_project = '%v';\n"+
    90  		"SELECT c.name, AVG(d) as avg_duration\n"+
    91  		"FROM `%v.%v.%v` r, UNNEST(ConfigResults) c, UNNEST(c.durations) d\n"+
    92  		"WHERE r.Name = '%v'\n"+
    93  		"GROUP BY 1;\n", gcpProject, dataset, table, expName, gcpProject, gcpProject, dataset, table, expName)
    94  	return nil
    95  }
    96  
    97  func getConfig(ctx context.Context, path string) *epb.ConfigurationResult {
    98  	trials, err := gcs.List(ctx, path)
    99  	if err != nil {
   100  		log.Warningf("Failed to find trials for experiment %v: %v", path, err)
   101  	}
   102  	cr := &epb.ConfigurationResult{Name: filepath.Base(path)}
   103  	log.Infof("Trials: %v", trials)
   104  	for _, t := range trials {
   105  		if err := gcs.Copy(ctx, fmt.Sprintf("%vtime.txt", t), "/tmp/time.txt"); err != nil {
   106  			log.Warningf("Couldn't find elapsed time for %v: %v", t, err)
   107  			continue
   108  		}
   109  		dur, err := os.ReadFile("/tmp/time.txt")
   110  		if err != nil {
   111  			log.Warningf("Couldn't read elapsed time from /tmp/time.txt: %v", err)
   112  			continue
   113  		}
   114  		d, err := strconv.ParseInt(strings.Trim(string(dur), "s\n"), 10, 64)
   115  		if err != nil {
   116  			log.Warningf("Couldn't parse duration in %v: %v", string(dur), err)
   117  			continue
   118  		}
   119  		cr.Durations = append(cr.Durations, d)
   120  		if err := gcs.Copy(ctx, fmt.Sprintf("%vrbe_metrics.txt", t), "/tmp/rbe_metrics.txt"); err != nil {
   121  			log.Warningf("Couldn't find RBE metrics for %v: %v", t, err)
   122  			continue
   123  		}
   124  		data, err := os.ReadFile("/tmp/rbe_metrics.txt")
   125  		if err != nil {
   126  			log.Warningf("Couldn't read RBE metrics for %v: %v", t, err)
   127  			continue
   128  		}
   129  		stats := &spb.Stats{}
   130  		if err := prototext.Unmarshal(data, stats); err != nil {
   131  			log.Warningf("Couldn't unmarshal RBE metrics for %v: %v", t, err)
   132  			continue
   133  		}
   134  		cr.Stats = append(cr.Stats, &epb.Stats{
   135  			NumRecords:   stats.NumRecords,
   136  			Stats:        stats.Stats,
   137  			ToolVersion:  stats.ToolVersion,
   138  			Verification: stats.Verification,
   139  		})
   140  	}
   141  	return cr
   142  }