github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/experiments/internal/pkg/tabulator/tabulator.go (about) 1 // Copyright 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tabulator is used to store experiment results into a BigQuery table 16 package tabulator 17 18 import ( 19 "context" 20 "fmt" 21 "os" 22 "path/filepath" 23 "strconv" 24 "strings" 25 26 "github.com/bazelbuild/reclient/experiments/internal/pkg/gcs" 27 28 "cloud.google.com/go/bigquery" 29 "google.golang.org/protobuf/encoding/prototext" 30 31 spb "github.com/bazelbuild/reclient/api/stats" 32 epb "github.com/bazelbuild/reclient/experiments/api/experiment" 33 34 log "github.com/golang/glog" 35 ) 36 37 const ( 38 dataset = "results" 39 table = "results" 40 ) 41 42 // RunExperimentResultCollector stores an experiment's results into a BigQuery table for 43 // easy querying. 44 func RunExperimentResultCollector(resBucket string, expName string, 45 gcpProject string) error { 46 ctx := context.Background() 47 configs, err := gcs.List(ctx, fmt.Sprintf("gs://%v/%v", resBucket, expName)) 48 if err != nil { 49 return fmt.Errorf("Failed to find directory for experiment %v: %v", expName, err) 50 } 51 exp := &epb.Results{Name: expName} 52 for _, c := range configs { 53 if strings.HasSuffix(c, "textproto") { 54 exp.ConfigUrl = c 55 continue 56 } 57 exp.ConfigResults = append(exp.ConfigResults, getConfig(ctx, c)) 58 } 59 client, err := bigquery.NewClient(ctx, gcpProject) 60 if err != nil { 61 return fmt.Errorf("Failed to create bigquery client: %v", err) 62 } 63 schema, err := bigquery.InferSchema(epb.Results{}) 64 if err != nil { 65 return fmt.Errorf("Failed to generate schema: %v", err) 66 } 67 schema = schema.Relax() 68 t := client.Dataset(dataset).Table(table) 69 if _, err := t.Metadata(ctx); err != nil { 70 if err := t.Create(ctx, &bigquery.TableMetadata{Schema: schema}); err != nil { 71 return fmt.Errorf("Failed to create table: %v", err) 72 } 73 } 74 if err := t.Uploader().Put(ctx, exp); err != nil { 75 return fmt.Errorf("Failed to insert experiment: %v", err) 76 if multiError, ok := err.(bigquery.PutMultiError); ok { 77 for _, err1 := range multiError { 78 for _, err2 := range err1.Errors { 79 return fmt.Errorf("Failed to insert: %v", err2) 80 } 81 } 82 } else { 83 return fmt.Errorf("Failed to insert: %v", err) 84 } 85 } 86 fmt.Printf("Results uploaded to bigquery. You can query bigquery as follows: \n"+ 87 "SELECT * FROM `%v.%v.%v` WHERE name='%v'\n"+ 88 "Or in plx as follows:\n"+ 89 "SET bigquery_billing_project = '%v';\n"+ 90 "SELECT c.name, AVG(d) as avg_duration\n"+ 91 "FROM `%v.%v.%v` r, UNNEST(ConfigResults) c, UNNEST(c.durations) d\n"+ 92 "WHERE r.Name = '%v'\n"+ 93 "GROUP BY 1;\n", gcpProject, dataset, table, expName, gcpProject, gcpProject, dataset, table, expName) 94 return nil 95 } 96 97 func getConfig(ctx context.Context, path string) *epb.ConfigurationResult { 98 trials, err := gcs.List(ctx, path) 99 if err != nil { 100 log.Warningf("Failed to find trials for experiment %v: %v", path, err) 101 } 102 cr := &epb.ConfigurationResult{Name: filepath.Base(path)} 103 log.Infof("Trials: %v", trials) 104 for _, t := range trials { 105 if err := gcs.Copy(ctx, fmt.Sprintf("%vtime.txt", t), "/tmp/time.txt"); err != nil { 106 log.Warningf("Couldn't find elapsed time for %v: %v", t, err) 107 continue 108 } 109 dur, err := os.ReadFile("/tmp/time.txt") 110 if err != nil { 111 log.Warningf("Couldn't read elapsed time from /tmp/time.txt: %v", err) 112 continue 113 } 114 d, err := strconv.ParseInt(strings.Trim(string(dur), "s\n"), 10, 64) 115 if err != nil { 116 log.Warningf("Couldn't parse duration in %v: %v", string(dur), err) 117 continue 118 } 119 cr.Durations = append(cr.Durations, d) 120 if err := gcs.Copy(ctx, fmt.Sprintf("%vrbe_metrics.txt", t), "/tmp/rbe_metrics.txt"); err != nil { 121 log.Warningf("Couldn't find RBE metrics for %v: %v", t, err) 122 continue 123 } 124 data, err := os.ReadFile("/tmp/rbe_metrics.txt") 125 if err != nil { 126 log.Warningf("Couldn't read RBE metrics for %v: %v", t, err) 127 continue 128 } 129 stats := &spb.Stats{} 130 if err := prototext.Unmarshal(data, stats); err != nil { 131 log.Warningf("Couldn't unmarshal RBE metrics for %v: %v", t, err) 132 continue 133 } 134 cr.Stats = append(cr.Stats, &epb.Stats{ 135 NumRecords: stats.NumRecords, 136 Stats: stats.Stats, 137 ToolVersion: stats.ToolVersion, 138 Verification: stats.Verification, 139 }) 140 } 141 return cr 142 }