github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/cmd/bigquery/main.go (about) 1 // Copyright 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Binary bigquery is used to stream a reproxy_log generated from a build 16 // using re-client to bigquery so that it can be further queried upon. 17 // 18 // Example invocation (assuming the bigquery table already exists): 19 // 20 // bazelisk run //cmd/bigquery:bigquery -- \ 21 // --log_path text:///tmp/reproxy_log.txt \ 22 // --alsologtostderr=true \ 23 // --table <bigquery-dataset-id>.<bigquery-table-id> \ 24 // --project_id <gcp-project-id> # (ex:"foundry-x-experiments") 25 // 26 // If you don't have a bigquery table yet, you can create it using the following steps: 27 // 1. Run scripts/gen_reproxy_log_big_query_schema.sh 28 // 2. Run the following command using the "bq" tool to create the table: 29 // bq mk --table \ 30 // --expiration 600 \ # in seconds. This argument is optional and the table doesn't expire if you don't set it. 31 // foundry-x-experiments:reproxylogs.reproxy_log_1 \ # Format: <project-id>:<dataset-id>.<table-id> 32 // `pwd`/reproxy_log_bigquery_schema/proxy/reproxy_log.schema 33 // 34 // Note: It can take up to 5 minutes for the bigquery table to become active 35 // after it is created. 
36 package main 37 38 import ( 39 "context" 40 "flag" 41 "fmt" 42 "sync" 43 "time" 44 45 "github.com/bazelbuild/reclient/internal/pkg/bigquery" 46 "github.com/bazelbuild/reclient/internal/pkg/bigquerytranslator" 47 "github.com/bazelbuild/reclient/internal/pkg/logger" 48 "github.com/bazelbuild/reclient/internal/pkg/rbeflag" 49 50 lpb "github.com/bazelbuild/reclient/api/log" 51 52 log "github.com/golang/glog" 53 "golang.org/x/sync/errgroup" 54 ) 55 56 var ( 57 // TODO: support --proxy_log_dir. 58 logPath = flag.String("log_path", "", "If provided, the path to a log file of all executed records. The format is e.g. text:///full/file/path.") 59 projectID = flag.String("project_id", "foundry-x-experiments", "The project containing the big query table to which log records should be streamed to.") 60 tableSpec = flag.String("table", "reproxy_log_test.test_1", "Resource specifier of the BigQuery to which log records should be streamed to. If the project is not provided in the specifier project_id will be used.") 61 numConcurrentOps = flag.Int("num_concurrent_uploads", 100, "Number of concurrent upload operations to perform.") 62 ) 63 64 func insertRows(logs []*lpb.LogRecord) error { 65 ctx := context.Background() 66 inserter, cleanup, err := bigquery.NewInserter(ctx, *tableSpec, *projectID, nil) 67 defer cleanup() 68 if err != nil { 69 return fmt.Errorf("bigquery.NewInserter: %v", err) 70 } 71 72 items := make(chan *bigquerytranslator.Item, *numConcurrentOps) 73 74 g, _ := errgroup.WithContext(context.Background()) 75 76 var processed int32 77 var processedMu sync.Mutex 78 79 for i := 0; i < *numConcurrentOps; i++ { 80 g.Go(func() error { 81 for item := range items { 82 if err := inserter.Put(ctx, item); err != nil { 83 // In case of error (gpaste/6313679673360384), retrying the job with 84 // back-off as described in BigQuery Service Level Agreement 85 // https://cloud.google.com/bigquery/sla 86 time.Sleep(1 * time.Second) 87 if err := inserter.Put(ctx, item); err != nil { 
88 log.Errorf("Failed to insert record after retry: %v", err) 89 return err 90 } 91 } 92 processedMu.Lock() 93 processed++ 94 processedMu.Unlock() 95 } 96 return nil 97 }) 98 } 99 go func() { 100 for range time.Tick(5 * time.Second) { 101 processedMu.Lock() 102 log.Infof("Finished %v/%v items...", int(processed), len(logs)) 103 processedMu.Unlock() 104 } 105 }() 106 if len(logs) < 1 { 107 log.Infof("No items to load to bigquery.") 108 return nil 109 } 110 log.Infof("Total number of items: %v", len(logs)) 111 for _, r := range logs { 112 items <- &bigquerytranslator.Item{r} 113 } 114 close(items) 115 116 if err := g.Wait(); err != nil { 117 log.Errorf("Error while uploading to bigquery: %v", err) 118 } 119 return nil 120 } 121 122 func main() { 123 defer log.Flush() 124 rbeflag.Parse() 125 if *logPath == "" { 126 log.Fatal("Must provide proxy log path.") 127 } 128 129 log.Infof("Loading stats from %v...", *logPath) 130 logRecords, err := logger.ParseFromFormatFile(*logPath) 131 if err != nil { 132 log.Fatalf("Failed reading proxy log: %v", err) 133 } 134 135 log.Infof("Inserting stats into bigquery table...") 136 if err := insertRows(logRecords); err != nil { 137 log.Fatalf("Unable to insert records into bigquery table: %+v", err) 138 } 139 }