golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/buildstats/buildstats.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package buildstats contains code to sync the coordinator's build 6 // logs from Datastore to BigQuery. 7 package buildstats // import "golang.org/x/build/internal/buildstats" 8 import ( 9 "context" 10 "fmt" 11 "log" 12 "reflect" 13 "sort" 14 "strings" 15 "time" 16 17 "cloud.google.com/go/bigquery" 18 "cloud.google.com/go/datastore" 19 "golang.org/x/build/buildenv" 20 "golang.org/x/build/types" 21 "google.golang.org/api/googleapi" 22 "google.golang.org/api/iterator" 23 ) 24 25 // Verbose controls logging verbosity. 26 var Verbose = false 27 28 // SyncBuilds syncs the datastore "Build" entities to the BigQuery "Builds" table. 29 // This stores information on each build as a whole, without details. 30 func SyncBuilds(ctx context.Context, env *buildenv.Environment) error { 31 bq, err := bigquery.NewClient(ctx, env.ProjectName) 32 if err != nil { 33 return err 34 } 35 defer bq.Close() 36 37 buildsTable := bq.Dataset("builds").Table("Builds") 38 meta, err := buildsTable.Metadata(ctx) 39 if ae, ok := err.(*googleapi.Error); ok && ae.Code == 404 { 40 if Verbose { 41 log.Printf("Creating table Builds...") 42 } 43 err = buildsTable.Create(ctx, nil) 44 if err == nil { 45 meta, err = buildsTable.Metadata(ctx) 46 } 47 } 48 if err != nil { 49 return fmt.Errorf("getting Builds table metadata: %v", err) 50 } 51 if Verbose { 52 log.Printf("buildstats: Builds metadata: %#v", meta) 53 } 54 if len(meta.Schema) == 0 { 55 if Verbose { 56 log.Printf("buildstats: builds table has empty schema") 57 } 58 schema, err := bigquery.InferSchema(types.BuildRecord{}) 59 if err != nil { 60 return fmt.Errorf("InferSchema: %v", err) 61 } 62 blindWrite := "" 63 meta, err = buildsTable.Update(ctx, bigquery.TableMetadataToUpdate{Schema: schema}, blindWrite) 64 if err != nil { 65 return fmt.Errorf("table.Update schema: %v", err) 66 } 67 } 68 if Verbose { 69 for i, fs := range meta.Schema { 70 log.Printf(" schema[%v]: %+v", i, fs) 71 for j, fs := range fs.Schema { 72 log.Printf(" .. schema[%v]: %+v", j, fs) 73 } 74 } 75 } 76 77 q := bq.Query("SELECT MAX(EndTime) FROM builds.Builds") 78 it, err := q.Read(ctx) 79 if err != nil { 80 return fmt.Errorf("Read: %v", err) 81 } 82 var values []bigquery.Value 83 err = it.Next(&values) 84 if err == iterator.Done { 85 return fmt.Errorf("No result.") 86 } 87 if err != nil { 88 return fmt.Errorf("Next: %v", err) 89 } 90 var since time.Time 91 switch t := values[0].(type) { 92 case nil: 93 // NULL. No rows. 94 if Verbose { 95 log.Printf("buildstats: syncing Builds from the beginning") 96 } 97 case time.Time: 98 since = values[0].(time.Time) 99 default: 100 return fmt.Errorf("MAX(EndType) = %T: want nil or time.Time", t) 101 } 102 103 if Verbose { 104 log.Printf("Max is %v (%v)", since, since.Location()) 105 } 106 107 ds, err := datastore.NewClient(ctx, env.ProjectName) 108 if err != nil { 109 return fmt.Errorf("datastore.NewClient: %v", err) 110 } 111 defer ds.Close() 112 113 up := buildsTable.Uploader() 114 115 if Verbose { 116 log.Printf("buildstats: Builds max time: %v", since) 117 } 118 dsq := datastore.NewQuery("Build") 119 if !since.IsZero() { 120 dsq = dsq.Filter("EndTime >", since).Filter("EndTime <", since.Add(24*90*time.Hour)) 121 } else { 122 // Ignore rows without endtime. 123 dsq = dsq.Filter("EndTime >", time.Unix(1, 0)) 124 } 125 dsq = dsq.Order("EndTime") 126 dsit := ds.Run(ctx, dsq) 127 var maxPut time.Time 128 for { 129 n := 0 130 var rows []*bigquery.ValuesSaver 131 for { 132 var s types.BuildRecord 133 key, err := dsit.Next(&s) 134 if err == iterator.Done { 135 break 136 } 137 n++ 138 if err != nil { 139 return fmt.Errorf("error querying max EndTime: %v", err) 140 } 141 if s.EndTime.IsZero() { 142 return fmt.Errorf("got zero EndTime") 143 } 144 145 var row []bigquery.Value 146 var putSchema bigquery.Schema 147 rv := reflect.ValueOf(s) 148 for _, fs := range meta.Schema { 149 if fs.Name[0] == '_' { 150 continue 151 } 152 putSchema = append(putSchema, fs) 153 row = append(row, rv.FieldByName(fs.Name).Interface()) 154 maxPut = s.EndTime 155 } 156 157 rows = append(rows, &bigquery.ValuesSaver{ 158 Schema: putSchema, 159 InsertID: key.Encode(), 160 Row: row, 161 }) 162 if len(rows) == 1000 { 163 break 164 } 165 } 166 if n == 0 { 167 return nil 168 } 169 err = up.Put(ctx, rows) 170 log.Printf("buildstats: Build sync put %d rows, up to %v. error = %v", len(rows), maxPut, err) 171 if err != nil { 172 return err 173 } 174 } 175 } 176 177 // SyncSpans syncs the datastore "Span" entities to the BigQuery "Spans" table. 178 // These contain the fine-grained timing details of how a build ran. 179 func SyncSpans(ctx context.Context, env *buildenv.Environment) error { 180 bq, err := bigquery.NewClient(ctx, env.ProjectName) 181 if err != nil { 182 log.Fatal(err) 183 } 184 defer bq.Close() 185 186 table := bq.Dataset("builds").Table("Spans") 187 meta, err := table.Metadata(ctx) 188 if ae, ok := err.(*googleapi.Error); ok && ae.Code == 404 { 189 log.Printf("Creating table Spans...") 190 err = table.Create(ctx, nil) 191 if err == nil { 192 meta, err = table.Metadata(ctx) 193 } 194 } 195 if err != nil { 196 return fmt.Errorf("Metadata: %#v", err) 197 } 198 if Verbose { 199 log.Printf("buildstats: Spans metadata: %#v", meta) 200 } 201 schema := meta.Schema 202 if len(schema) == 0 { 203 if Verbose { 204 log.Printf("EMPTY SCHEMA") 205 } 206 schema, err = bigquery.InferSchema(types.SpanRecord{}) 207 if err != nil { 208 return fmt.Errorf("InferSchema: %v", err) 209 } 210 blindWrite := "" 211 meta, err := table.Update(ctx, bigquery.TableMetadataToUpdate{Schema: schema}, blindWrite) 212 if err != nil { 213 return fmt.Errorf("table.Update schema: %v", err) 214 } 215 schema = meta.Schema 216 } 217 if Verbose { 218 for i, fs := range schema { 219 log.Printf(" schema[%v]: %+v", i, fs) 220 for j, fs := range fs.Schema { 221 log.Printf(" .. schema[%v]: %+v", j, fs) 222 } 223 } 224 } 225 226 q := bq.Query("SELECT MAX(EndTime) FROM builds.Spans") 227 it, err := q.Read(ctx) 228 if err != nil { 229 return fmt.Errorf("Read: %v", err) 230 } 231 232 var since time.Time 233 var values []bigquery.Value 234 if err := it.Next(&values); err != nil { 235 if err == iterator.Done { 236 return fmt.Errorf("Expected at least one row fro MAX(EndTime) query; got none.") 237 } 238 return fmt.Errorf("Next: %v", err) 239 } 240 switch t := values[0].(type) { 241 case nil: 242 // NULL. No rows. 243 log.Printf("starting from the beginning...") 244 case time.Time: 245 since = values[0].(time.Time) 246 default: 247 return fmt.Errorf("MAX(EndType) = %T: want nil or time.Time", t) 248 } 249 if since.IsZero() { 250 since = time.Unix(1, 0) // arbitrary 251 } 252 253 ds, err := datastore.NewClient(ctx, env.ProjectName) 254 if err != nil { 255 return fmt.Errorf("datastore.NewClient: %v", err) 256 } 257 defer ds.Close() 258 259 up := table.Uploader() 260 261 if Verbose { 262 log.Printf("buildstats: Span max time: %v", since) 263 } 264 dsit := ds.Run(ctx, datastore.NewQuery("Span").Filter("EndTime >", since).Order("EndTime")) 265 var maxPut time.Time 266 for { 267 n := 0 268 var rows []*bigquery.ValuesSaver 269 for { 270 var s types.SpanRecord 271 key, err := dsit.Next(&s) 272 if err == iterator.Done { 273 break 274 } 275 n++ 276 if err != nil { 277 log.Fatal(err) 278 } 279 if s.EndTime.IsZero() { 280 return fmt.Errorf("got zero endtime") 281 } 282 283 var row []bigquery.Value 284 var putSchema bigquery.Schema 285 rv := reflect.ValueOf(s) 286 for _, fs := range meta.Schema { 287 if fs.Name[0] == '_' { 288 continue 289 } 290 putSchema = append(putSchema, fs) 291 row = append(row, rv.FieldByName(fs.Name).Interface()) 292 maxPut = s.EndTime 293 } 294 295 rows = append(rows, &bigquery.ValuesSaver{ 296 Schema: putSchema, 297 InsertID: key.Encode(), 298 Row: row, 299 }) 300 if len(rows) == 1000 { 301 break 302 } 303 } 304 if n == 0 { 305 return nil 306 } 307 err = up.Put(ctx, rows) 308 log.Printf("buildstats: Spans sync put %d rows, up to %v. error = %v", len(rows), maxPut, err) 309 if err != nil { 310 return err 311 } 312 } 313 } 314 315 // TestStats describes stats for a cmd/dist test on a particular build 316 // configuration (a "builder"). 317 type TestStats struct { 318 // AsOf is the time that the stats were queried from BigQuery. 319 AsOf time.Time 320 321 // BuilderTestStats maps from a builder name to that builder's 322 // test stats. 323 BuilderTestStats map[string]*BuilderTestStats 324 } 325 326 // Duration returns the median time to run testName on builder, if known. 327 // Otherwise it returns some non-zero default value. 328 func (ts *TestStats) Duration(builder, testName string) time.Duration { 329 if ts != nil { 330 if bs, ok := ts.BuilderTestStats[builder]; ok { 331 if d, ok := bs.MedianDuration[testName]; ok { 332 return d 333 } 334 } 335 } 336 return 3 * time.Second // some arbitrary value if unknown 337 } 338 339 func (ts *TestStats) Builders() []string { 340 s := make([]string, 0, len(ts.BuilderTestStats)) 341 for k := range ts.BuilderTestStats { 342 s = append(s, k) 343 } 344 sort.Strings(s) 345 return s 346 } 347 348 type BuilderTestStats struct { 349 // Builder is which build configuration this is for. 350 Builder string 351 352 // Runs is how many times tests have run recently, for some 353 // fuzzy definition of "recently". 354 // The map key is a cmd/dist test name. 355 Runs map[string]int 356 357 // MedianDuration is the median duration for a test to 358 // pass on this BuilderTestStat's Builder. 359 // The map key is a cmd/dist test name. 360 MedianDuration map[string]time.Duration 361 } 362 363 func (ts *BuilderTestStats) Tests() []string { 364 s := make([]string, 0, len(ts.Runs)) 365 for k := range ts.Runs { 366 s = append(s, k) 367 } 368 sort.Strings(s) 369 return s 370 } 371 372 // QueryTestStats returns stats on all tests for all builders. 373 func QueryTestStats(ctx context.Context, env *buildenv.Environment) (*TestStats, error) { 374 ts := &TestStats{ 375 AsOf: time.Now(), 376 BuilderTestStats: map[string]*BuilderTestStats{}, 377 } 378 bq, err := bigquery.NewClient(ctx, env.ProjectName) 379 if err != nil { 380 return nil, err 381 } 382 defer bq.Close() 383 ctx, cancel := context.WithCancel(ctx) 384 defer cancel() 385 q := bq.Query(` 386 SELECT 387 Builder, Event, APPROX_QUANTILES(Seconds, 100)[OFFSET(50)] as MedianSec, COUNT(*) as N 388 FROM 389 builds.Spans 390 WHERE 391 Error='' AND 392 StartTime > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 500 HOUR) 393 AND Repo = "go" 394 AND Event LIKE 'run_test:%' 395 GROUP BY 1, 2 396 `) 397 it, err := q.Read(ctx) 398 if err != nil { 399 return nil, err 400 } 401 n := 0 402 for { 403 var row struct { 404 Builder string 405 Event string 406 MedianSec float64 407 N int 408 } 409 err := it.Next(&row) 410 if err == iterator.Done { 411 break 412 } 413 if err != nil { 414 return nil, err 415 } 416 n++ 417 if n > 50000 { 418 break 419 } 420 bs := ts.BuilderTestStats[row.Builder] 421 if bs == nil { 422 bs = &BuilderTestStats{ 423 Builder: row.Builder, 424 Runs: map[string]int{}, 425 MedianDuration: map[string]time.Duration{}, 426 } 427 ts.BuilderTestStats[row.Builder] = bs 428 } 429 distTest := strings.TrimPrefix(row.Event, "run_test:") 430 bs.Runs[distTest] = row.N 431 bs.MedianDuration[distTest] = time.Duration(row.MedianSec * 1e9) 432 } 433 return ts, nil 434 }