/*
Copyright 2020 The TestGrid Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package updater

import (
	"context"
	"errors"
	"fmt"
	"io"
	"path"
	"sort"
	"strings"
	"sync"
	"time"

	"cloud.google.com/go/storage"
	configpb "github.com/GoogleCloudPlatform/testgrid/pb/config"
	evalpb "github.com/GoogleCloudPlatform/testgrid/pb/custom_evaluator"
	statepb "github.com/GoogleCloudPlatform/testgrid/pb/state"
	statuspb "github.com/GoogleCloudPlatform/testgrid/pb/test_status"
	"github.com/GoogleCloudPlatform/testgrid/util/gcs"
	"github.com/fvbommel/sortorder"
	"github.com/sirupsen/logrus"
)

// hintStarted returns the maximum column hint, compared in natural sort
// order (so e.g. "10" sorts after "9"). Returns "" for an empty slice.
func hintStarted(cols []InflatedColumn) string {
	var hint string
	for i, col := range cols {
		// i == 0 seeds hint with the first value, even if it is empty.
		if newHint := col.Column.Hint; i == 0 || sortorder.NaturalLess(hint, newHint) {
			hint = newHint
		}
	}
	return hint
}

// gcsColumnReader returns a ColumnReader that lists GCS builds newer than
// the most recent hint in oldCols and converts them into InflatedColumns,
// sending each to receivers. buildTimeout is forwarded to readColumns.
// A missing bucket is logged and treated as success (nil error).
func gcsColumnReader(client gcs.Client, buildTimeout time.Duration, readResult *resultReader, enableIgnoreSkip bool) ColumnReader {
	return func(ctx context.Context, parentLog logrus.FieldLogger, tg *configpb.TestGroup, oldCols []InflatedColumn, stop time.Time, receivers chan<- InflatedColumn) error {
		tgPaths, err := groupPaths(tg)
		if err != nil {
			return fmt.Errorf("group path: %w", err)
		}

		// Only list builds newer than the newest column we already have.
		since := hintStarted(oldCols)
		log := parentLog.WithField("since", since)

		log.Trace("Listing builds...")
		listBuildsStart := time.Now()
		builds, err := listBuilds(ctx, client, since, tgPaths...)
		if errors.Is(err, storage.ErrBucketNotExist) {
			log.WithError(err).Info("Bucket does not exist")
			return nil
		}
		if err != nil {
			return fmt.Errorf("list builds: %w", err)
		}
		log.WithField("listBuilds", time.Since(listBuildsStart)).WithField("total", len(builds)).Debug("Listed builds")

		readColumns(ctx, client, log, tg, builds, stop, buildTimeout, receivers, readResult, enableIgnoreSkip)
		return nil
	}
}

// resultReaderPool starts `concurrency` worker goroutines that service
// readResult requests via a shared channel, and returns a *resultReader
// whose read function enqueues work onto that pool. The pool shuts down
// (channel closed, workers drained) once poolCtx is done.
func resultReaderPool(poolCtx context.Context, log *logrus.Entry, concurrency int) *resultReader {

	// request carries one readResult call's inputs and outputs; wg is
	// done when a worker has filled in res/err.
	type request struct {
		ctx    context.Context
		client gcs.Downloader
		build  gcs.Build
		stop   time.Time
		res    *gcsResult
		err    error
		wg     sync.WaitGroup
	}

	ch := make(chan *request, concurrency)

	var wg sync.WaitGroup
	wg.Add(concurrency)
	log = log.WithField("concurrency", concurrency)
	log.Info("Starting up result reader pool")

	// Workers: pull requests until ch is closed.
	for i := 0; i < concurrency; i++ {
		go func() {
			defer wg.Done()
			for req := range ch {
				req.res, req.err = readResult(req.ctx, req.client, req.build, req.stop)
				req.wg.Done()
			}
		}()
	}

	// Shutdown: close the queue when poolCtx ends, then wait for workers.
	go func() {
		<-poolCtx.Done()
		log.Info("Shutting down result reader pool")
		close(ch)
		wg.Wait()
		log.Info("Result reader pool stopped")
	}()

	// readResultViaPool enqueues a request and returns a future-style
	// function that blocks until a worker has produced the result.
	readResultViaPool := func(ctx context.Context, client gcs.Downloader, build gcs.Build, stop time.Time) func() (*gcsResult, error) {

		req := &request{
			ctx:    ctx,
			client: client,
			build:  build,
			stop:   stop,
		}
		req.wg.Add(1)
		select {
		case <-ctx.Done():
			return func() (*gcsResult, error) { return nil, ctx.Err() }
		case ch <- req: // wait for request to get onto the queue
			return func() (*gcsResult, error) {
				req.wg.Wait()
				return req.res, req.err
			}
		}
	}

	return &resultReader{
		lock: &sync.Mutex{},
		read: readResultViaPool,
	}
}

// resultReader reads gcsResults, typically via a shared worker pool.
// lock lets large callers serialize their access to the pool
// (see the len(builds) > 2 branch in readColumns).
type resultReader struct {
	lock sync.Locker
	read func(context.Context, gcs.Downloader, gcs.Build, time.Time) func() (*gcsResult, error)
}

// readColumns will list, download and process builds into inflatedColumns.
//
// A producer goroutine enqueues reads (newest build last, since it walks
// builds in reverse index order) while a single consumer goroutine awaits
// each result, converts it to an InflatedColumn, and forwards it to
// receivers. Failed reads become synthetic error/ancient/no-start columns
// rather than aborting the group.
func readColumns(ctx context.Context, client gcs.Downloader, log logrus.FieldLogger, group *configpb.TestGroup, builds []gcs.Build, stop time.Time, buildTimeout time.Duration, receivers chan<- InflatedColumn, readResult *resultReader, enableIgnoreSkip bool) {
	if len(builds) == 0 {
		return
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	nameCfg := makeNameConfig(group)
	var heads []string
	for _, h := range group.ColumnHeader {
		heads = append(heads, h.ConfigurationValue)
	}

	type resp struct {
		build gcs.Build
		res   func() (*gcsResult, error)
	}

	ch := make(chan resp)
	var wg sync.WaitGroup

	wg.Add(1)
	go func() {
		defer wg.Done()
		// TODO(fejta): restore inter-build concurrency
		var failures int // since last good column
		var extra []string
		var started float64
		for resp := range ch {
			b := resp.build
			log := log.WithField("build", b)
			result, err := resp.res()
			id := path.Base(b.Path.Object())
			var col InflatedColumn
			if err != nil {
				failures++
				log.WithError(err).Trace("Failed to read build")
				if extra == nil {
					extra = make([]string, len(heads))
				}
				// Synthesize a start time slightly after the last good
				// column so consecutive failures still order correctly.
				when := started + 0.01*float64(failures)
				var ancientErr *ancientError
				var noStartErr *noStartError
				if errors.As(err, &ancientErr) {
					col = ancientColumn(id, when, extra, ancientErr.Error())
				} else if errors.As(err, &noStartErr) {
					col = noStartColumn(id, when, extra, noStartErr.Error())
				} else {
					msg := fmt.Sprintf("Failed to download %s: %s", b, err.Error())
					col = erroredColumn(id, when, extra, msg)
				}
			} else {
				opts := makeOptions(group)
				if !enableIgnoreSkip {
					opts.ignoreSkip = false
				}
				col = convertResult(log, nameCfg, id, heads, *result, opts)
				log.WithField("rows", len(col.Cells)).Debug("Read result")
				failures = 0
				extra = col.Column.Extra
				started = col.Column.Started
			}

			select {
			case <-ctx.Done():
				return
			case receivers <- col:
			}
		}
	}()
	defer wg.Wait()

	defer close(ch)
	// NOTE(review): presumably this serializes larger groups' use of the
	// shared reader pool while letting tiny groups proceed — confirm intent.
	if len(builds) > 2 {
		readResult.lock.Lock()
		defer readResult.lock.Unlock()
	}
	for i := len(builds) - 1; i >= 0; i-- {
		b := builds[i]
		r := resp{
			build: b,
			res:   readResult.read(ctx, client, b, stop),
		}
		select {
		case <-ctx.Done():
			return
		case ch <- r:
		}
	}
}

// ancientColumn returns a placeholder column for a build that started
// before the stop cutoff, with an UNKNOWN overall result.
func ancientColumn(id string, when float64, extra []string, msg string) InflatedColumn {
	return InflatedColumn{
		Column: &statepb.Column{
			Build:   id,
			Hint:    id,
			Started: when,
			Extra:   extra,
		},
		Cells: map[string]Cell{
			overallRow: {
				Message: msg,
				Result:  statuspb.TestStatus_UNKNOWN,
			},
		},
	}
}

// noStartColumn returns a placeholder column for a build whose started
// timestamp is 0, shown as RUNNING.
func noStartColumn(id string, when float64, extra []string, msg string) InflatedColumn {
	return InflatedColumn{
		Column: &statepb.Column{
			Build:   id,
			Hint:    id,
			Started: when,
			Extra:   extra,
		},
		Cells: map[string]Cell{
			overallRow: {
				Message: msg,
				Result:  statuspb.TestStatus_RUNNING,
			},
		},
	}
}

// erroredColumn returns a placeholder column for a build that failed to
// download, shown as TOOL_FAIL.
func erroredColumn(id string, when float64, extra []string, msg string) InflatedColumn {
	return InflatedColumn{
		Column: &statepb.Column{
			Build:   id,
			Hint:    id,
			Started: when,
			Extra:   extra,
		},
		Cells: map[string]Cell{
			overallRow: {
				Message: msg,
				Result:  statuspb.TestStatus_TOOL_FAIL,
			},
		},
	}
}

// groupOptions bundles per-TestGroup conversion settings derived from the
// group's proto config (see makeOptions).
type groupOptions struct {
	merge          bool
	analyzeProwJob bool
	addCellID      bool
	metricKey      string
	buildKey       string
	userKey        string
	annotations    []*configpb.TestGroup_TestAnnotation
	rules          []*evalpb.Rule
	ignoreSkip     bool
}

// makeOptions extracts conversion options from the TestGroup config.
// Note merge and analyzeProwJob invert the proto's "disable" flags.
func makeOptions(group *configpb.TestGroup) groupOptions {
	return groupOptions{
		merge:          !group.DisableMergedStatus,
		analyzeProwJob: !group.DisableProwjobAnalysis,
		addCellID:      group.BuildOverrideStrftime != "",
		metricKey:      group.ShortTextMetric,
		buildKey:       group.BuildOverrideConfigurationValue,
		userKey:        group.UserProperty,
		annotations:    group.TestAnnotations,
		rules:          group.GetCustomEvaluatorRuleSet().GetRules(),
		ignoreSkip:     group.GetIgnoreSkip(),
	}
}

// Sentinel part names understood by nameConfig.render.
const (
	testsName = "Tests name"
	jobName   = "Job name"
)

// nameConfig describes how to format a row name: format is an fmt verb
// string and parts supplies its arguments (jobName/testsName sentinels or
// metadata keys). multiJob is set when the group aggregates several jobs.
type nameConfig struct {
	format   string
	parts    []string
	multiJob bool
}

// render the metadata into the expect test name format.
//
// Argument order determines precedence.
func (nc nameConfig) render(job, test string, metadatas ...map[string]string) string {
	parsed := make([]interface{}, len(nc.parts))
	for i, p := range nc.parts {
		var s string
		switch p {
		case jobName:
			s = job
		case testsName:
			s = test
		default:
			// First metadata map containing the key wins.
			for _, metadata := range metadatas {
				v, present := metadata[p]
				if present {
					s = v
					break
				}
			}
		}
		parsed[i] = s
	}
	return fmt.Sprintf(nc.format, parsed...)
}

// makeNameConfig builds the row-naming config for a group, forcing the
// job name into the format when the group covers multiple GCS prefixes
// (comma-separated) so rows from different jobs stay distinguishable.
func makeNameConfig(group *configpb.TestGroup) nameConfig {
	nameCfg := convertNameConfig(group.TestNameConfig)
	if strings.Contains(gcsPrefix(group), ",") {
		nameCfg.multiJob = true
		ensureJobName(&nameCfg)
	}
	return nameCfg
}

// firstFilled returns the first non-empty string, or "".
func firstFilled(strs ...string) string {
	for _, s := range strs {
		if s != "" {
			return s
		}
	}
	return ""
}

// convertNameConfig converts the proto TestNameConfig into a nameConfig,
// defaulting to a bare "%s" test-name format when unset.
func convertNameConfig(tnc *configpb.TestNameConfig) nameConfig {
	if tnc == nil {
		return nameConfig{
			format: "%s",
			parts:  []string{testsName},
		}
	}
	nc := nameConfig{
		format: tnc.NameFormat,
		parts:  make([]string, len(tnc.NameElements)),
	}
	for i, e := range tnc.NameElements {
		// TODO(fejta): build_target = true
		// TODO(fejta): tags = 'SOMETHING'
		nc.parts[i] = firstFilled(e.TargetConfig, e.TestProperty)
	}
	return nc
}

// ensureJobName prepends a "job." prefix part to the name format unless
// the config already renders the job name.
func ensureJobName(nc *nameConfig) {
	for _, p := range nc.parts {
		if p == jobName {
			return
		}
	}
	nc.format = "%s." + nc.format
	nc.parts = append([]string{jobName}, nc.parts...)
}

// ancientError marks a build that started before the stop cutoff.
type ancientError struct {
	msg string
}

// Error returns the stored message.
func (e *ancientError) Error() string {
	return e.msg
}

// noStartError marks a build whose started timestamp is 0.
type noStartError struct{}

// Error describes the zero start timestamp.
func (e *noStartError) Error() string {
	return "Start timestamp for this job is 0."
}

// readResult will download all GCS artifacts in parallel.
//
// Specifically download the following files:
// * started.json
// * finished.json
// * any junit.xml files under the artifacts directory.
420 func readResult(parent context.Context, client gcs.Downloader, build gcs.Build, stop time.Time) (*gcsResult, error) { 421 ctx, cancel := context.WithCancel(parent) // Allows aborting after first error 422 defer cancel() 423 result := gcsResult{ 424 job: build.Job(), 425 build: build.Build(), 426 } 427 ec := make(chan error) // Receives errors from anyone 428 429 var lock sync.Mutex 430 addMalformed := func(s ...string) { 431 lock.Lock() 432 defer lock.Unlock() 433 result.malformed = append(result.malformed, s...) 434 } 435 436 var work int 437 438 // Download podinfo.json 439 work++ 440 go func() { 441 pi, err := build.PodInfo(ctx, client) 442 switch { 443 case errors.Is(err, io.EOF): 444 addMalformed("podinfo.json") 445 err = nil 446 case err != nil: 447 err = fmt.Errorf("podinfo: %w", err) 448 case pi != nil: 449 result.podInfo = *pi 450 } 451 select { 452 case <-ctx.Done(): 453 case ec <- err: 454 } 455 }() 456 457 // Download started.json 458 work++ 459 go func() { 460 s, err := build.Started(ctx, client) 461 switch { 462 case errors.Is(err, io.EOF): 463 addMalformed("started.json") 464 err = nil 465 case err != nil: 466 err = fmt.Errorf("started: %w", err) 467 case time.Unix(s.Timestamp, 0).Before(stop): 468 err = &ancientError{fmt.Sprintf("build too old; started %v before %v)", s.Timestamp, stop.Unix())} 469 if s.Timestamp == 0 { 470 err = &noStartError{} 471 } 472 default: 473 result.started = *s 474 } 475 select { 476 case <-ctx.Done(): 477 case ec <- err: 478 } 479 }() 480 481 // Download finished.json 482 work++ 483 go func() { 484 f, err := build.Finished(ctx, client) 485 switch { 486 case errors.Is(err, io.EOF): 487 addMalformed("finished.json") 488 err = nil 489 case err != nil: 490 err = fmt.Errorf("finished: %w", err) 491 default: 492 result.finished = *f 493 } 494 select { 495 case <-ctx.Done(): 496 case ec <- err: 497 } 498 }() 499 500 // Download suites 501 work++ 502 go func() { 503 suites, err := readSuites(ctx, client, build) 504 if err != 
nil { 505 err = fmt.Errorf("suites: %w", err) 506 } 507 var problems []string 508 for _, s := range suites { 509 if s.Err != nil { 510 p := strings.TrimPrefix(s.Path, build.Path.String()) 511 problems = append(problems, fmt.Sprintf("%s: %s", p, s.Err)) 512 } else { 513 result.suites = append(result.suites, s) 514 } 515 } 516 if len(problems) > 0 { 517 addMalformed(problems...) 518 } 519 520 select { 521 case <-ctx.Done(): 522 case ec <- err: 523 } 524 }() 525 526 for ; work > 0; work-- { 527 select { 528 case <-ctx.Done(): 529 return nil, fmt.Errorf("timeout: %w", ctx.Err()) 530 case err := <-ec: 531 if err != nil { 532 return nil, err 533 } 534 } 535 } 536 sort.Slice(result.malformed, func(i, j int) bool { 537 return result.malformed[i] < result.malformed[j] 538 }) 539 return &result, nil 540 } 541 542 // readSuites asynchronously lists and downloads junit.xml files 543 func readSuites(parent context.Context, client gcs.Downloader, build gcs.Build) ([]gcs.SuitesMeta, error) { 544 ctx, cancel := context.WithCancel(parent) 545 defer cancel() 546 ec := make(chan error) 547 548 // List 549 artifacts := make(chan string, 1) 550 go func() { 551 defer close(artifacts) // No more artifacts 552 if err := build.Artifacts(ctx, client, artifacts); err != nil { 553 select { 554 case <-ctx.Done(): 555 case ec <- fmt.Errorf("list: %w", err): 556 } 557 } 558 }() 559 560 // Download 561 suitesChan := make(chan gcs.SuitesMeta, 1) 562 go func() { 563 defer close(suitesChan) // No more rows 564 const max = 1000 565 if err := build.Suites(ctx, client, artifacts, suitesChan, max); err != nil { 566 select { 567 case <-ctx.Done(): 568 case ec <- fmt.Errorf("download: %w", err): 569 } 570 } 571 }() 572 573 // Append 574 var suites []gcs.SuitesMeta 575 go func() { 576 for suite := range suitesChan { 577 suites = append(suites, suite) 578 } 579 select { 580 case <-ctx.Done(): 581 case ec <- nil: 582 } 583 }() 584 585 select { 586 case <-ctx.Done(): 587 return nil, ctx.Err() 588 case err := 
<-ec: 589 if err != nil { 590 return nil, err 591 } 592 } 593 return suites, nil 594 }