github.com/GoogleCloudPlatform/testgrid@v0.0.174/pkg/updater/updater.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package updater reads the latest test results and saves updated state. 18 package updater 19 20 import ( 21 "context" 22 "errors" 23 "fmt" 24 "math" 25 "math/rand" 26 "net/url" 27 "path" 28 "sort" 29 "strings" 30 "sync" 31 "time" 32 "unicode/utf8" 33 34 "cloud.google.com/go/storage" 35 "github.com/GoogleCloudPlatform/testgrid/config" 36 "github.com/GoogleCloudPlatform/testgrid/config/snapshot" 37 "github.com/GoogleCloudPlatform/testgrid/internal/result" 38 configpb "github.com/GoogleCloudPlatform/testgrid/pb/config" 39 statepb "github.com/GoogleCloudPlatform/testgrid/pb/state" 40 statuspb "github.com/GoogleCloudPlatform/testgrid/pb/test_status" 41 "github.com/GoogleCloudPlatform/testgrid/util/gcs" 42 "github.com/GoogleCloudPlatform/testgrid/util/metrics" 43 "github.com/fvbommel/sortorder" 44 "github.com/golang/protobuf/ptypes/timestamp" 45 "github.com/sirupsen/logrus" 46 ) 47 48 const componentName = "updater" 49 50 // Metrics holds metrics relevant to the Updater. 
51 type Metrics struct { 52 UpdateState metrics.Cyclic 53 DelaySeconds metrics.Duration 54 } 55 56 // CreateMetrics creates metrics for this controller 57 func CreateMetrics(factory metrics.Factory) *Metrics { 58 return &Metrics{ 59 UpdateState: factory.NewCyclic(componentName), 60 DelaySeconds: factory.NewDuration("delay", "Seconds updater is behind schedule", "component"), 61 } 62 } 63 64 func (mets *Metrics) delay(dur time.Duration) { 65 if mets == nil { 66 return 67 } 68 mets.DelaySeconds.Set(dur, componentName) 69 } 70 71 func (mets *Metrics) start() *metrics.CycleReporter { 72 if mets == nil { 73 return nil 74 } 75 return mets.UpdateState.Start() 76 } 77 78 // GroupUpdater will compile the grid state proto for the specified group and upload it. 79 // 80 // This typically involves downloading the existing state, dropping old columns, 81 // compiling any new columns and inserting them into the front and then uploading 82 // the proto to GCS. 83 // 84 // Disable pooled downloads with a nil poolCtx, otherwise at most concurrency builds 85 // will be downloaded at the same time. 86 // 87 // Return true if there are more results to process. 88 type GroupUpdater func(parent context.Context, log logrus.FieldLogger, client gcs.Client, tg *configpb.TestGroup, gridPath gcs.Path) (bool, error) 89 90 // GCS returns a GCS-based GroupUpdater, which knows how to process result data stored in GCS. 
91 func GCS(poolCtx context.Context, colClient gcs.Client, groupTimeout, buildTimeout time.Duration, concurrency int, write bool, enableIgnoreSkip bool) GroupUpdater { 92 var readResult *resultReader 93 if poolCtx == nil { 94 // TODO(fejta): remove check soon 95 panic("Context must be non-nil") 96 } 97 readResult = resultReaderPool(poolCtx, logrus.WithField("pool", "readResult"), concurrency) 98 99 return func(parent context.Context, log logrus.FieldLogger, client gcs.Client, tg *configpb.TestGroup, gridPath gcs.Path) (bool, error) { 100 if !tg.UseKubernetesClient && (tg.ResultSource == nil || tg.ResultSource.GetGcsConfig() == nil) { 101 log.Debug("Skipping non-kubernetes client group") 102 return false, nil 103 } 104 ctx, cancel := context.WithTimeout(parent, groupTimeout) 105 defer cancel() 106 gcsColReader := gcsColumnReader(colClient, buildTimeout, readResult, enableIgnoreSkip) 107 reprocess := 20 * time.Minute // allow 20m for prow to finish uploading artifacts 108 return InflateDropAppend(ctx, log, client, tg, gridPath, write, gcsColReader, reprocess) 109 } 110 } 111 112 func gridPaths(configPath gcs.Path, gridPrefix string, groups []*configpb.TestGroup) ([]gcs.Path, error) { 113 paths := make([]gcs.Path, 0, len(groups)) 114 for _, tg := range groups { 115 tgp, err := TestGroupPath(configPath, gridPrefix, tg.Name) 116 if err != nil { 117 return nil, fmt.Errorf("%s bad group path: %w", tg.Name, err) 118 } 119 paths = append(paths, *tgp) 120 } 121 return paths, nil 122 } 123 124 // lockGroup makes a conditional GCS write operation to ensure it has authority to update this object. 125 // 126 // This allows multiple decentralized updaters to collaborate on updating groups: 127 // Regardless of how many updaters are trying to concurrently update an object foo at generation X, GCS 128 // will only allow one of them to "win". The others receive a PreconditionFailed error and can 129 // move onto the next group. 
130 func lockGroup(ctx context.Context, client gcs.ConditionalClient, path gcs.Path, generation int64) (*storage.ObjectAttrs, error) { 131 var buf []byte 132 if generation == 0 { 133 var grid statepb.Grid 134 var err error 135 if buf, err = gcs.MarshalGrid(&grid); err != nil { 136 return nil, fmt.Errorf("marshal: %w", err) 137 } 138 } 139 140 return gcs.Touch(ctx, client, path, generation, buf) 141 } 142 143 func testGroups(cfg *snapshot.Config, groupNames ...string) ([]*configpb.TestGroup, error) { 144 var groups []*configpb.TestGroup 145 146 if len(groupNames) == 0 { 147 groups = make([]*configpb.TestGroup, 0, len(groupNames)) 148 for _, testConfig := range cfg.Groups { 149 groups = append(groups, testConfig) 150 } 151 return groups, nil 152 } 153 154 groups = make([]*configpb.TestGroup, 0, len(groupNames)) 155 for _, groupName := range groupNames { 156 tg := cfg.Groups[groupName] 157 if tg == nil { 158 return nil, fmt.Errorf("group %q not found", groupName) 159 } 160 groups = append(groups, tg) 161 } 162 return groups, nil 163 } 164 165 type lastUpdated struct { 166 client gcs.ConditionalClient 167 gridPrefix string 168 configPath gcs.Path 169 freq time.Duration 170 } 171 172 func (fixer lastUpdated) fixOnce(ctx context.Context, log logrus.FieldLogger, q *config.TestGroupQueue, groups []*configpb.TestGroup) error { 173 ctx, cancel := context.WithCancel(ctx) 174 defer cancel() 175 paths, err := gridPaths(fixer.configPath, fixer.gridPrefix, groups) 176 if err != nil { 177 return err 178 } 179 attrs := gcs.StatExisting(ctx, log, fixer.client, paths...) 
180 updates := make(map[string]time.Time, len(attrs)) 181 var wg sync.WaitGroup 182 for i, attr := range attrs { 183 if attr == nil { 184 continue 185 } 186 name := groups[i].Name 187 if attr.Generation > 0 { 188 updates[name] = attr.Updated.Add(fixer.freq) 189 } else if attr.Generation == 0 { 190 wg.Add(1) 191 go func(i int) { 192 defer wg.Done() 193 if _, err := lockGroup(ctx, fixer.client, paths[i], 0); err != nil && !gcs.IsPreconditionFailed(err) { 194 log.WithError(err).Error("Failed to create empty group state") 195 } 196 }(i) 197 updates[name] = time.Now() 198 } 199 } 200 wg.Wait() 201 q.Init(log, groups, time.Now().Add(fixer.freq)) 202 q.FixAll(updates, false) 203 return nil 204 } 205 206 func (fixer lastUpdated) Fix(ctx context.Context, log logrus.FieldLogger, q *config.TestGroupQueue, groups []*configpb.TestGroup) error { 207 if fixer.freq == 0 { 208 return nil 209 } 210 ticker := time.NewTicker(fixer.freq) 211 fix := func() { 212 if err := fixer.fixOnce(ctx, log, q, groups); err != nil { 213 log.WithError(err).Warning("Failed to fix groups based on last update time") 214 } 215 } 216 fix() 217 218 for { 219 select { 220 case <-ctx.Done(): 221 ticker.Stop() 222 return ctx.Err() 223 case <-ticker.C: 224 fix() 225 } 226 } 227 } 228 229 // Fixer will fix the TestGroupQueue's next time for TestGroups. 230 // 231 // Fixer should: 232 // * work continually and not return until the context expires. 233 // * expect to be called multiple times with different contexts and test groups. 234 // 235 // For example, it might use the last updated time of the test group to 236 // specify the next update time. Or it might watch the data backing these groups and 237 // request an immediate update whenever the data changes. 238 type Fixer func(context.Context, logrus.FieldLogger, *config.TestGroupQueue, []*configpb.TestGroup) error 239 240 // UpdateOptions aggregates the Update function parameter into a single structure. 
type UpdateOptions struct {
	ConfigPath       gcs.Path      // location of the config proto to observe
	GridPrefix       string        // prefix for grid state objects, relative to ConfigPath
	GroupConcurrency int           // number of groups to update concurrently
	GroupNames       []string      // filter to these groups when non-empty
	Write            bool          // NOTE(review): not referenced inside Update itself; presumably consumed by GroupUpdater construction — confirm
	Freq             time.Duration // how often to update each group; 0 means update each group once
}

// Update test groups with the specified freq.
//
// Retries errors at double and unfinished groups as soon as possible.
//
// Filters down to a single group when set.
// Returns after all groups updated once if freq is zero.
func Update(parent context.Context, client gcs.ConditionalClient, mets *Metrics, updateGroup GroupUpdater, opts *UpdateOptions, fixers ...Fixer) error {
	ctx, cancel := context.WithCancel(parent)
	defer cancel()
	log := logrus.WithField("config", opts.ConfigPath)

	var q config.TestGroupQueue

	log.Debug("Observing config...")
	// Watch the config object; newConfig receives a snapshot whenever it changes (checked every minute).
	newConfig, err := snapshot.Observe(ctx, log, client, opts.ConfigPath, time.NewTicker(time.Minute).C)
	if err != nil {
		return fmt.Errorf("observe config: %w", err)

	}
	cfg := <-newConfig
	groups, err := testGroups(cfg, opts.GroupNames...)
	if err != nil {
		return fmt.Errorf("filter test groups: %w", err)
	}

	q.Init(log, groups, time.Now().Add(opts.Freq))

	log.Debug("Fetching initial start times...")
	fixLastUpdated := lastUpdated{
		client:     client,
		gridPrefix: opts.GridPrefix,
		configPath: opts.ConfigPath,
		freq:       opts.Freq,
	}
	// NOTE(review): wraps with %v, unlike the %w wrapping above — consider %w so callers can unwrap.
	if err := fixLastUpdated.fixOnce(ctx, log, &q, groups); err != nil {
		return fmt.Errorf("get generations: %v", err)
	}
	log.Info("Fetched initial start times")

	fixers = append(fixers, fixLastUpdated.Fix)

	// Report schedule delay to metrics and log queue status once a minute.
	go func() {
		ticker := time.NewTicker(time.Minute)
		log := log
		for {
			depth, next, when := q.Status()
			log := log.WithField("depth", depth)
			if next != nil {
				log = log.WithField("next", next.Name)
			}
			delay := time.Since(when)
			if delay < 0 {
				delay = 0
				// NOTE(review): "sleep" is computed after delay is zeroed, so it always logs 0 —
				// presumably -delay was meant to be captured before the reset; confirm intent.
				log = log.WithField("sleep", -delay)
			}
			// Cap the reported delay at twice the update frequency.
			if max := opts.Freq * 2; max > 0 && delay > max {
				delay = max
			}
			log = log.WithField("delay", delay.Round(time.Second))
			mets.delay(delay)
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				log.Info("Queue Status")
			}
		}
	}()

	// Run every fixer against the current groups; cancel and restart them whenever the config changes.
	go func() {
		fixCtx, fixCancel := context.WithCancel(ctx)
		var fixWg sync.WaitGroup
		fixAll := func() {
			n := len(fixers)
			log.WithField("fixers", n).Trace("Starting fixers on current test groups...")
			fixWg.Add(n)
			for i, fix := range fixers {
				go func(i int, fix Fixer) {
					defer fixWg.Done()
					if err := fix(fixCtx, log, &q, groups); err != nil && !errors.Is(err, context.Canceled) {
						log.WithError(err).WithField("fixer", i).Warning("Fixer failed")
					}
				}(i, fix)
			}
			log.Debug("Started fixers on current test groups")
		}
		fixAll()
		for {
			select {
			case <-ctx.Done():
				fixCancel()
				return
			case cfg, ok := <-newConfig:
				if !ok {
					fixCancel()
					return
				}
				log.Info("Updating config")
				// NOTE(review): groups (captured from the enclosing function) is reassigned here
				// before fixCancel/fixWg.Wait, while fixer goroutines may still read it —
				// looks like a data race; confirm with -race.
				groups, err = testGroups(cfg, opts.GroupNames...)
				if err != nil {
					log.Errorf("Error during config update: %v", err)
				}
				log.Debug("Cancelling fixers on old test groups...")
				fixCancel()
				fixWg.Wait()
				q.Init(log, groups, time.Now().Add(opts.Freq))
				log.Debug("Canceled fixers on old test groups")
				fixCtx, fixCancel = context.WithCancel(ctx)
				fixAll()
			}
		}
	}()

	// Track which groups are currently being updated so concurrent workers skip duplicates.
	active := map[string]bool{}
	var lock sync.RWMutex
	var wg sync.WaitGroup
	wg.Add(opts.GroupConcurrency)
	defer wg.Wait()
	channel := make(chan *configpb.TestGroup)
	defer close(channel)

	updateTestGroup := func(tg *configpb.TestGroup) {
		name := tg.Name
		log := log.WithField("group", name)
		// Cheap read-locked check first...
		lock.RLock()
		on := active[name]
		lock.RUnlock()
		if on {
			log.Debug("Already updating...")
			return
		}
		fin := mets.start()
		tgp, err := TestGroupPath(opts.ConfigPath, opts.GridPrefix, name)
		if err != nil {
			fin.Fail()
			log.WithError(err).Error("Bad path")
			return
		}
		// ...then re-check under the write lock before claiming the group.
		lock.Lock()
		if active[name] {
			lock.Unlock()
			log.Debug("Another routine started updating...")
			return
		}
		active[name] = true
		lock.Unlock()
		defer func() {
			lock.Lock()
			active[name] = false
			lock.Unlock()
		}()
		start := time.Now()
		unprocessed, err := updateGroup(ctx, log, client, tg, *tgp)
		log.WithField("duration", time.Since(start)).Info("Finished processing group.")
		if err != nil {
			log := log.WithError(err)
			if gcs.IsPreconditionFailed(err) {
				// Another updater won the conditional write; not a failure.
				fin.Skip()
				log.Info("Group was modified while updating")
			} else {
				fin.Fail()
				log.Error("Failed to update group")
			}
			// Retry a failed group after a randomized backoff of 25-50% of freq.
			var delay time.Duration
			if opts.Freq > 0 {
				delay = opts.Freq/4 + time.Duration(rand.Int63n(int64(opts.Freq/4))) // Int63n() panics if freq <= 0
				log = log.WithField("delay", delay.Seconds())
				q.Fix(tg.Name, time.Now().Add(delay), true)
			}
			return
		}
		fin.Success()
		if unprocessed { // process another chunk ASAP
			q.Fix(name, time.Now(), false)
		}
	}

	// Worker pool: GroupConcurrency goroutines consume groups from the queue.
	for i := 0; i < opts.GroupConcurrency; i++ {
		go func() {
			defer wg.Done()
			for tg := range channel {
				updateTestGroup(tg)
			}
		}()
	}

	log.Info("Starting to process test groups...")
	return q.Send(ctx, channel, opts.Freq)
}

// TestGroupPath returns the path to a test_group proto given this proto
func TestGroupPath(g gcs.Path, gridPrefix, groupName string) (*gcs.Path, error) {
	name := path.Join(gridPrefix, groupName)
	u, err := url.Parse(name)
	if err != nil {
		return nil, fmt.Errorf("invalid url %s: %w", name, err)
	}
	np, err := g.ResolveReference(u)
	if err != nil {
		return nil, fmt.Errorf("resolve reference: %w", err)
	}
	// Resolving the group name must not escape the config's bucket.
	if np.Bucket() != g.Bucket() {
		return nil, fmt.Errorf("testGroup %s should not change bucket", name)
	}
	return np, nil
}

// gcsPrefix returns the group's GCS prefix, preferring the result_source gcs_config when set.
func gcsPrefix(tg *configpb.TestGroup) string {
	if tg.ResultSource == nil {
		return tg.GcsPrefix
	}
	if gcsCfg := tg.ResultSource.GetGcsConfig(); gcsCfg != nil {
		return gcsCfg.GcsPrefix
	}
	return tg.GcsPrefix
}

// groupPaths parses the group's comma-separated GCS prefixes into gs:// paths.
func groupPaths(tg *configpb.TestGroup) ([]gcs.Path, error) {
	var out []gcs.Path
	prefixes := strings.Split(gcsPrefix(tg), ",")
	for idx, prefix := range prefixes {
		prefix := strings.TrimSpace(prefix)
		if prefix == "" {
			continue // ignore empty entries, e.g. trailing commas
		}
		u, err := url.Parse("gs://" + prefix)
		if err != nil {
			return nil, fmt.Errorf("parse: %w", err)
		}
		// Ensure the prefix ends with / so it lists like a directory.
		if u.Path != "" && u.Path[len(u.Path)-1] != '/' {
			u.Path += "/"
		}

		var p gcs.Path
		if err := p.SetURL(u); err != nil {
			if idx > 0 {
				return nil, fmt.Errorf("%d: %s: %w", idx, prefix, err)
			}
			return nil, err
		}
		out = append(out, p)
	}
	return out, nil
}

// truncateRunning filters out all columns until the oldest still running column.
496 // 497 // If there are 20 columns where all are complete except the 3rd and 7th, this will 498 // return the 8th and later columns. 499 // 500 // Running columns more than 3 days old are not considered. 501 func truncateRunning(cols []InflatedColumn, floorTime time.Time) []InflatedColumn { 502 if len(cols) == 0 { 503 return cols 504 } 505 506 floor := float64(floorTime.UTC().Unix() * 1000) 507 508 for i := len(cols) - 1; i >= 0; i-- { 509 if cols[i].Column.Started < floor { 510 continue 511 } 512 for _, cell := range cols[i].Cells { 513 if cell.Result == statuspb.TestStatus_RUNNING { 514 return cols[i+1:] 515 } 516 } 517 } 518 // No cells are found to be running; do not truncate 519 return cols 520 } 521 522 func listBuilds(ctx context.Context, client gcs.Lister, since string, paths ...gcs.Path) ([]gcs.Build, error) { 523 var out []gcs.Build 524 525 for idx, tgPath := range paths { 526 var offset *gcs.Path 527 var err error 528 if since != "" { 529 if !strings.HasSuffix(since, "/") { 530 since = since + "/" 531 } 532 if offset, err = tgPath.ResolveReference(&url.URL{Path: since}); err != nil { 533 return nil, fmt.Errorf("resolve since: %w", err) 534 } 535 } 536 builds, err := gcs.ListBuilds(ctx, client, tgPath, offset) 537 if err != nil { 538 return nil, fmt.Errorf("%d: %s: %w", idx, tgPath, err) 539 } 540 out = append(out, builds...) 541 } 542 543 if len(paths) > 1 { 544 gcs.Sort(out) 545 } 546 547 return out, nil 548 } 549 550 // ColumnReader finds, processes and new columns to send to the receivers. 551 // 552 // * Columns with the same Name and Build will get merged together. 553 // * Readers must be reentrant. 554 // - Processing must expect every sent column to be the final column this cycle. 555 // AKA calling this method once and reading two columns should be equivalent to 556 // calling the method once, reading one column and then calling it a second time 557 // and reading a second column. 
type ColumnReader func(ctx context.Context, log logrus.FieldLogger, tg *configpb.TestGroup, oldCols []InflatedColumn, stop time.Time, receivers chan<- InflatedColumn) error

// SortStarted sorts InflatedColumns by column start time.
func SortStarted(cols []InflatedColumn) {
	// Descending: newest (largest Started) first.
	sort.SliceStable(cols, func(i, j int) bool {
		return cols[i].Column.Started > cols[j].Column.Started
	})
}

const byteCeiling = 2e6 // 2 megabytes

// InflateDropAppend updates groups by downloading the existing grid, dropping old rows and appending new ones.
func InflateDropAppend(ctx context.Context, alog logrus.FieldLogger, client gcs.Client, tg *configpb.TestGroup, gridPath gcs.Path, write bool, readCols ColumnReader, reprocess time.Duration) (bool, error) {
	log := alog.(logrus.Ext1FieldLogger) // Add trace method
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Grace period to read additional column.
	// Half the remaining deadline; detached from ctx so it outlives ctx's cancellation.
	var grace context.Context
	if deadline, present := ctx.Deadline(); present {
		var cancel context.CancelFunc
		dur := time.Until(deadline) / 2
		grace, cancel = context.WithTimeout(context.Background(), dur)
		defer cancel()
	} else {
		grace = context.Background()
	}

	// Longer (3/4 of remaining deadline) grace period for shrinking oversized grids.
	var shrinkGrace context.Context
	if deadline, present := ctx.Deadline(); present {
		var cancel context.CancelFunc
		dur := 3 * time.Until(deadline) / 4
		shrinkGrace, cancel = context.WithTimeout(context.Background(), dur)
		defer cancel()
	} else {
		shrinkGrace = context.Background()
	}

	// Retain results within the configured number of days (default 7).
	var dur time.Duration
	if tg.DaysOfResults > 0 {
		dur = days(float64(tg.DaysOfResults))
	} else {
		dur = days(7)
	}

	stop := time.Now().Add(-dur)
	log = log.WithField("stop", stop)

	var oldCols []InflatedColumn
	var issues map[string][]string

	log.Trace("Downloading existing grid...")
	old, attrs, err := gcs.DownloadGrid(ctx, client, gridPath)
	if err != nil {
		// Non-fatal: proceed as if the grid did not exist.
		log.WithField("path", gridPath).WithError(err).Error("Failed to download existing grid")
	}
	inflateStart := time.Now()
	if old != nil {
		var cols []InflatedColumn
		var err error
		log.Trace("Inflating grid...")
		if cols, issues, err = InflateGrid(ctx, old, stop, time.Now().Add(-reprocess)); err != nil {
			return false, fmt.Errorf("inflate: %w", err)
		}
		// When the stored config differs from the current one, inject a synthetic
		// RUNNING column so the last week of results gets reprocessed.
		var floor time.Time
		when := time.Now().Add(-7 * 24 * time.Hour)
		if col := reprocessColumn(log, old, tg, when); col != nil {
			cols = append(cols, *col)
			floor = when
		}
		SortStarted(cols) // Our processing requires descending start time.
		oldCols = truncateRunning(cols, floor)
	}
	inflateDur := time.Since(inflateStart)
	readColsStart := time.Now()
	var cols []InflatedColumn
	var unreadColumns bool
	if attrs != nil && attrs.Size >= int64(byteCeiling) {
		// Grid already at the size limit: skip reading new columns, just compress.
		log.WithField("size", attrs.Size).Info("Grid too large, compressing...")
		unreadColumns = true
		cols = oldCols
	} else {
		// Guard the eventual upload with generation preconditions so concurrent
		// updaters cannot clobber each other.
		if condClient, ok := client.(gcs.ConditionalClient); ok {
			var cond storage.Conditions
			if attrs == nil {
				cond.DoesNotExist = true
			} else {
				cond.GenerationMatch = attrs.Generation
			}
			client = condClient.If(&cond, &cond)
		}

		newCols := make(chan InflatedColumn)
		ec := make(chan error)

		log.Trace("Reading first column...")
		go func() {
			err := readCols(ctx, log, tg, oldCols, stop, newCols)
			// Deliver the final error unless ctx was canceled first.
			select {
			case <-ctx.Done():
			case ec <- err:
			}
		}()

		// Must read at least one column every cycle to ensure we make forward progress.
		more := true
		select {
		case <-ctx.Done():
			return false, fmt.Errorf("first column: %w", ctx.Err())
		case col := <-newCols:
			if len(col.Cells) == 0 {
				// Group all empty columns together by setting build/name empty.
				col.Column.Build = ""
				col.Column.Name = ""
			}
			cols = append(cols, col)
		case err := <-ec:
			if err != nil {
				return false, fmt.Errorf("read first column: %w", err)
			}
			more = false
		}

		// Read as many additional columns as we can within the allocated time.
		log.Trace("Reading additional columns...")
		for more {
			select {
			case <-grace.Done():
				// Out of time: remember there is more work for next cycle.
				unreadColumns = true
				more = false
			case <-ctx.Done():
				return false, ctx.Err()
			case col := <-newCols:
				if len(col.Cells) == 0 {
					// Group all empty columns together by setting build/name empty.
					col.Column.Build = ""
					col.Column.Name = ""
				}
				cols = append(cols, col)
			case err := <-ec:
				if err != nil {
					return false, fmt.Errorf("read columns: %w", err)
				}
				more = false
			}
		}

		log = log.WithField("appended", len(cols))

		overrideBuild(tg, cols) // so we group correctly
		cols = append(cols, oldCols...)
		cols = groupColumns(tg, cols)
	}
	readColsDur := time.Since(readColsStart)

	SortStarted(cols)

	shrinkStart := time.Now()
	cols = truncateGrid(cols, byteCeiling) // Assume each cell is at least 1 byte
	var grid *statepb.Grid
	var buf []byte
	grid, buf, err = shrinkGridInline(shrinkGrace, log, tg, cols, issues, byteCeiling)
	if err != nil {
		// NOTE(review): wraps with %v, unlike the %w wrapping elsewhere — consider %w.
		return false, fmt.Errorf("shrink grid inline: %v", err)
	}
	shrinkDur := time.Since(shrinkStart)

	grid.Config = tg

	log = log.WithField("url", gridPath).WithField("bytes", len(buf))
	if !write {
		log = log.WithField("dryrun", true)
	} else {
		log.Debug("Writing grid...")
		// TODO(fejta): configurable cache value
		if _, err := client.Upload(ctx, gridPath, buf, gcs.DefaultACL, gcs.NoCache); err != nil {
			return false, fmt.Errorf("upload %d bytes: %w", len(buf), err)
		}
	}
	if unreadColumns {
		log = log.WithField("more", true)
	}
	log.WithFields(logrus.Fields{
		"cols":     len(grid.Columns),
		"rows":     len(grid.Rows),
		"inflate":  inflateDur,
		"readCols": readColsDur,
		"shrink":   shrinkDur,
	}).Info("Wrote grid")
	return unreadColumns, nil
}

// truncateGrid cuts grid down to 'cellCeiling' or fewer cells
// Used as a cheap way to truncate before the finer-tuned shrinkGridInline.
func truncateGrid(cols []InflatedColumn, cellCeiling int) []InflatedColumn {
	var cells int
	for i := 0; i < len(cols); i++ {
		nc := len(cols[i].Cells)
		cells += nc
		// Always keep at least the two newest columns.
		if i < 2 || cells <= cellCeiling {
			continue
		}
		return cols[:i]
	}
	return cols
}

// reprocessColumn returns a column with a running result if the previous config differs from the current one
func reprocessColumn(log logrus.FieldLogger, old *statepb.Grid, currentCfg *configpb.TestGroup, when time.Time) *InflatedColumn {
	if old.Config == nil || old.Config.String() == currentCfg.String() {
		return nil // config unchanged (or unknown); nothing to reprocess
	}

	log.WithField("since", when.Round(time.Minute)).Info("Reprocessing results after changed config")

	// The RUNNING cell causes truncateRunning to discard columns newer than `when`.
	return &InflatedColumn{
		Column: &statepb.Column{
			Started: float64(when.UTC().Unix() * 1000), // epoch milliseconds
		},
		Cells: map[string]Cell{
			"reprocess": {
				Result: statuspb.TestStatus_RUNNING,
			},
		},
	}
}

// shrinkGridInline compiles cols into a grid, repeatedly halving the retained
// column count until the marshaled proto fits under byteCeiling (or ctx expires).
func shrinkGridInline(ctx context.Context, log logrus.FieldLogger, tg *configpb.TestGroup, cols []InflatedColumn, issues map[string][]string, byteCeiling int) (*statepb.Grid, []byte, error) {
	// Hopefully the grid is small enough...
	grid := constructGridFromGroupConfig(log, tg, cols, issues)
	buf, err := gcs.MarshalGrid(grid)
	if err != nil {
		return nil, nil, fmt.Errorf("marshal grid: %w", err)
	}
	orig := len(buf)
	if byteCeiling == 0 || orig < byteCeiling {
		return grid, buf, nil
	}

	// Nope, let's drop old data...
	// Aim for half the ceiling so the next cycle has headroom.
	newCeiling := byteCeiling / 2

	log = log.WithField("originally", orig)
	for i := len(cols) / 2; i > 0; i = i / 2 {
		select {
		case <-ctx.Done():
			// Out of time: return the best (still oversized) result so far.
			log.WithField("size", len(buf)).Info("Timeout shrinking row data")
			return grid, buf, nil
		default:
		}

		log.WithField("size", len(buf)).Debug("Shrinking row data")

		// shrink cols to half and cap
		truncateLastColumn(cols[0:i], orig, byteCeiling, "byte")

		grid = constructGridFromGroupConfig(log, tg, cols[0:i], issues)
		buf, err = gcs.MarshalGrid(grid)
		if err != nil {
			return nil, nil, fmt.Errorf("marshal grid: %w", err)
		}

		if len(buf) < newCeiling {
			log.WithField("size", len(buf)).Info("Shrunk row data")
			return grid, buf, nil
		}

	}

	// One column isn't small enough. Return a single-cell grid.
	grid = constructGridFromGroupConfig(log, tg, deletedColumn(cols[0]), nil)
	buf, err = gcs.MarshalGrid(grid)
	log.WithField("size", len(buf)).Info("Shrunk to minimum; storing metadata only")
	return grid, buf, err
}

// Legacy row name to report data truncation
const truncatedRowName = "Truncated"

// truncateLastColumn overwrites the oldest retained column's cells with UNKNOWN
// markers explaining that the grid exceeded the size limit.
func truncateLastColumn(grid []InflatedColumn, orig, max int, entity string) {
	if len(grid) == 0 {
		return
	}
	last := len(grid) - 1
	for name, cell := range grid[last].Cells {
		if name == truncatedRowName {
			// Drop any stale marker row.
			delete(grid[last].Cells, truncatedRowName)
			continue
		}
		if cell.Result == statuspb.TestStatus_NO_RESULT {
			continue
		}
		cell.Result = statuspb.TestStatus_UNKNOWN
		cell.Message = fmt.Sprintf("%d %s grid exceeds maximum size of %d %ss", orig, entity, max, entity)
		cell.Icon = "..." // Overwritten by the UI
		grid[last].Cells[name] = cell
	}
}

// A column with the same header data, but all the rows deleted.
858 func deletedColumn(latestColumn InflatedColumn) []InflatedColumn { 859 return []InflatedColumn{ 860 { 861 Column: latestColumn.Column, 862 Cells: map[string]Cell{ 863 truncatedRowName: { 864 Result: statuspb.TestStatus_UNKNOWN, 865 ID: truncatedRowName, 866 Message: fmt.Sprintf("The grid is too large to update. Split this testgroup into multiple testgroups."), 867 }, 868 }, 869 }, 870 } 871 } 872 873 // FormatStrftime replaces python codes with what go expects. 874 // 875 // aka %Y-%m-%d becomes 2006-01-02 876 func FormatStrftime(in string) string { 877 replacements := map[string]string{ 878 "%p": "PM", 879 "%Y": "2006", 880 "%y": "06", 881 "%m": "01", 882 "%d": "02", 883 "%H": "15", 884 "%M": "04", 885 "%S": "05", 886 } 887 888 out := in 889 890 for bad, good := range replacements { 891 out = strings.ReplaceAll(out, bad, good) 892 } 893 return out 894 } 895 896 func overrideBuild(tg *configpb.TestGroup, cols []InflatedColumn) { 897 fmt := tg.BuildOverrideStrftime 898 if fmt == "" { 899 return 900 } 901 fmt = FormatStrftime(fmt) 902 for _, col := range cols { 903 started := int64(col.Column.Started) 904 when := time.Unix(started/1000, (started%1000)*int64(time.Millisecond/time.Nanosecond)) 905 col.Column.Build = when.Format(fmt) 906 } 907 } 908 909 const columnIDSeparator = "\ue000" 910 911 // GroupColumns merges columns with the same Name and Build. 912 // 913 // Cells are joined together, splitting those with the same name. 914 // Started is the smallest value. 915 // Extra is the most recent filled value. 
func groupColumns(tg *configpb.TestGroup, cols []InflatedColumn) []InflatedColumn {
	// Bucket columns by name+build, remembering first-seen order via ids.
	groups := map[string][]InflatedColumn{}
	var ids []string
	for _, c := range cols {
		id := c.Column.Name + columnIDSeparator + c.Column.Build
		groups[id] = append(groups[id], c)
		ids = append(ids, id)
	}

	if len(groups) == 0 {
		return nil
	}

	out := make([]InflatedColumn, 0, len(groups))

	seen := make(map[string]bool, len(groups))

	for _, id := range ids {
		if seen[id] {
			continue // already merged this group.
		}
		seen[id] = true
		var col InflatedColumn

		groupedCells := groups[id]
		if len(groupedCells) == 1 {
			// Nothing to merge; pass the column through unchanged.
			out = append(out, groupedCells[0])
			continue
		}

		// Every cell from the merged columns, keyed by cell name, newest first.
		cells := map[string][]Cell{}

		var count int
		for i, c := range groupedCells {
			if i == 0 {
				// NOTE(review): this aliases (does not copy) the first column's proto;
				// the mutations below write through to it — presumably intentional.
				col.Column = c.Column
			} else {
				// Started is the smallest (earliest) of the merged columns.
				if c.Column.Started < col.Column.Started {
					col.Column.Started = c.Column.Started
				}
				// Hint keeps the largest value in natural sort order.
				if !sortorder.NaturalLess(c.Column.Hint, col.Column.Hint) {
					col.Column.Hint = c.Column.Hint
				}
				// Merge column headers: equal or empty values collapse; headers
				// marked list_all_values join with "||"; anything else becomes "*".
				for i, val := range c.Column.Extra { // NOTE: shadows the outer loop's i
					if i == len(col.Column.Extra) {
						col.Column.Extra = append(col.Column.Extra, val)
						continue
					}
					if val == "" || val == col.Column.Extra[i] {
						continue
					}
					if col.Column.Extra[i] == "" {
						col.Column.Extra[i] = val
					} else if i < len(tg.GetColumnHeader()) && tg.GetColumnHeader()[i].ListAllValues {
						col.Column.Extra[i] = joinHeaders(col.Column.Extra[i], val)
					} else {
						col.Column.Extra[i] = "*" // values differ
					}
				}
			}
			for key, cell := range c.Cells {
				cells[key] = append(cells[key], cell)
				count++
			}
		}
		if tg.IgnoreOldResults {
			col.Cells = make(map[string]Cell, len(cells))
		} else {
			col.Cells = make(map[string]Cell, count)
		}
		for name, duplicateCells := range cells {
			if tg.IgnoreOldResults {
				// Keep only the first (newest) result for each cell name.
				col.Cells[name] = duplicateCells[0]
				continue
			}
			// Otherwise keep every result, de-duplicating names via SplitCells.
			for name, cell := range SplitCells(name, duplicateCells...) { // NOTE: shadows the outer name
				col.Cells[name] = cell
			}
		}
		out = append(out, col)
	}
	return out
}

// joinHeaders merges "||"-joined header strings into one sorted, de-duplicated "||" list.
func joinHeaders(headers ...string) string {
	headerSet := make(map[string]bool)
	for _, header := range headers {
		vals := strings.Split(header, "||")
		for _, val := range vals {
			if val == "" {
				continue
			}
			headerSet[val] = true
		}
	}
	keys := []string{}
	for k := range headerSet {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return strings.Join(keys, "||")
}

// days converts days float into a time.Duration, assuming a 24 hour day.
//
// A day is not always 24 hours due to things like leap-seconds.
// We do not need this level of precision though, so ignore the complexity.
func days(d float64) time.Duration {
	return time.Duration(24*d) * time.Hour // Close enough
}

// ConstructGrid will append all the inflatedColumns into the returned Grid.
//
// The returned Grid has correctly compressed row values.
func ConstructGrid(log logrus.FieldLogger, cols []InflatedColumn, issues map[string][]string, failuresToAlert, passesToDisableAlert int, useCommitAsBuildID bool, userProperty string, brokenThreshold float32, columnHeader []*configpb.TestGroup_ColumnHeader) *statepb.Grid {
	// Add the columns into a grid message
	var grid statepb.Grid
	rows := map[string]*statepb.Row{} // For fast target => row lookup
	if failuresToAlert > 0 && passesToDisableAlert == 0 {
		// Alerting on failures implies clearing after at least one pass.
		passesToDisableAlert = 1
	}

	for _, col := range cols {
		if brokenThreshold > 0.0 && col.Column != nil {
			col.Column.Stats = columnStats(col.Cells, brokenThreshold)
		}
		AppendColumn(&grid, rows, col)
	}

	dropEmptyRows(log, &grid, rows)

	// De-duplicate each row's issues and sort them largest-first.
	for name, row := range rows {
		row.Issues = append(row.Issues, issues[name]...)
		issueSet := make(map[string]bool, len(row.Issues))
		for _, i := range row.Issues {
			issueSet[i] = true
		}
		row.Issues = make([]string, 0, len(issueSet))
		for i := range issueSet {
			row.Issues = append(row.Issues, i)
		}
		sort.SliceStable(row.Issues, func(i, j int) bool {
			// Largest issues at the front of the list
			return !sortorder.NaturalLess(row.Issues[i], row.Issues[j])
		})
	}

	alertRows(grid.Columns, grid.Rows, failuresToAlert, passesToDisableAlert, useCommitAsBuildID, userProperty, columnHeader)
	sort.SliceStable(grid.Rows, func(i, j int) bool {
		return sortorder.NaturalLess(grid.Rows[i].Name, grid.Rows[j].Name)
	})

	for _, row := range grid.Rows {
		// Drop the UserProperty list entirely when every entry is empty.
		del := true
		for _, up := range row.UserProperty {
			if up != "" {
				del = false
				break
			}
		}
		if del {
			row.UserProperty = nil
		}
		sort.SliceStable(row.Metric, func(i, j int) bool {
			return sortorder.NaturalLess(row.Metric[i], row.Metric[j])
		})
		sort.SliceStable(row.Metrics, func(i, j int) bool {
			return sortorder.NaturalLess(row.Metrics[i].Name, row.Metrics[j].Name)
		})
	}
	return &grid
}

// constructGridFromGroupConfig will append all the inflatedColumns into the returned Grid.
//
// The returned Grid has correctly compressed row values.
func constructGridFromGroupConfig(log logrus.FieldLogger, group *configpb.TestGroup, cols []InflatedColumn, issues map[string][]string) *statepb.Grid {
	// GCS-based result sources behave like the kubernetes client for build IDs.
	usesK8sClient := group.UseKubernetesClient || (group.GetResultSource().GetGcsConfig() != nil)
	return ConstructGrid(log, cols, issues, int(group.GetNumFailuresToAlert()), int(group.GetNumPassesToDisableAlert()), usesK8sClient, group.GetUserProperty(), 0.0, group.GetColumnHeader())
}

// dropEmptyRows removes rows whose results are entirely NO_RESULT from both
// the grid and the rows lookup map.
func dropEmptyRows(log logrus.FieldLogger, grid *statepb.Grid, rows map[string]*statepb.Row) {
	filled := make([]*statepb.Row, 0, len(rows))
	var dropped int
	for _, r := range grid.Rows {
		// Scan the run-length-encoded results for any real status.
		var found bool
		f := result.Iter(r.Results)
		for {
			res, more := f()
			if !more {
				break
			}
			if res == statuspb.TestStatus_NO_RESULT {
				continue
			}
			found = true
			break
		}
		if !found {
			dropped++
			delete(rows, r.Name)
			continue
		}
		filled = append(filled, r)
	}

	if dropped == 0 {
		return // nothing changed; avoid replacing grid.Rows.
	}

	grid.Rows = filled
	log.WithField("dropped", dropped).Info("Dropped old rows")
}

// appendMetric adds the value at index to metric.
//
// Handles the details of sparse-encoding the results: Indices holds
// (start, length) pairs describing runs of consecutive filled columns,
// while Values holds one measurement per filled column.
// Indices must be monotonically increasing for the same metric.
func appendMetric(metric *statepb.Metric, idx int32, value float64) {
	if l := int32(len(metric.Indices)); l == 0 || metric.Indices[l-2]+metric.Indices[l-1] != idx {
		// If we append V to idx 9 and metric.Indices = [3, 4] then the last filled index is 3+4-1=7
		// So that means we have holes in idx 7 and 8, so start a new group.
		metric.Indices = append(metric.Indices, idx, 1)
	} else {
		metric.Indices[l-1]++ // Expand the length of the current filled list
	}
	metric.Values = append(metric.Values, value)
}

// emptyCell is the placeholder appended when a row has no result for a column.
var emptyCell = Cell{Result: statuspb.TestStatus_NO_RESULT}

// hasCellID reports whether rows with this name carry their own cell ids
// (names containing the @TESTGRID@ marker do not).
func hasCellID(name string) bool {
	return !strings.Contains(name, "@TESTGRID@")
}

// truncate truncates a message longer than max bytes, keeping the first and
// last max/2 bytes joined by an ellipsis. Max = 0 returns the original message.
// Halves that were cut mid-rune are scrubbed back to valid UTF-8.
func truncate(msg string, max int) string {
	if max == 0 || len(msg) <= max {
		return msg
	}
	convert := func(s string) string {
		if utf8.ValidString(s) {
			return s
		}
		// Byte-slicing may have split a multi-byte rune; drop the fragments.
		return strings.ToValidUTF8(s, "")
	}
	start := convert(msg[:max/2])
	end := convert(msg[len(msg)-max/2:])
	return start + "..." + end
}

// appendCell adds the rowResult column to the row.
//
// Handles the details like missing fields and run-length-encoding the result.
// appendCell appends cell to row count times, starting at column index start.
//
// Results are run-length encoded as flat (status, count) pairs; metrics,
// messages, icons and cell ids are appended per column as needed.
func appendCell(row *statepb.Row, cell Cell, start, count int) {
	latest := int32(cell.Result)
	n := len(row.Results)
	switch {
	// Start a new (status, count) pair when there is none yet or the status changed;
	// when n == 0 the second case is not evaluated, so no out-of-range access occurs.
	case n == 0, row.Results[n-2] != latest:
		row.Results = append(row.Results, latest, int32(count))
	default:
		row.Results[n-1] += int32(count) // extend the current run.
	}

	addCellID := hasCellID(row.Name)

	for i := 0; i < count; i++ {
		columnIdx := int32(start + i)
		for metricName, measurement := range cell.Metrics {
			var metric *statepb.Metric
			var ok bool
			// Register the metric name on the row if it is new.
			for _, name := range row.Metric {
				if name == metricName {
					ok = true
					break
				}
			}
			if !ok {
				row.Metric = append(row.Metric, metricName)
			}
			// Find the existing Metric message; metric is nil after the loop
			// unless a match broke out early.
			for _, metric = range row.Metrics {
				if metric.Name == metricName {
					break
				}
				metric = nil
			}
			if metric == nil {
				metric = &statepb.Metric{Name: metricName}
				row.Metrics = append(row.Metrics, metric)
			}
			// Record this measurement at the current column index.
			appendMetric(metric, columnIdx, measurement)
		}
		if cell.Result == statuspb.TestStatus_NO_RESULT {
			continue
		}
		if addCellID {
			// Rows whose name embeds @TESTGRID@ skip these: their values can be
			// derived from the row name and don't need to be repeated per cell.
			row.CellIds = append(row.CellIds, cell.CellID)
			row.Properties = append(row.Properties, &statepb.Property{
				Property: cell.Properties,
			})
		}
		// Javascript client expects no result cells to skip icons/messages
		row.Messages = append(row.Messages, truncate(cell.Message, 140))
		row.Icons = append(row.Icons, cell.Icon)
		row.UserProperty = append(row.UserProperty, cell.UserProperty)
	}

	row.Issues = append(row.Issues, cell.Issues...)
}

// AppendColumn adds the build column to the grid.
//
// This handles details like:
// * rows appearing/disappearing in the middle of the run.
// * adding auto metadata like duration, commit as well as any user-added metadata
// * extracting build metadata into the appropriate column header
// * Ensuring row names are unique and formatted with metadata
func AppendColumn(grid *statepb.Grid, rows map[string]*statepb.Row, inflated InflatedColumn) {
	grid.Columns = append(grid.Columns, inflated.Column)
	colIdx := len(grid.Columns) - 1

	// Track rows that receive no cell from this column; they get an empty cell below.
	missing := map[string]*statepb.Row{}
	for name, row := range rows {
		missing[name] = row
	}

	for name, cell := range inflated.Cells {
		delete(missing, name)

		row, ok := rows[name]
		if !ok {
			id := cell.ID
			if id == "" {
				id = name
			}
			row = &statepb.Row{
				Name:    name,
				Id:      id,
				CellIds: []string{}, // TODO(fejta): try and leave this nil
			}
			rows[name] = row
			grid.Rows = append(grid.Rows, row)
			// Backfill empty cells for the columns that predate this row.
			if colIdx > 0 {
				appendCell(row, emptyCell, 0, colIdx)
			}
		}
		appendCell(row, cell, colIdx, 1)
	}

	for _, row := range missing {
		appendCell(row, emptyCell, colIdx, 1)
	}
}

// alertRows configures the alert for every row that has one.
func alertRows(cols []*statepb.Column, rows []*statepb.Row, openFailures, closePasses int, useCommitAsBuildID bool, userProperty string, columnHeader []*configpb.TestGroup_ColumnHeader) {
	for _, r := range rows {
		r.AlertInfo = alertRow(cols, r, openFailures, closePasses, useCommitAsBuildID, userProperty, columnHeader)
	}
}

// alertRow returns an AlertInfo proto if there have been failuresToOpen consecutive failures more recently than passesToClose.
1281 func alertRow(cols []*statepb.Column, row *statepb.Row, failuresToOpen, passesToClose int, useCommitAsBuildID bool, userPropertyName string, columnHeader []*configpb.TestGroup_ColumnHeader) *statepb.AlertInfo { 1282 if failuresToOpen == 0 { 1283 return nil 1284 } 1285 var concurrentFailures int 1286 var totalFailures int32 1287 var passes int 1288 var compressedIdx int 1289 f := result.Iter(row.Results) 1290 var firstFail *statepb.Column 1291 var latestFail *statepb.Column 1292 var latestPass *statepb.Column 1293 var failIdx int 1294 var latestFailIdx int 1295 customColumnHeaders := make(map[string]string) 1296 // find the first number of consecutive passesToClose (no alert) 1297 // or else failuresToOpen (alert). 1298 for _, col := range cols { 1299 // TODO(fejta): ignore old running 1300 rawRes, _ := f() 1301 res := result.Coalesce(rawRes, result.IgnoreRunning) 1302 if res == statuspb.TestStatus_NO_RESULT { 1303 if rawRes == statuspb.TestStatus_RUNNING { 1304 compressedIdx++ 1305 } 1306 continue 1307 } 1308 if res == statuspb.TestStatus_PASS { 1309 passes++ 1310 if concurrentFailures >= failuresToOpen { 1311 if latestPass == nil { 1312 latestPass = col // most recent pass before outage 1313 } 1314 if passes >= passesToClose { 1315 break // enough failures and enough passes, definitely past the start of the failure 1316 } 1317 } else if passes >= passesToClose { 1318 return nil // enough passes but not enough failures, there is no outage 1319 } else { 1320 concurrentFailures = 0 1321 } 1322 } 1323 if res == statuspb.TestStatus_FAIL { 1324 passes = 0 1325 latestPass = nil 1326 concurrentFailures++ 1327 totalFailures++ 1328 if totalFailures == 1 { // note most recent failure for this outage 1329 latestFailIdx = compressedIdx 1330 latestFail = col 1331 } 1332 failIdx = compressedIdx 1333 firstFail = col 1334 } 1335 if res == statuspb.TestStatus_FLAKY { 1336 passes = 0 1337 if concurrentFailures >= failuresToOpen { 1338 break // cannot definitively say which 
commit is at fault 1339 } 1340 concurrentFailures = 0 1341 } 1342 compressedIdx++ 1343 1344 for i := 0; i < len(columnHeader); i++ { 1345 if i >= len(col.Extra) { 1346 logrus.WithFields(logrus.Fields{ 1347 "started": time.Unix(0, int64(col.GetStarted()*float64(time.Millisecond))), 1348 "additionalColumnHeaders": col.GetExtra(), 1349 }).Trace("Insufficient column header values to record.") 1350 break 1351 } 1352 if columnHeader[i].Label != "" { 1353 customColumnHeaders[columnHeader[i].Label] = col.Extra[i] 1354 } else if columnHeader[i].Property != "" { 1355 customColumnHeaders[columnHeader[i].Property] = col.Extra[i] 1356 } else { 1357 customColumnHeaders[columnHeader[i].ConfigurationValue] = col.Extra[i] 1358 } 1359 } 1360 } 1361 if concurrentFailures < failuresToOpen { 1362 return nil 1363 } 1364 var id string 1365 var latestID string 1366 if len(row.CellIds) > 0 { // not all rows have cell ids 1367 id = row.CellIds[failIdx] 1368 latestID = row.CellIds[latestFailIdx] 1369 } 1370 msg := row.Messages[latestFailIdx] 1371 var userProperties map[string]string 1372 if row.UserProperty != nil && latestFailIdx < len(row.UserProperty) && row.UserProperty[latestFailIdx] != "" { 1373 userProperties = map[string]string{ 1374 userPropertyName: row.UserProperty[latestFailIdx], 1375 } 1376 } 1377 1378 return alertInfo(totalFailures, msg, id, latestID, userProperties, firstFail, latestFail, latestPass, useCommitAsBuildID, customColumnHeaders) 1379 } 1380 1381 // alertInfo returns an alert proto with the configured fields 1382 func alertInfo(failures int32, msg, cellID, latestCellID string, userProperties map[string]string, fail, latestFail, pass *statepb.Column, useCommitAsBuildID bool, customColumnHeaders map[string]string) *statepb.AlertInfo { 1383 return &statepb.AlertInfo{ 1384 FailCount: failures, 1385 FailBuildId: buildID(fail, useCommitAsBuildID), 1386 LatestFailBuildId: buildID(latestFail, useCommitAsBuildID), 1387 FailTime: stamp(fail), 1388 FailTestId: cellID, 1389 
LatestFailTestId: latestCellID, 1390 FailureMessage: msg, 1391 PassTime: stamp(pass), 1392 PassBuildId: buildID(pass, useCommitAsBuildID), 1393 EmailAddresses: emailAddresses(fail), 1394 HotlistIds: hotlistIDs(fail), 1395 Properties: userProperties, 1396 CustomColumnHeaders: customColumnHeaders, 1397 } 1398 } 1399 1400 func columnStats(cells map[string]Cell, brokenThreshold float32) *statepb.Stats { 1401 var passes, fails, total int32 1402 var pending bool 1403 if brokenThreshold <= 0.0 { 1404 return nil 1405 } 1406 if cells == nil { 1407 return nil 1408 } 1409 for _, cell := range cells { 1410 if cell.Result == statuspb.TestStatus_RUNNING { 1411 pending = true 1412 } 1413 status := result.Coalesce(cell.Result, false) 1414 switch status { 1415 case statuspb.TestStatus_PASS: 1416 passes++ 1417 total++ 1418 case statuspb.TestStatus_FAIL: 1419 fails++ 1420 total++ 1421 case statuspb.TestStatus_FLAKY, statuspb.TestStatus_UNKNOWN: 1422 total++ 1423 default: 1424 // blank cell or unrecognized status, do nothing 1425 } 1426 } 1427 var failRatio float32 1428 if total != 0.0 { 1429 failRatio = float32(fails) / float32(total) 1430 } 1431 return &statepb.Stats{ 1432 FailCount: fails, 1433 PassCount: passes, 1434 TotalCount: total, 1435 Pending: pending, 1436 Broken: failRatio > brokenThreshold, 1437 } 1438 } 1439 1440 func hotlistIDs(col *statepb.Column) []string { 1441 var ids []string 1442 for _, hotlistID := range strings.Split(col.HotlistIds, ",") { 1443 if id := strings.TrimSpace(hotlistID); id != "" { 1444 ids = append(ids, strings.TrimSpace(hotlistID)) 1445 } 1446 } 1447 return ids 1448 } 1449 1450 func emailAddresses(col *statepb.Column) []string { 1451 if col == nil { 1452 return []string{} 1453 } 1454 return col.GetEmailAddresses() 1455 } 1456 1457 // buildID extracts the ID from the first extra row (where commit data is) or else the Build field. 
1458 func buildID(col *statepb.Column, getCommitHeader bool) string { 1459 if col == nil { 1460 return "" 1461 } 1462 if getCommitHeader && len(col.Extra) > 0 { 1463 return col.Extra[0] 1464 } 1465 return col.Build 1466 } 1467 1468 const billion = 1e9 1469 1470 // stamp converts seconds into a timestamp proto 1471 // TODO(#683): col.Started should be a timestamp instead of a float 1472 func stamp(col *statepb.Column) *timestamp.Timestamp { 1473 if col == nil { 1474 return nil 1475 } 1476 seconds := col.Started / 1000 1477 floor := math.Floor(seconds) 1478 remain := seconds - floor 1479 return ×tamp.Timestamp{ 1480 Seconds: int64(floor), 1481 Nanos: int32(remain * billion), 1482 } 1483 }