github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/testgrid/cmd/updater/main.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "bytes" 21 "compress/zlib" 22 "context" 23 "encoding/json" 24 "encoding/xml" 25 "errors" 26 "flag" 27 "fmt" 28 "io" 29 "io/ioutil" 30 "log" 31 "net/url" 32 "path" 33 "regexp" 34 "runtime" 35 "sort" 36 "strings" 37 "sync" 38 "time" 39 40 "k8s.io/test-infra/testgrid/config" 41 "k8s.io/test-infra/testgrid/state" 42 "k8s.io/test-infra/testgrid/util/gcs" 43 44 "cloud.google.com/go/storage" 45 "github.com/golang/protobuf/proto" 46 "google.golang.org/api/iterator" 47 48 "vbom.ml/util/sortorder" 49 ) 50 51 // options configures the updater 52 type options struct { 53 config gcs.Path // gs://path/to/config/proto 54 creds string 55 confirm bool 56 group string 57 groupConcurrency int 58 buildConcurrency int 59 } 60 61 // validate ensures sane options 62 func (o *options) validate() error { 63 if o.config.String() == "" { 64 return errors.New("empty --config") 65 } 66 if o.config.Bucket() == "k8s-testgrid" { // TODO(fejta): remove 67 return fmt.Errorf("--config=%s cannot start with gs://k8s-testgrid", o.config) 68 } 69 if o.groupConcurrency == 0 { 70 o.groupConcurrency = 4 * runtime.NumCPU() 71 } 72 if o.buildConcurrency == 0 { 73 o.buildConcurrency = 4 * runtime.NumCPU() 74 } 75 76 return nil 77 } 78 79 // gatherOptions reads options from flags 80 
func gatherOptions() options { 81 o := options{} 82 flag.Var(&o.config, "config", "gs://path/to/config.pb") 83 flag.StringVar(&o.creds, "gcp-service-account", "", "/path/to/gcp/creds (use local creds if empty") 84 flag.BoolVar(&o.confirm, "confirm", false, "Upload data if set") 85 flag.StringVar(&o.group, "test-group", "", "Only update named group if set") 86 flag.IntVar(&o.groupConcurrency, "group-concurrency", 0, "Manually define the number of groups to concurrently update if non-zero") 87 flag.IntVar(&o.buildConcurrency, "build-concurrency", 0, "Manually define the number of builds to concurrently read if non-zero") 88 flag.Parse() 89 return o 90 } 91 92 // testGroupPath() returns the path to a test_group proto given this proto 93 func testGroupPath(g gcs.Path, name string) (*gcs.Path, error) { 94 u, err := url.Parse(name) 95 if err != nil { 96 return nil, fmt.Errorf("invalid url %s: %v", name, err) 97 } 98 np, err := g.ResolveReference(u) 99 if err == nil && np.Bucket() != g.Bucket() { 100 return nil, fmt.Errorf("testGroup %s should not change bucket", name) 101 } 102 return np, nil 103 } 104 105 // Build points to a build stored under a particular gcs prefix. 106 type Build struct { 107 Bucket *storage.BucketHandle 108 Context context.Context 109 Prefix string 110 number *int 111 } 112 113 func (b Build) String() string { 114 return b.Prefix 115 } 116 117 // Started holds the started.json values of the build. 
type Started struct {
	Timestamp   int64             `json:"timestamp"` // epoch seconds
	RepoVersion string            `json:"repo-version"`
	Node        string            `json:"node"`
	Pull        string            `json:"pull"`
	Repos       map[string]string `json:"repos"` // {repo: branch_or_pull} map
}

// Finished holds the finished.json values of the build
type Finished struct {
	// Timestamp is epoch seconds
	Timestamp  int64    `json:"timestamp"`
	Passed     bool     `json:"passed"`
	JobVersion string   `json:"job-version"`
	Metadata   Metadata `json:"metadata"`
	// running is not part of the JSON; it is set by the reader when
	// finished.json does not exist yet.
	running bool
}

// Metadata holds the finished.json values in the metadata key.
//
// Metadata values can either be string or string map of strings
//
// TODO(fejta): figure out which of these we want and document them
// Special values: infra-commit, repos, repo, repo-commit, others
type Metadata map[string]interface{}

// String returns the name key if its value is a string.
//
// The bool reports whether the key is present at all, so (nil, true) means
// the key exists but holds something other than a string.
func (m Metadata) String(name string) (*string, bool) {
	v, ok := m[name]
	if !ok {
		return nil, false
	}
	s, good := v.(string)
	if !good {
		return nil, true
	}
	return &s, true
}

// Meta returns the name key if its value is a child object.
//
// The bool reports whether the key is present at all, matching String:
// (nil, false) means the key is absent and (nil, true) means it exists but
// is not an object. Children decoded by encoding/json arrive as
// map[string]interface{} rather than Metadata, so both forms are accepted.
func (m Metadata) Meta(name string) (*Metadata, bool) {
	v, ok := m[name]
	if !ok {
		return nil, false
	}
	switch child := v.(type) {
	case Metadata:
		return &child, true
	case map[string]interface{}:
		md := Metadata(child)
		return &md, true
	}
	return nil, true
}

// ColumnMetadata returns the subset of values in the map that are strings.
167 func (m Metadata) ColumnMetadata() ColumnMetadata { 168 bm := ColumnMetadata{} 169 for k, v := range m { 170 if s, ok := v.(string); ok { 171 bm[k] = s 172 } 173 // TODO(fejta): handle sub items 174 } 175 return bm 176 } 177 178 // JunitSuites holds a <testsuites/> list of JunitSuite results 179 type JunitSuites struct { 180 XMLName xml.Name `xml:"testsuites"` 181 Suites []JunitSuite `xml:"testsuite"` 182 } 183 184 // JunitSuite holds <testsuite/> results 185 type JunitSuite struct { 186 XMLName xml.Name `xml:"testsuite"` 187 Name string `xml:"name,attr"` 188 Time float64 `xml:"time,attr"` // Seconds 189 Failures int `xml:"failures,attr"` 190 Tests int `xml:"tests,attr"` 191 Results []JunitResult `xml:"testcase"` 192 /* 193 * <properties><property name="go.version" value="go1.8.3"/></properties> 194 */ 195 } 196 197 // JunitResult holds <testcase/> results 198 type JunitResult struct { 199 Name string `xml:"name,attr"` 200 Time float64 `xml:"time,attr"` 201 ClassName string `xml:"classname,attr"` 202 Failure *string `xml:"failure,omitempty"` 203 Output *string `xml:"system-out,omitempty"` 204 Error *string `xml:"system-err,omitempty"` 205 Skipped *string `xml:"skipped,omitempty"` 206 } 207 208 // Message extracts the message for the junit test case. 209 // 210 // Will use the first non-empty <failure/>, <skipped/>, <output/> value. 211 func (jr JunitResult) Message() string { 212 const max = 140 213 var msg string 214 switch { 215 case jr.Failure != nil && *jr.Failure != "": 216 msg = *jr.Failure 217 case jr.Skipped != nil && *jr.Skipped != "": 218 msg = *jr.Skipped 219 case jr.Output != nil && *jr.Output != "": 220 msg = *jr.Output 221 } 222 l := len(msg) 223 if max == 0 || l <= max { 224 return msg 225 } 226 h := max / 2 227 return msg[:h] + "..." + msg[l-h-1:] 228 } 229 230 // Row converts the junit result into a Row result, prepending the suite name. 
231 func (jr JunitResult) Row(suite string) (string, Row) { 232 n := jr.Name 233 if suite != "" { 234 n = suite + "." + n 235 } 236 r := Row{ 237 Metrics: map[string]float64{}, 238 Metadata: map[string]string{ 239 "Tests name": n, 240 }, 241 } 242 if jr.Time > 0 { 243 r.Metrics[elapsedKey] = jr.Time 244 } 245 if msg := jr.Message(); msg != "" { 246 r.Message = msg 247 } 248 switch { 249 case jr.Failure != nil: 250 r.Result = state.Row_FAIL 251 if r.Message != "" { 252 r.Icon = "F" 253 } 254 case jr.Skipped != nil: 255 r.Result = state.Row_PASS_WITH_SKIPS 256 if r.Message != "" { 257 r.Icon = "S" 258 } 259 default: 260 r.Result = state.Row_PASS 261 } 262 return n, r 263 } 264 265 func unmarshalXML(buf []byte, i interface{}) error { 266 reader := bytes.NewReader(buf) 267 dec := xml.NewDecoder(reader) 268 dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 269 switch charset { 270 case "UTF-8", "utf8", "": 271 // utf8 is not recognized by golang, but our coalesce.py writes a utf8 doc, which python accepts. 
272 return input, nil 273 default: 274 return nil, fmt.Errorf("unknown charset: %s", charset) 275 } 276 } 277 return dec.Decode(i) 278 } 279 280 func extractRows(buf []byte, meta map[string]string) (map[string][]Row, error) { 281 var suites JunitSuites 282 // Try to parse it as a <testsuites/> object 283 err := unmarshalXML(buf, &suites) 284 if err != nil { 285 // Maybe it is a <testsuite/> object instead 286 suites.Suites = append([]JunitSuite(nil), JunitSuite{}) 287 ie := unmarshalXML(buf, &suites.Suites[0]) 288 if ie != nil { 289 // Nope, it just doesn't parse 290 return nil, fmt.Errorf("not valid testsuites: %v nor testsuite: %v", err, ie) 291 } 292 } 293 rows := map[string][]Row{} 294 for _, suite := range suites.Suites { 295 for _, sr := range suite.Results { 296 if sr.Skipped != nil && len(*sr.Skipped) == 0 { 297 continue 298 } 299 300 n, r := sr.Row(suite.Name) 301 for k, v := range meta { 302 r.Metadata[k] = v 303 } 304 rows[n] = append(rows[n], r) 305 } 306 } 307 return rows, nil 308 } 309 310 // ColumnMetadata holds key => value mapping of metadata info. 311 type ColumnMetadata map[string]string 312 313 // Column represents a build run, which includes one or more row results and metadata. 314 type Column struct { 315 ID string 316 Started int64 317 Finished int64 318 Passed bool 319 Rows map[string][]Row 320 Metadata ColumnMetadata 321 } 322 323 // Row holds results for a piece of a build run, such as a test result. 324 type Row struct { 325 Result state.Row_Result 326 Metrics map[string]float64 327 Metadata map[string]string 328 Message string 329 Icon string 330 } 331 332 // Overall calculates the generated-overall row value for the current column 333 func (br Column) Overall() Row { 334 r := Row{ 335 Metadata: map[string]string{"Tests name": "Overall"}, 336 } 337 switch { 338 case br.Finished > 0: 339 // Completed, did we pass? 
340 if br.Passed { 341 r.Result = state.Row_PASS // Yep 342 } else { 343 r.Result = state.Row_FAIL 344 } 345 r.Metrics = map[string]float64{ 346 elapsedKey: float64(br.Finished - br.Started), 347 } 348 case time.Now().Add(-24*time.Hour).Unix() > br.Started: 349 // Timed out 350 r.Result = state.Row_FAIL 351 r.Message = "Testing did not complete within 24 hours" 352 r.Icon = "T" 353 default: 354 r.Result = state.Row_RUNNING 355 r.Message = "Still running; has not finished..." 356 r.Icon = "R" 357 } 358 return r 359 } 360 361 // AppendMetric adds the value at index to metric. 362 // 363 // Handles the details of sparse-encoding the results. 364 // Indices must be monotonically increasing for the same metric. 365 func AppendMetric(metric *state.Metric, idx int32, value float64) { 366 if l := int32(len(metric.Indices)); l == 0 || metric.Indices[l-2]+metric.Indices[l-1] != idx { 367 // If we append V to idx 9 and metric.Indices = [3, 4] then the last filled index is 3+4-1=7 368 // So that means we have holes in idx 7 and 8, so start a new group. 369 metric.Indices = append(metric.Indices, idx, 1) 370 } else { 371 metric.Indices[l-1]++ // Expand the length of the current filled list 372 } 373 metric.Values = append(metric.Values, value) 374 } 375 376 // FindMetric returns the first metric with the specified name. 377 func FindMetric(row *state.Row, name string) *state.Metric { 378 for _, m := range row.Metrics { 379 if m.Name == name { 380 return m 381 } 382 } 383 return nil 384 } 385 386 var noResult = Row{Result: state.Row_NO_RESULT} 387 388 // AppendResult adds the rowResult column to the row. 389 // 390 // Handles the details like missing fields and run-length-encoding the result. 
391 func AppendResult(row *state.Row, rowResult Row, count int) { 392 latest := int32(rowResult.Result) 393 n := len(row.Results) 394 switch { 395 case n == 0, row.Results[n-2] != latest: 396 row.Results = append(row.Results, latest, int32(count)) 397 default: 398 row.Results[n-1] += int32(count) 399 } 400 401 for i := 0; i < count; i++ { // TODO(fejta): update server to allow empty cellids 402 row.CellIds = append(row.CellIds, "") 403 } 404 405 // Javascript client expects no result cells to skip icons/messages 406 // TODO(fejta): reconsider this 407 if rowResult.Result != state.Row_NO_RESULT { 408 for i := 0; i < count; i++ { 409 row.Messages = append(row.Messages, rowResult.Message) 410 row.Icons = append(row.Icons, rowResult.Icon) 411 } 412 } 413 } 414 415 type nameConfig struct { 416 format string 417 parts []string 418 } 419 420 func makeNameConfig(tnc *config.TestNameConfig) nameConfig { 421 if tnc == nil { 422 return nameConfig{ 423 format: "%s", 424 parts: []string{"Tests name"}, 425 } 426 } 427 nc := nameConfig{ 428 format: tnc.NameFormat, 429 parts: make([]string, len(tnc.NameElements)), 430 } 431 for i, e := range tnc.NameElements { 432 nc.parts[i] = e.TargetConfig 433 } 434 return nc 435 } 436 437 // Format renders any requested metadata into the name 438 func (r Row) Format(config nameConfig, meta map[string]string) string { 439 parsed := make([]interface{}, len(config.parts)) 440 for i, p := range config.parts { 441 if v, ok := r.Metadata[p]; ok { 442 parsed[i] = v 443 continue 444 } 445 parsed[i] = meta[p] // "" if missing 446 } 447 return fmt.Sprintf(config.format, parsed...) 448 } 449 450 // AppendColumn adds the build column to the grid. 451 // 452 // This handles details like: 453 // * rows appearing/disappearing in the middle of the run. 
454 // * adding auto metadata like duration, commit as well as any user-added metadata 455 // * extracting build metadata into the appropriate column header 456 // * Ensuring row names are unique and formatted with metadata 457 func AppendColumn(headers []string, format nameConfig, grid *state.Grid, rows map[string]*state.Row, build Column) { 458 c := state.Column{ 459 Build: build.ID, 460 Started: float64(build.Started * 1000), 461 } 462 for _, h := range headers { 463 if build.Finished == 0 { 464 c.Extra = append(c.Extra, "") 465 continue 466 } 467 trunc := 0 468 var ah string 469 if h == "Commit" { // TODO(fejta): fix, jobs use explicit key, support truncation 470 h = "repo-commit" 471 trunc = 9 472 ah = "job-version" 473 } 474 v, ok := build.Metadata[h] 475 if !ok { 476 // TODO(fejta): fix, make jobs use one or the other 477 if ah == "" { 478 log.Printf(" %s metadata missing %s", c.Build, h) 479 v = "missing" 480 } else { 481 if av, ok := build.Metadata[ah]; ok { 482 parts := strings.SplitN(av, "+", 2) 483 v = parts[len(parts)-1] 484 } else { 485 log.Printf(" %s metadata missing both keys %s and alternate %s", c.Build, h, ah) 486 } 487 } 488 } 489 if trunc > 0 && trunc < len(v) { 490 v = v[0:trunc] 491 } 492 c.Extra = append(c.Extra, v) 493 } 494 grid.Columns = append(grid.Columns, &c) 495 496 missing := map[string]*state.Row{} 497 for name, row := range rows { 498 missing[name] = row 499 } 500 501 found := map[string]bool{} 502 503 for target, results := range build.Rows { 504 for _, br := range results { 505 prefix := br.Format(format, build.Metadata) 506 name := prefix 507 // Ensure each name is unique 508 // If we have multiple results with the same name foo 509 // then append " [n]" to the name so we wind up with: 510 // foo 511 // foo [1] 512 // foo [2] 513 // etc 514 for idx := 1; found[name]; idx++ { 515 // found[name] exists, so try foo [n+1] 516 name = fmt.Sprintf("%s [%d]", prefix, idx) 517 } 518 // hooray, name not in found 519 found[name] = true 
520 delete(missing, name) 521 522 // Does this row already exist? 523 r, ok := rows[name] 524 if !ok { // New row 525 r = &state.Row{ 526 Name: name, 527 Id: target, 528 } 529 rows[name] = r 530 grid.Rows = append(grid.Rows, r) 531 if n := len(grid.Columns); n > 1 { 532 // Add missing entries for more recent builds (aka earlier columns) 533 AppendResult(r, noResult, n-1) 534 } 535 } 536 537 AppendResult(r, br, 1) 538 for k, v := range br.Metrics { 539 m := FindMetric(r, k) 540 if m == nil { 541 m = &state.Metric{Name: k} 542 r.Metrics = append(r.Metrics, m) 543 } 544 AppendMetric(m, int32(len(r.Messages)), v) 545 } 546 } 547 } 548 549 for _, row := range missing { 550 AppendResult(row, noResult, 1) 551 } 552 } 553 554 const elapsedKey = "seconds-elapsed" 555 556 // junit_CONTEXT_TIMESTAMP_THREAD.xml 557 var re = regexp.MustCompile(`.+/junit(_[^_]+)?(_\d+-\d+)?(_\d+)?\.xml$`) 558 559 // dropPrefix removes the _ in _CONTEXT to help keep the regexp simple 560 func dropPrefix(name string) string { 561 if len(name) == 0 { 562 return name 563 } 564 return name[1:] 565 } 566 567 // ValidateName checks whether the basename matches a junit file. 568 // 569 // Expected format: junit_context_20180102-1256-07.xml 570 // Results in { 571 // "Context": "context", 572 // "Timestamp": "20180102-1256", 573 // "Thread": "07", 574 // } 575 func ValidateName(name string) map[string]string { 576 mat := re.FindStringSubmatch(name) 577 if mat == nil { 578 return nil 579 } 580 return map[string]string{ 581 "Context": dropPrefix(mat[1]), 582 "Timestamp": dropPrefix(mat[2]), 583 "Thread": dropPrefix(mat[3]), 584 } 585 586 } 587 588 // ReadBuild asynchronously downloads the files in build from gcs and convert them into a build. 
589 func ReadBuild(build Build) (*Column, error) { 590 var wg sync.WaitGroup // Each subtask does wg.Add(1), then we wg.Wait() for them to finish 591 ctx, cancel := context.WithTimeout(build.Context, 30*time.Second) // Allows aborting after first error 592 ec := make(chan error) // Receives errors from anyone 593 594 // Download started.json, send to sc 595 wg.Add(1) 596 sc := make(chan Started) // Receives started.json result 597 go func() { 598 defer wg.Done() 599 started, err := func() (Started, error) { 600 var started Started 601 s := build.Bucket.Object(build.Prefix + "started.json") 602 sr, err := s.NewReader(ctx) 603 if err != nil { 604 return started, fmt.Errorf("build has not started") 605 } 606 if err = json.NewDecoder(sr).Decode(&started); err != nil { 607 return started, fmt.Errorf("could not decode started.json: %v", err) 608 } 609 return started, nil 610 }() 611 if err != nil { 612 select { 613 case <-ctx.Done(): 614 case ec <- err: 615 } 616 return 617 } 618 select { 619 case <-ctx.Done(): 620 case sc <- started: 621 } 622 }() 623 624 // Download finished.json, send to fc 625 wg.Add(1) 626 fc := make(chan Finished) // Receives finished.json result 627 go func() { 628 defer wg.Done() 629 finished, err := func() (Finished, error) { 630 f := build.Bucket.Object(build.Prefix + "finished.json") 631 fr, err := f.NewReader(ctx) 632 var finished Finished 633 if err == storage.ErrObjectNotExist { // Job has not (yet) completed 634 finished.running = true 635 return finished, nil 636 } else if err != nil { 637 return finished, fmt.Errorf("could not open %s: %v", f, err) 638 } 639 if err = json.NewDecoder(fr).Decode(&finished); err != nil { 640 return finished, fmt.Errorf("could not decode finished.json: %v", err) 641 } 642 return finished, nil 643 }() 644 if err != nil { 645 select { 646 case <-ctx.Done(): 647 case ec <- err: 648 } 649 return 650 } 651 select { 652 case <-ctx.Done(): 653 case fc <- finished: 654 } 655 }() 656 657 // List artifacts, send to ac 
channel 658 wg.Add(1) 659 ac := make(chan string) // Receives names of arifacts 660 go func() { 661 defer wg.Done() 662 defer close(ac) // No more artifacts 663 err := func() error { 664 pref := build.Prefix + "artifacts/" 665 ai := build.Bucket.Objects(ctx, &storage.Query{Prefix: pref}) 666 for { 667 a, err := ai.Next() 668 if err == iterator.Done { 669 break 670 } 671 if err != nil { 672 return fmt.Errorf("failed to list %s: %v", pref, err) 673 } 674 select { 675 case <-ctx.Done(): 676 return fmt.Errorf("interrupted listing %s", pref) 677 case ac <- a.Name: // Added 678 } 679 } 680 return nil 681 }() 682 if err != nil { 683 select { 684 case <-ctx.Done(): 685 case ec <- err: 686 } 687 } 688 }() 689 690 // Download each artifact, send row map to rc 691 // With parallelism: 60s without: 220s 692 wg.Add(1) 693 rc := make(chan map[string][]Row) 694 go func() { 695 defer wg.Done() 696 defer close(rc) // No more rows 697 var awg sync.WaitGroup 698 for a := range ac { 699 select { // Should we stop? 700 case <-ctx.Done(): // Yes 701 return 702 default: // No, keep going 703 } 704 meta := ValidateName(a) 705 if meta == nil { // Not junit 706 continue 707 } 708 awg.Add(1) 709 // Read each artifact in a new thread 710 go func(ap string, meta map[string]string) { 711 defer awg.Done() 712 err := func() error { 713 ar, err := build.Bucket.Object(ap).NewReader(ctx) 714 if err != nil { 715 return fmt.Errorf("could not read %s: %v", ap, err) 716 } 717 if r := ar.Remain(); r > 50e6 { 718 return fmt.Errorf("too large: %s is %d > 50M", ap, r) 719 } 720 buf, err := ioutil.ReadAll(ar) 721 if err != nil { 722 return fmt.Errorf("partial read of %s: %v", ap, err) 723 } 724 725 select { // Keep going? 
726 case <-ctx.Done(): // No, cancelled 727 return errors.New("aborted artifact read") 728 default: // Yes, acquire lock 729 // TODO(fejta): consider sync.Map 730 rows, err := extractRows(buf, meta) 731 if err != nil { 732 return fmt.Errorf("failed to parse %s: %v", ap, err) 733 } 734 rc <- rows 735 } 736 return nil 737 }() 738 if err == nil { 739 return 740 } 741 select { 742 case <-ctx.Done(): 743 case ec <- err: 744 } 745 }(a, meta) 746 } 747 awg.Wait() 748 }() 749 750 // Append each row into the column 751 rows := map[string][]Row{} 752 wg.Add(1) 753 go func() { 754 defer wg.Done() 755 for r := range rc { 756 select { // Should we continue 757 case <-ctx.Done(): // No, aborted 758 return 759 default: // Yes 760 } 761 for t, rs := range r { 762 rows[t] = append(rows[t], rs...) 763 } 764 } 765 }() 766 767 // Wait for everyone to complete their work 768 go func() { 769 wg.Wait() 770 select { 771 case <-ctx.Done(): 772 return 773 case ec <- nil: 774 } 775 }() 776 var finished *Finished 777 var started *Started 778 for { // Wait until we receive started and finished and/or an error 779 select { 780 case err := <-ec: 781 if err != nil { 782 cancel() 783 return nil, fmt.Errorf("failed to read %s: %v", build, err) 784 } 785 break 786 case s := <-sc: 787 started = &s 788 case f := <-fc: 789 finished = &f 790 } 791 if started != nil && finished != nil { 792 break 793 } 794 } 795 br := Column{ 796 ID: path.Base(build.Prefix), 797 Started: started.Timestamp, 798 } 799 // Has the build finished? 
800 if finished.running { // No 801 cancel() 802 br.Rows = map[string][]Row{ 803 "Overall": {br.Overall()}, 804 } 805 return &br, nil 806 } 807 br.Finished = finished.Timestamp 808 br.Metadata = finished.Metadata.ColumnMetadata() 809 br.Passed = finished.Passed 810 or := br.Overall() 811 br.Rows = map[string][]Row{ 812 "Overall": {or}, 813 } 814 select { 815 case <-ctx.Done(): 816 cancel() 817 return nil, fmt.Errorf("interrupted reading %s", build) 818 case err := <-ec: 819 if err != nil { 820 cancel() 821 return nil, fmt.Errorf("failed to read %s: %v", build, err) 822 } 823 } 824 825 for t, rs := range rows { 826 br.Rows[t] = append(br.Rows[t], rs...) 827 } 828 if or.Result == state.Row_FAIL { // Ensure failing build has a failing row 829 ft := false 830 for n, rs := range br.Rows { 831 if n == "Overall" { 832 continue 833 } 834 for _, r := range rs { 835 if r.Result == state.Row_FAIL { 836 ft = true // Failing test, huzzah! 837 break 838 } 839 } 840 if ft { 841 break 842 } 843 } 844 if !ft { // Nope, add the F icon and an explanatory message 845 br.Rows["Overall"][0].Icon = "F" 846 br.Rows["Overall"][0].Message = "Build failed outside of test results" 847 } 848 } 849 850 cancel() 851 return &br, nil 852 } 853 854 // Builds is a slice of builds. 855 type Builds []Build 856 857 func (b Builds) Len() int { return len(b) } 858 func (b Builds) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 859 func (b Builds) Less(i, j int) bool { 860 return sortorder.NaturalLess(b[i].Prefix, b[j].Prefix) 861 } 862 863 // listBuilds lists and sorts builds under path, sending them to the builds channel. 
864 func listBuilds(ctx context.Context, client *storage.Client, path gcs.Path) (Builds, error) { 865 log.Printf("LIST: %s", path) 866 p := path.Object() 867 if !strings.HasSuffix(p, "/") { 868 p += "/" 869 } 870 bkt := client.Bucket(path.Bucket()) 871 it := bkt.Objects(ctx, &storage.Query{ 872 Delimiter: "/", 873 Prefix: p, 874 }) 875 var all Builds 876 for { 877 objAttrs, err := it.Next() 878 if err == iterator.Done { 879 break 880 } 881 if err != nil { 882 return nil, fmt.Errorf("failed to list objects: %v", err) 883 } 884 885 // if this is a link under directory, resolve the build value 886 if link := objAttrs.Metadata["link"]; len(link) > 0 { 887 // links created by bootstrap.py have a space 888 link = strings.TrimSpace(link) 889 u, err := url.Parse(link) 890 if err != nil { 891 return nil, fmt.Errorf("could not parse link for key %s: %v", objAttrs.Name, err) 892 } 893 if !strings.HasSuffix(u.Path, "/") { 894 u.Path += "/" 895 } 896 var linkPath gcs.Path 897 if err := linkPath.SetURL(u); err != nil { 898 return nil, fmt.Errorf("could not make GCS path for key %s: %v", objAttrs.Name, err) 899 } 900 all = append(all, Build{ 901 Bucket: bkt, 902 Context: ctx, 903 Prefix: linkPath.Object(), 904 }) 905 continue 906 } 907 908 if len(objAttrs.Prefix) == 0 { 909 continue 910 } 911 912 all = append(all, Build{ 913 Bucket: bkt, 914 Context: ctx, 915 Prefix: objAttrs.Prefix, 916 }) 917 } 918 // Expect builds to be in monotonically increasing order. 919 // So build9 should be followed by build10 or build888 but not build8 920 sort.Sort(sort.Reverse(all)) 921 return all, nil 922 } 923 924 // Headers returns the list of ColumnHeader ConfigurationValues for this group. 
925 func Headers(group config.TestGroup) []string { 926 var extra []string 927 for _, h := range group.ColumnHeader { 928 extra = append(extra, h.ConfigurationValue) 929 } 930 return extra 931 } 932 933 // Rows is a slice of Row pointers 934 type Rows []*state.Row 935 936 func (r Rows) Len() int { return len(r) } 937 func (r Rows) Swap(i, j int) { r[i], r[j] = r[j], r[i] } 938 func (r Rows) Less(i, j int) bool { 939 return sortorder.NaturalLess(r[i].Name, r[j].Name) 940 } 941 942 // ReadBuilds will asynchronously construct a Grid for the group out of the specified builds. 943 func ReadBuilds(parent context.Context, group config.TestGroup, builds Builds, max int, dur time.Duration, concurrency int) (*state.Grid, error) { 944 // Spawn build readers 945 if concurrency == 0 { 946 return nil, fmt.Errorf("zero readers for %s", group.Name) 947 } 948 ctx, cancel := context.WithCancel(parent) 949 var stop time.Time 950 if dur != 0 { 951 stop = time.Now().Add(-dur) 952 } 953 lb := len(builds) 954 if lb > max { 955 log.Printf(" Truncating %d %s results to %d", lb, group.Name, max) 956 lb = max 957 } 958 cols := make([]*Column, lb) 959 log.Printf("UPDATE: %s since %s (%d)", group.Name, stop, stop.Unix()) 960 ec := make(chan error) 961 old := make(chan int) 962 var wg sync.WaitGroup 963 964 // Send build indices to readers 965 indices := make(chan int) 966 wg.Add(1) 967 go func() { 968 defer wg.Done() 969 defer close(indices) 970 for i := range builds[:lb] { 971 select { 972 case <-ctx.Done(): 973 return 974 case <-old: 975 return 976 case indices <- i: 977 } 978 } 979 }() 980 981 // Concurrently receive indices and read builds 982 for i := 0; i < concurrency; i++ { 983 wg.Add(1) 984 go func() { 985 defer wg.Done() 986 for { 987 select { 988 case <-ctx.Done(): 989 return 990 case i, open := <-indices: 991 if !open { 992 return 993 } 994 b := builds[i] 995 c, err := ReadBuild(b) 996 if err != nil { 997 ec <- err 998 return 999 } 1000 cols[i] = c 1001 if c.Started < stop.Unix() { 
1002 select { 1003 case <-ctx.Done(): 1004 case old <- i: 1005 log.Printf("STOP: %d %s started at %d < %d", i, b.Prefix, c.Started, stop.Unix()) 1006 default: // Someone else may have already reported an old result 1007 } 1008 } 1009 } 1010 } 1011 }() 1012 } 1013 1014 // Wait for everyone to finish 1015 go func() { 1016 wg.Wait() 1017 select { 1018 case <-ctx.Done(): 1019 case ec <- nil: // No error 1020 } 1021 }() 1022 1023 // Determine if we got an error 1024 select { 1025 case <-ctx.Done(): 1026 cancel() 1027 return nil, fmt.Errorf("interrupted reading %s", group.Name) 1028 case err := <-ec: 1029 if err != nil { 1030 cancel() 1031 return nil, fmt.Errorf("error reading %s: %v", group.Name, err) 1032 } 1033 } 1034 1035 // Add the columns into a grid message 1036 grid := &state.Grid{} 1037 rows := map[string]*state.Row{} // For fast target => row lookup 1038 h := Headers(group) 1039 nc := makeNameConfig(group.TestNameConfig) 1040 for _, c := range cols { 1041 select { 1042 case <-ctx.Done(): 1043 cancel() 1044 return nil, fmt.Errorf("interrupted appending columns to %s", group.Name) 1045 default: 1046 } 1047 if c == nil { 1048 continue 1049 } 1050 AppendColumn(h, nc, grid, rows, *c) 1051 if c.Started < stop.Unix() { // There may be concurrency results < stop.Unix() 1052 log.Printf(" %s#%s before %s, stopping...", group.Name, c.ID, stop) 1053 break // Just process the first result < stop.Unix() 1054 } 1055 } 1056 sort.Stable(Rows(grid.Rows)) 1057 cancel() 1058 return grid, nil 1059 } 1060 1061 // Days converts days float into a time.Duration, assuming a 24 hour day. 1062 // 1063 // A day is not always 24 hours due to things like leap-seconds. 1064 // We do not need this level of precision though, so ignore the complexity. 1065 func Days(d float64) time.Duration { 1066 return time.Duration(24*d) * time.Hour // Close enough 1067 } 1068 1069 // ReadConfig reads the config from gcs and unmarshals it into a Configuration struct. 
1070 func ReadConfig(ctx context.Context, obj *storage.ObjectHandle) (*config.Configuration, error) { 1071 r, err := obj.NewReader(ctx) 1072 if err != nil { 1073 return nil, fmt.Errorf("failed to open config: %v", err) 1074 } 1075 buf, err := ioutil.ReadAll(r) 1076 if err != nil { 1077 return nil, fmt.Errorf("failed to read config: %v", err) 1078 } 1079 var cfg config.Configuration 1080 if err = proto.Unmarshal(buf, &cfg); err != nil { 1081 return nil, fmt.Errorf("failed to parse: %v", err) 1082 } 1083 return &cfg, nil 1084 } 1085 1086 // Group finds the test group in cfg matching name. 1087 func Group(cfg config.Configuration, name string) (*config.TestGroup, bool) { 1088 for _, g := range cfg.TestGroups { 1089 if g.Name == name { 1090 return g, true 1091 } 1092 } 1093 return nil, false 1094 } 1095 1096 func main() { 1097 opt := gatherOptions() 1098 if err := opt.validate(); err != nil { 1099 log.Fatalf("Invalid flags: %v", err) 1100 } 1101 if !opt.confirm { 1102 log.Println("--confirm=false (DRY-RUN): will not write to gcs") 1103 } 1104 1105 ctx := context.Background() 1106 client, err := gcs.ClientWithCreds(ctx, opt.creds) 1107 if err != nil { 1108 log.Fatalf("Failed to create storage client: %v", err) 1109 } 1110 1111 cfg, err := ReadConfig(ctx, client.Bucket(opt.config.Bucket()).Object(opt.config.Object())) 1112 if err != nil { 1113 log.Fatalf("Failed to read %s: %v", opt.config, err) 1114 } 1115 log.Printf("Found %d groups", len(cfg.TestGroups)) 1116 1117 groups := make(chan config.TestGroup) 1118 var wg sync.WaitGroup 1119 1120 for i := 0; i < opt.groupConcurrency; i++ { 1121 wg.Add(1) 1122 go func() { 1123 for tg := range groups { 1124 tgp, err := testGroupPath(opt.config, tg.Name) 1125 if err == nil { 1126 err = updateGroup(ctx, client, tg, *tgp, opt.buildConcurrency, opt.confirm) 1127 } 1128 if err != nil { 1129 log.Printf("FAIL: %v", err) 1130 } 1131 } 1132 wg.Done() 1133 }() 1134 } 1135 1136 if opt.group != "" { // Just a specific group 1137 // o := 
"ci-kubernetes-test-go" 1138 // o = "ci-kubernetes-node-kubelet-stable3" 1139 // gs://kubernetes-jenkins/logs/ci-kubernetes-test-go 1140 // gs://kubernetes-jenkins/pr-logs/pull-ingress-gce-e2e 1141 o := opt.group 1142 if tg, ok := Group(*cfg, o); !ok { 1143 log.Fatalf("Failed to find %s in %s", o, opt.config) 1144 } else { 1145 groups <- *tg 1146 } 1147 } else { // All groups 1148 for _, tg := range cfg.TestGroups { 1149 groups <- *tg 1150 } 1151 } 1152 close(groups) 1153 wg.Wait() 1154 } 1155 1156 func updateGroup(ctx context.Context, client *storage.Client, tg config.TestGroup, gridPath gcs.Path, concurrency int, write bool) error { 1157 o := tg.Name 1158 1159 var tgPath gcs.Path 1160 if err := tgPath.Set("gs://" + tg.GcsPrefix); err != nil { 1161 return fmt.Errorf("group %s has an invalid gcs_prefix %s: %v", o, tg.GcsPrefix, err) 1162 } 1163 1164 g := state.Grid{} 1165 g.Columns = append(g.Columns, &state.Column{Build: "first", Started: 1}) 1166 builds, err := listBuilds(ctx, client, tgPath) 1167 if err != nil { 1168 return fmt.Errorf("failed to list %s builds: %v", o, err) 1169 } 1170 grid, err := ReadBuilds(ctx, tg, builds, 50, Days(7), concurrency) 1171 if err != nil { 1172 return err 1173 } 1174 buf, err := marshalGrid(*grid) 1175 if err != nil { 1176 return fmt.Errorf("failed to marshal %s grid: %v", o, err) 1177 } 1178 tgp := gridPath 1179 if !write { 1180 log.Printf(" Not writing %s (%d bytes) to %s", o, len(buf), tgp) 1181 } else { 1182 log.Printf(" Writing %s (%d bytes) to %s", o, len(buf), tgp) 1183 if err := gcs.Upload(ctx, client, tgp, buf); err != nil { 1184 return fmt.Errorf("upload %s to %s failed: %v", o, tgp, err) 1185 } 1186 } 1187 log.Printf("WROTE: %s, %dx%d grid (%s, %d bytes)", tg.Name, len(grid.Columns), len(grid.Rows), tgp, len(buf)) 1188 return nil 1189 } 1190 1191 // marhshalGrid serializes a state proto into zlib-compressed bytes. 
1192 func marshalGrid(grid state.Grid) ([]byte, error) { 1193 buf, err := proto.Marshal(&grid) 1194 if err != nil { 1195 return nil, fmt.Errorf("proto encoding failed: %v", err) 1196 } 1197 var zbuf bytes.Buffer 1198 zw := zlib.NewWriter(&zbuf) 1199 if _, err = zw.Write(buf); err != nil { 1200 return nil, fmt.Errorf("zlib compression failed: %v", err) 1201 } 1202 if err = zw.Close(); err != nil { 1203 return nil, fmt.Errorf("zlib closing failed: %v", err) 1204 } 1205 return zbuf.Bytes(), nil 1206 }