github.com/yrj2011/jx-test-infra@v0.0.0-20190529031832-7a2065ee98eb/testgrid/cmd/updater/main.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "bytes" 21 "compress/zlib" 22 "context" 23 "encoding/json" 24 "encoding/xml" 25 "errors" 26 "flag" 27 "fmt" 28 "io" 29 "io/ioutil" 30 "log" 31 "net/url" 32 "path" 33 "regexp" 34 "runtime" 35 "sort" 36 "strings" 37 "sync" 38 "time" 39 40 "k8s.io/test-infra/testgrid/config" 41 "k8s.io/test-infra/testgrid/state" 42 "k8s.io/test-infra/testgrid/util/gcs" 43 44 "cloud.google.com/go/storage" 45 "github.com/golang/protobuf/proto" 46 "google.golang.org/api/iterator" 47 48 "vbom.ml/util/sortorder" 49 ) 50 51 // options configures the updater 52 type options struct { 53 config gcs.Path // gs://path/to/config/proto 54 creds string 55 confirm bool 56 group string 57 groupConcurrency int 58 buildConcurrency int 59 } 60 61 // validate ensures sane options 62 func (o *options) validate() error { 63 if o.config.String() == "" { 64 return errors.New("empty --config") 65 } 66 if o.config.Bucket() == "k8s-testgrid" { // TODO(fejta): remove 67 return fmt.Errorf("--config=%s cannot start with gs://k8s-testgrid", o.config) 68 } 69 if o.groupConcurrency == 0 { 70 o.groupConcurrency = 4 * runtime.NumCPU() 71 } 72 if o.buildConcurrency == 0 { 73 o.buildConcurrency = 4 * runtime.NumCPU() 74 } 75 76 return nil 77 } 78 79 // gatherOptions reads options from flags 80 
func gatherOptions() options { 81 o := options{} 82 flag.Var(&o.config, "config", "gs://path/to/config.pb") 83 flag.StringVar(&o.creds, "gcp-service-account", "", "/path/to/gcp/creds (use local creds if empty") 84 flag.BoolVar(&o.confirm, "confirm", false, "Upload data if set") 85 flag.StringVar(&o.group, "test-group", "", "Only update named group if set") 86 flag.IntVar(&o.groupConcurrency, "group-concurrency", 0, "Manually define the number of groups to concurrently update if non-zero") 87 flag.IntVar(&o.buildConcurrency, "build-concurrency", 0, "Manually define the number of builds to concurrently read if non-zero") 88 flag.Parse() 89 return o 90 } 91 92 // testGroupPath() returns the path to a test_group proto given this proto 93 func testGroupPath(g gcs.Path, name string) (*gcs.Path, error) { 94 u, err := url.Parse(name) 95 if err != nil { 96 return nil, fmt.Errorf("invalid url %s: %v", name, err) 97 } 98 np, err := g.ResolveReference(u) 99 if err == nil && np.Bucket() != g.Bucket() { 100 return nil, fmt.Errorf("testGroup %s should not change bucket", name) 101 } 102 return np, nil 103 } 104 105 // Build points to a build stored under a particular gcs prefix. 106 type Build struct { 107 Bucket *storage.BucketHandle 108 Context context.Context 109 Prefix string 110 number *int 111 } 112 113 func (b Build) String() string { 114 return b.Prefix 115 } 116 117 // Started holds the started.json values of the build. 
type Started struct {
	Timestamp   int64             `json:"timestamp"` // epoch seconds
	RepoVersion string            `json:"repo-version"`
	Node        string            `json:"node"`
	Pull        string            `json:"pull"`
	Repos       map[string]string `json:"repos"` // {repo: branch_or_pull} map
}

// Finished holds the finished.json values of the build
type Finished struct {
	// Timestamp is epoch seconds
	Timestamp  int64    `json:"timestamp"`
	Passed     bool     `json:"passed"`
	JobVersion string   `json:"job-version"`
	Metadata   Metadata `json:"metadata"`
	// running is not read from json (unexported); it marks a build that has
	// no finished.json yet.
	running bool
}

// Metadata holds the finished.json values in the metadata key.
//
// Metadata values can either be string or string map of strings
//
// TODO(fejta): figure out which of these we want and document them
// Special values: infra-commit, repos, repo, repo-commit, others
type Metadata map[string]interface{}

// String returns the name key if its value is a string.
//
// The boolean reports whether the key exists. The pointer is nil when the key
// is missing or when its value is not a string.
func (m Metadata) String(name string) (*string, bool) {
	v, ok := m[name]
	if !ok {
		return nil, false
	}
	s, isString := v.(string)
	if !isString {
		return nil, true
	}
	return &s, true
}

// Meta returns the name key if its value is a child object.
//
// The boolean is false only when the key exists but does not hold an object;
// a missing key yields (nil, true). Child objects are accepted both as
// Metadata and as map[string]interface{}, which is what encoding/json
// produces when decoding a nested object into an interface{} value.
func (m Metadata) Meta(name string) (*Metadata, bool) {
	v, ok := m[name]
	if !ok {
		return nil, true
	}
	switch t := v.(type) {
	case Metadata:
		return &t, true
	case map[string]interface{}:
		child := Metadata(t)
		return &child, true
	default:
		return nil, false
	}
}

// ColumnMetadata returns the subset of values in the map that are strings.
167 func (m Metadata) ColumnMetadata() ColumnMetadata { 168 bm := ColumnMetadata{} 169 for k, v := range m { 170 if s, ok := v.(string); ok { 171 bm[k] = s 172 } 173 // TODO(fejta): handle sub items 174 } 175 return bm 176 } 177 178 // JunitSuites holds a <testsuites/> list of JunitSuite results 179 type JunitSuites struct { 180 XMLName xml.Name `xml:"testsuites"` 181 Suites []JunitSuite `xml:"testsuite"` 182 } 183 184 // JunitSuite holds <testsuite/> results 185 type JunitSuite struct { 186 XMLName xml.Name `xml:"testsuite"` 187 Name string `xml:"name,attr"` 188 Time float64 `xml:"time,attr"` // Seconds 189 Failures int `xml:"failures,attr"` 190 Tests int `xml:"tests,attr"` 191 Results []JunitResult `xml:"testcase"` 192 /* 193 * <properties><property name="go.version" value="go1.8.3"/></properties> 194 */ 195 } 196 197 // JunitResult holds <testcase/> results 198 type JunitResult struct { 199 Name string `xml:"name,attr"` 200 Time float64 `xml:"time,attr"` 201 ClassName string `xml:"classname,attr"` 202 Failure *string `xml:"failure,omitempty"` 203 Output *string `xml:"system-out,omitempty"` 204 Error *string `xml:"system-err,omitempty"` 205 Skipped *string `xml:"skipped,omitempty"` 206 } 207 208 // Message extracts the message for the junit test case. 209 // 210 // Will use the first non-empty <failure/>, <skipped/>, <output/> value. 211 func (jr JunitResult) Message() string { 212 const max = 140 213 var msg string 214 switch { 215 case jr.Failure != nil && *jr.Failure != "": 216 msg = *jr.Failure 217 case jr.Skipped != nil && *jr.Skipped != "": 218 msg = *jr.Skipped 219 case jr.Output != nil && *jr.Output != "": 220 msg = *jr.Output 221 } 222 l := len(msg) 223 if max == 0 || l <= max { 224 return msg 225 } 226 h := max / 2 227 return msg[:h] + "..." + msg[l-h-1:] 228 } 229 230 // Row converts the junit result into a Row result, prepending the suite name. 
231 func (jr JunitResult) Row(suite string) (string, Row) { 232 n := jr.Name 233 if suite != "" { 234 n = suite + "." + n 235 } 236 r := Row{ 237 Metrics: map[string]float64{}, 238 Metadata: map[string]string{ 239 "Tests name": n, 240 }, 241 } 242 if jr.Time > 0 { 243 r.Metrics[elapsedKey] = jr.Time 244 } 245 if msg := jr.Message(); msg != "" { 246 r.Message = msg 247 } 248 switch { 249 case jr.Failure != nil: 250 r.Result = state.Row_FAIL 251 if r.Message != "" { 252 r.Icon = "F" 253 } 254 case jr.Skipped != nil: 255 r.Result = state.Row_PASS_WITH_SKIPS 256 if r.Message != "" { 257 r.Icon = "S" 258 } 259 default: 260 r.Result = state.Row_PASS 261 } 262 return n, r 263 } 264 265 func unmarshalXML(buf []byte, i interface{}) error { 266 reader := bytes.NewReader(buf) 267 dec := xml.NewDecoder(reader) 268 dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { 269 switch charset { 270 case "UTF-8", "utf8", "": 271 // utf8 is not recognized by golang, but our coalesce.py writes a utf8 doc, which python accepts. 
272 return input, nil 273 default: 274 return nil, fmt.Errorf("unknown charset: %s", charset) 275 } 276 } 277 return dec.Decode(i) 278 } 279 280 func extractRows(buf []byte, meta map[string]string) (map[string][]Row, error) { 281 var suites JunitSuites 282 // Try to parse it as a <testsuites/> object 283 err := unmarshalXML(buf, &suites) 284 if err != nil { 285 // Maybe it is a <testsuite/> object instead 286 suites.Suites = append([]JunitSuite(nil), JunitSuite{}) 287 ie := unmarshalXML(buf, &suites.Suites[0]) 288 if ie != nil { 289 // Nope, it just doesn't parse 290 return nil, fmt.Errorf("not valid testsuites: %v nor testsuite: %v", err, ie) 291 } 292 } 293 rows := map[string][]Row{} 294 for _, suite := range suites.Suites { 295 for _, sr := range suite.Results { 296 if sr.Skipped != nil && len(*sr.Skipped) == 0 { 297 continue 298 } 299 300 n, r := sr.Row(suite.Name) 301 for k, v := range meta { 302 r.Metadata[k] = v 303 } 304 rows[n] = append(rows[n], r) 305 } 306 } 307 return rows, nil 308 } 309 310 // ColumnMetadata holds key => value mapping of metadata info. 311 type ColumnMetadata map[string]string 312 313 // Column represents a build run, which includes one or more row results and metadata. 314 type Column struct { 315 ID string 316 Started int64 317 Finished int64 318 Passed bool 319 Rows map[string][]Row 320 Metadata ColumnMetadata 321 } 322 323 // Row holds results for a piece of a build run, such as a test result. 324 type Row struct { 325 Result state.Row_Result 326 Metrics map[string]float64 327 Metadata map[string]string 328 Message string 329 Icon string 330 } 331 332 // Overall calculates the generated-overall row value for the current column 333 func (br Column) Overall() Row { 334 r := Row{ 335 Metadata: map[string]string{"Tests name": "Overall"}, 336 } 337 switch { 338 case br.Finished > 0: 339 // Completed, did we pass? 
340 if br.Passed { 341 r.Result = state.Row_PASS // Yep 342 } else { 343 r.Result = state.Row_FAIL 344 } 345 r.Metrics = map[string]float64{ 346 elapsedKey: float64(br.Finished - br.Started), 347 } 348 case time.Now().Add(-24*time.Hour).Unix() > br.Started: 349 // Timed out 350 r.Result = state.Row_FAIL 351 r.Message = "Testing did not complete within 24 hours" 352 r.Icon = "T" 353 default: 354 r.Result = state.Row_RUNNING 355 r.Message = "Still running; has not finished..." 356 r.Icon = "R" 357 } 358 return r 359 } 360 361 // AppendMetric adds the value at index to metric. 362 // 363 // Handles the details of sparse-encoding the results. 364 // Indicies must be monotonically increasing for the same metric. 365 func AppendMetric(metric *state.Metric, idx int32, value float64) { 366 if l := int32(len(metric.Indices)); l == 0 || metric.Indices[l-2]+metric.Indices[l-1] != idx { 367 // If we append V to idx 9 and metric.Indices = [3, 4] then the last filled index is 3+4-1=7 368 // So that means we have holes in idx 7 and 8, so start a new group. 369 metric.Indices = append(metric.Indices, idx, 1) 370 } else { 371 metric.Indices[l-1]++ // Expand the length of the current filled list 372 } 373 metric.Values = append(metric.Values, value) 374 } 375 376 // FindMetric returns the first metric with the specified name. 377 func FindMetric(row *state.Row, name string) *state.Metric { 378 for _, m := range row.Metrics { 379 if m.Name == name { 380 return m 381 } 382 } 383 return nil 384 } 385 386 var noResult = Row{Result: state.Row_NO_RESULT} 387 388 // AppendResult adds the rowResult column to the row. 389 // 390 // Handles the details like missing fields and run-length-encoding the result. 
391 func AppendResult(row *state.Row, rowResult Row, count int) { 392 latest := int32(rowResult.Result) 393 n := len(row.Results) 394 switch { 395 case n == 0, row.Results[n-2] != latest: 396 row.Results = append(row.Results, latest, int32(count)) 397 default: 398 row.Results[n-1] += int32(count) 399 } 400 401 for i := 0; i < count; i++ { // TODO(fejta): update server to allow empty cellids 402 row.CellIds = append(row.CellIds, "") 403 } 404 405 // Javascript client expects no result cells to skip icons/messages 406 // TODO(fejta): reconsider this 407 if rowResult.Result != state.Row_NO_RESULT { 408 for i := 0; i < count; i++ { 409 row.Messages = append(row.Messages, rowResult.Message) 410 row.Icons = append(row.Icons, rowResult.Icon) 411 } 412 } 413 } 414 415 type nameConfig struct { 416 format string 417 parts []string 418 } 419 420 func makeNameConfig(tnc *config.TestNameConfig) nameConfig { 421 if tnc == nil { 422 return nameConfig{ 423 format: "%s", 424 parts: []string{"Tests name"}, 425 } 426 } 427 nc := nameConfig{ 428 format: tnc.NameFormat, 429 parts: make([]string, len(tnc.NameElements)), 430 } 431 for i, e := range tnc.NameElements { 432 nc.parts[i] = e.TargetConfig 433 } 434 return nc 435 } 436 437 // Format renders any requested metadata into the name 438 func (r Row) Format(config nameConfig, meta map[string]string) string { 439 parsed := make([]interface{}, len(config.parts)) 440 for i, p := range config.parts { 441 if v, ok := r.Metadata[p]; ok { 442 parsed[i] = v 443 continue 444 } 445 parsed[i] = meta[p] // "" if missing 446 } 447 return fmt.Sprintf(config.format, parsed...) 448 } 449 450 // AppendColumn adds the build column to the grid. 451 // 452 // This handles details like: 453 // * rows appearing/disappearing in the middle of the run. 
454 // * adding auto metadata like duration, commit as well as any user-added metadata 455 // * extracting build metadata into the appropriate column header 456 // * Ensuring row names are unique and formatted with metadata 457 func AppendColumn(headers []string, format nameConfig, grid *state.Grid, rows map[string]*state.Row, build Column) { 458 c := state.Column{ 459 Build: build.ID, 460 Started: float64(build.Started * 1000), 461 } 462 for _, h := range headers { 463 if build.Finished == 0 { 464 c.Extra = append(c.Extra, "") 465 continue 466 } 467 trunc := 0 468 var ah string 469 if h == "Commit" { // TODO(fejta): fix, jobs use explicit key, support truncation 470 h = "repo-commit" 471 trunc = 9 472 ah = "job-version" 473 } 474 v, ok := build.Metadata[h] 475 if !ok { 476 // TODO(fejta): fix, make jobs use one or the other 477 if ah == "" { 478 log.Printf(" %s metadata missing %s", c.Build, h) 479 v = "missing" 480 } else { 481 if av, ok := build.Metadata[ah]; ok { 482 parts := strings.SplitN(av, "+", 2) 483 v = parts[len(parts)-1] 484 } else { 485 log.Printf(" %s metadata missing both keys %s and alternate %s", c.Build, h, ah) 486 } 487 } 488 } 489 if trunc > 0 && trunc < len(v) { 490 v = v[0:trunc] 491 } 492 c.Extra = append(c.Extra, v) 493 } 494 grid.Columns = append(grid.Columns, &c) 495 496 missing := map[string]*state.Row{} 497 for name, row := range rows { 498 missing[name] = row 499 } 500 501 found := map[string]bool{} 502 503 for target, results := range build.Rows { 504 for _, br := range results { 505 prefix := br.Format(format, build.Metadata) 506 name := prefix 507 // Ensure each name is unique 508 // If we have multiple results with the same name foo 509 // then append " [n]" to the name so we wind up with: 510 // foo 511 // foo [1] 512 // foo [2] 513 // etc 514 for idx := 1; found[name]; idx++ { 515 // found[name] exists, so try foo [n+1] 516 name = fmt.Sprintf("%s [%d]", prefix, idx) 517 } 518 // hooray, name not in found 519 found[name] = true 
520 delete(missing, name) 521 522 // Does this row already exist? 523 r, ok := rows[name] 524 if !ok { // New row 525 r = &state.Row{ 526 Name: name, 527 Id: target, 528 } 529 rows[name] = r 530 grid.Rows = append(grid.Rows, r) 531 if n := len(grid.Columns); n > 1 { 532 // Add missing entries for more recent builds (aka earlier columns) 533 AppendResult(r, noResult, n-1) 534 } 535 } 536 537 AppendResult(r, br, 1) 538 for k, v := range br.Metrics { 539 m := FindMetric(r, k) 540 if m == nil { 541 m = &state.Metric{Name: k} 542 r.Metrics = append(r.Metrics, m) 543 } 544 AppendMetric(m, int32(len(r.Messages)), v) 545 } 546 } 547 } 548 549 for _, row := range missing { 550 AppendResult(row, noResult, 1) 551 } 552 } 553 554 const elapsedKey = "seconds-elapsed" 555 556 // junit_CONTEXT_TIMESTAMP_THREAD.xml 557 var re = regexp.MustCompile(`.+/junit(_[^_]+)?(_\d+-\d+)?(_\d+)?\.xml$`) 558 559 // dropPrefix removes the _ in _CONTEXT to help keep the regexp simple 560 func dropPrefix(name string) string { 561 if len(name) == 0 { 562 return name 563 } 564 return name[1:] 565 } 566 567 // ValidateName checks whether the basename matches a junit file. 568 // 569 // Expected format: junit_context_20180102-1256-07.xml 570 // Results in { 571 // "Context": "context", 572 // "Timestamp": "20180102-1256", 573 // "Thread": "07", 574 // } 575 func ValidateName(name string) map[string]string { 576 mat := re.FindStringSubmatch(name) 577 if mat == nil { 578 return nil 579 } 580 return map[string]string{ 581 "Context": dropPrefix(mat[1]), 582 "Timestamp": dropPrefix(mat[2]), 583 "Thread": dropPrefix(mat[3]), 584 } 585 586 } 587 588 // ReadBuild asynchronously downloads the files in build from gcs and convert them into a build. 
589 func ReadBuild(build Build) (*Column, error) { 590 var wg sync.WaitGroup // Each subtask does wg.Add(1), then we wg.Wait() for them to finish 591 ctx, cancel := context.WithTimeout(build.Context, 30*time.Second) // Allows aborting after first error 592 ec := make(chan error) // Receives errors from anyone 593 594 // Download started.json, send to sc 595 wg.Add(1) 596 sc := make(chan Started) // Receives started.json result 597 go func() { 598 defer wg.Done() 599 started, err := func() (Started, error) { 600 var started Started 601 s := build.Bucket.Object(build.Prefix + "started.json") 602 sr, err := s.NewReader(ctx) 603 if err != nil { 604 return started, fmt.Errorf("build has not started") 605 } 606 if err = json.NewDecoder(sr).Decode(&started); err != nil { 607 return started, fmt.Errorf("could not decode started.json: %v", err) 608 } 609 return started, nil 610 }() 611 if err != nil { 612 select { 613 case <-ctx.Done(): 614 case ec <- err: 615 } 616 return 617 } 618 select { 619 case <-ctx.Done(): 620 case sc <- started: 621 } 622 }() 623 624 // Download finished.json, send to fc 625 wg.Add(1) 626 fc := make(chan Finished) // Receives finished.json result 627 go func() { 628 defer wg.Done() 629 finished, err := func() (Finished, error) { 630 f := build.Bucket.Object(build.Prefix + "finished.json") 631 fr, err := f.NewReader(ctx) 632 var finished Finished 633 if err == storage.ErrObjectNotExist { // Job has not (yet) completed 634 finished.running = true 635 return finished, nil 636 } else if err != nil { 637 return finished, fmt.Errorf("could not open %s: %v", f, err) 638 } 639 if err = json.NewDecoder(fr).Decode(&finished); err != nil { 640 return finished, fmt.Errorf("could not decode finished.json: %v", err) 641 } 642 return finished, nil 643 }() 644 if err != nil { 645 select { 646 case <-ctx.Done(): 647 case ec <- err: 648 } 649 return 650 } 651 select { 652 case <-ctx.Done(): 653 case fc <- finished: 654 } 655 }() 656 657 // List artifacts, send to ac 
channel 658 wg.Add(1) 659 ac := make(chan string) // Receives names of arifacts 660 go func() { 661 defer wg.Done() 662 defer close(ac) // No more artifacts 663 err := func() error { 664 pref := build.Prefix + "artifacts/" 665 ai := build.Bucket.Objects(ctx, &storage.Query{Prefix: pref}) 666 for { 667 a, err := ai.Next() 668 if err == iterator.Done { 669 break 670 } 671 if err != nil { 672 return fmt.Errorf("failed to list %s: %v", pref, err) 673 } 674 select { 675 case <-ctx.Done(): 676 return fmt.Errorf("interrupted listing %s", pref) 677 case ac <- a.Name: // Added 678 } 679 } 680 return nil 681 }() 682 if err != nil { 683 select { 684 case <-ctx.Done(): 685 case ec <- err: 686 } 687 } 688 }() 689 690 // Download each artifact, send row map to rc 691 // With parallelism: 60s without: 220s 692 wg.Add(1) 693 rc := make(chan map[string][]Row) 694 go func() { 695 defer wg.Done() 696 defer close(rc) // No more rows 697 var awg sync.WaitGroup 698 for a := range ac { 699 select { // Should we stop? 700 case <-ctx.Done(): // Yes 701 return 702 default: // No, keep going 703 } 704 meta := ValidateName(a) 705 if meta == nil { // Not junit 706 continue 707 } 708 awg.Add(1) 709 // Read each artifact in a new thread 710 go func(ap string, meta map[string]string) { 711 defer awg.Done() 712 err := func() error { 713 ar, err := build.Bucket.Object(ap).NewReader(ctx) 714 if err != nil { 715 return fmt.Errorf("could not read %s: %v", ap, err) 716 } 717 if r := ar.Remain(); r > 50e6 { 718 return fmt.Errorf("too large: %s is %d > 50M", ap, r) 719 } 720 buf, err := ioutil.ReadAll(ar) 721 if err != nil { 722 return fmt.Errorf("partial read of %s: %v", ap, err) 723 } 724 725 select { // Keep going? 
726 case <-ctx.Done(): // No, cancelled 727 return errors.New("aborted artifact read") 728 default: // Yes, acquire lock 729 // TODO(fejta): consider sync.Map 730 rows, err := extractRows(buf, meta) 731 if err != nil { 732 return fmt.Errorf("failed to parse %s: %v", ap, err) 733 } 734 rc <- rows 735 } 736 return nil 737 }() 738 if err == nil { 739 return 740 } 741 select { 742 case <-ctx.Done(): 743 case ec <- err: 744 } 745 }(a, meta) 746 } 747 awg.Wait() 748 }() 749 750 // Append each row into the column 751 rows := map[string][]Row{} 752 wg.Add(1) 753 go func() { 754 defer wg.Done() 755 for r := range rc { 756 select { // Should we continue 757 case <-ctx.Done(): // No, aborted 758 return 759 default: // Yes 760 } 761 for t, rs := range r { 762 rows[t] = append(rows[t], rs...) 763 } 764 } 765 }() 766 767 // Wait for everyone to complete their work 768 go func() { 769 wg.Wait() 770 select { 771 case <-ctx.Done(): 772 return 773 case ec <- nil: 774 } 775 }() 776 var finished *Finished 777 var started *Started 778 for { // Wait until we receive started and finished and/or an error 779 select { 780 case err := <-ec: 781 if err != nil { 782 cancel() 783 return nil, fmt.Errorf("failed to read %s: %v", build, err) 784 } 785 break 786 case s := <-sc: 787 started = &s 788 case f := <-fc: 789 finished = &f 790 } 791 if started != nil && finished != nil { 792 break 793 } 794 } 795 br := Column{ 796 ID: path.Base(build.Prefix), 797 Started: started.Timestamp, 798 } 799 // Has the build finished? 
800 if finished.running { // No 801 cancel() 802 br.Rows = map[string][]Row{ 803 "Overall": {br.Overall()}, 804 } 805 return &br, nil 806 } 807 br.Finished = finished.Timestamp 808 br.Metadata = finished.Metadata.ColumnMetadata() 809 br.Passed = finished.Passed 810 or := br.Overall() 811 br.Rows = map[string][]Row{ 812 "Overall": {or}, 813 } 814 select { 815 case <-ctx.Done(): 816 cancel() 817 return nil, fmt.Errorf("interrupted reading %s", build) 818 case err := <-ec: 819 if err != nil { 820 cancel() 821 return nil, fmt.Errorf("failed to read %s: %v", build, err) 822 } 823 } 824 825 for t, rs := range rows { 826 br.Rows[t] = append(br.Rows[t], rs...) 827 } 828 if or.Result == state.Row_FAIL { // Ensure failing build has a failing row 829 ft := false 830 for n, rs := range br.Rows { 831 if n == "Overall" { 832 continue 833 } 834 for _, r := range rs { 835 if r.Result == state.Row_FAIL { 836 ft = true // Failing test, huzzah! 837 break 838 } 839 } 840 if ft { 841 break 842 } 843 } 844 if !ft { // Nope, add the F icon and an explanatory message 845 br.Rows["Overall"][0].Icon = "F" 846 br.Rows["Overall"][0].Message = "Build failed outside of test results" 847 } 848 } 849 850 cancel() 851 return &br, nil 852 } 853 854 // Builds is a slice of builds. 855 type Builds []Build 856 857 func (b Builds) Len() int { return len(b) } 858 func (b Builds) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 859 func (b Builds) Less(i, j int) bool { 860 return sortorder.NaturalLess(b[i].Prefix, b[j].Prefix) 861 } 862 863 // listBuilds lists and sorts builds under path, sending them to the builds channel. 
864 func listBuilds(ctx context.Context, client *storage.Client, path gcs.Path) (Builds, error) { 865 log.Printf("LIST: %s", path) 866 p := path.Object() 867 if p[len(p)-1] != '/' { 868 p += "/" 869 } 870 bkt := client.Bucket(path.Bucket()) 871 it := bkt.Objects(ctx, &storage.Query{ 872 Delimiter: "/", 873 Prefix: p, 874 }) 875 var all Builds 876 for { 877 objAttrs, err := it.Next() 878 if err == iterator.Done { 879 break 880 } 881 if err != nil { 882 return nil, fmt.Errorf("failed to list objects: %v", err) 883 } 884 if len(objAttrs.Prefix) == 0 { 885 continue 886 } 887 888 all = append(all, Build{ 889 Bucket: bkt, 890 Context: ctx, 891 Prefix: objAttrs.Prefix, 892 }) 893 } 894 // Expect builds to be in monotonically increasing order. 895 // So build9 should be followed by build10 or build888 but not build8 896 sort.Sort(sort.Reverse(all)) 897 return all, nil 898 } 899 900 // Headers returns the list of ColumnHeader ConfigurationValues for this group. 901 func Headers(group config.TestGroup) []string { 902 var extra []string 903 for _, h := range group.ColumnHeader { 904 extra = append(extra, h.ConfigurationValue) 905 } 906 return extra 907 } 908 909 // Rows is a slice of Row pointers 910 type Rows []*state.Row 911 912 func (r Rows) Len() int { return len(r) } 913 func (r Rows) Swap(i, j int) { r[i], r[j] = r[j], r[i] } 914 func (r Rows) Less(i, j int) bool { 915 return sortorder.NaturalLess(r[i].Name, r[j].Name) 916 } 917 918 // ReadBuilds will asynchronously construct a Grid for the group out of the specified builds. 
919 func ReadBuilds(parent context.Context, group config.TestGroup, builds Builds, max int, dur time.Duration, concurrency int) (*state.Grid, error) { 920 // Spawn build readers 921 if concurrency == 0 { 922 return nil, fmt.Errorf("zero readers for %s", group.Name) 923 } 924 ctx, cancel := context.WithCancel(parent) 925 var stop time.Time 926 if dur != 0 { 927 stop = time.Now().Add(-dur) 928 } 929 lb := len(builds) 930 if lb > max { 931 log.Printf(" Truncating %d %s results to %d", lb, group.Name, max) 932 lb = max 933 } 934 cols := make([]*Column, lb) 935 log.Printf("UPDATE: %s since %s (%d)", group.Name, stop, stop.Unix()) 936 ec := make(chan error) 937 old := make(chan int) 938 var wg sync.WaitGroup 939 940 // Send build indices to readers 941 indices := make(chan int) 942 wg.Add(1) 943 go func() { 944 defer wg.Done() 945 defer close(indices) 946 for i := range builds[:lb] { 947 select { 948 case <-ctx.Done(): 949 return 950 case <-old: 951 return 952 case indices <- i: 953 } 954 } 955 }() 956 957 // Concurrently receive indicies and read builds 958 for i := 0; i < concurrency; i++ { 959 wg.Add(1) 960 go func() { 961 defer wg.Done() 962 for { 963 select { 964 case <-ctx.Done(): 965 return 966 case i, open := <-indices: 967 if !open { 968 return 969 } 970 b := builds[i] 971 c, err := ReadBuild(b) 972 if err != nil { 973 ec <- err 974 return 975 } 976 cols[i] = c 977 if c.Started < stop.Unix() { 978 select { 979 case <-ctx.Done(): 980 case old <- i: 981 log.Printf("STOP: %d %s started at %d < %d", i, b.Prefix, c.Started, stop.Unix()) 982 default: // Someone else may have already reported an old result 983 } 984 } 985 } 986 } 987 }() 988 } 989 990 // Wait for everyone to finish 991 go func() { 992 wg.Wait() 993 select { 994 case <-ctx.Done(): 995 case ec <- nil: // No error 996 } 997 }() 998 999 // Determine if we got an error 1000 select { 1001 case <-ctx.Done(): 1002 cancel() 1003 return nil, fmt.Errorf("interrupted reading %s", group.Name) 1004 case err := <-ec: 
1005 if err != nil { 1006 cancel() 1007 return nil, fmt.Errorf("error reading %s: %v", group.Name, err) 1008 } 1009 } 1010 1011 // Add the columns into a grid message 1012 grid := &state.Grid{} 1013 rows := map[string]*state.Row{} // For fast target => row lookup 1014 h := Headers(group) 1015 nc := makeNameConfig(group.TestNameConfig) 1016 for _, c := range cols { 1017 select { 1018 case <-ctx.Done(): 1019 cancel() 1020 return nil, fmt.Errorf("interrupted appending columns to %s", group.Name) 1021 default: 1022 } 1023 if c == nil { 1024 continue 1025 } 1026 AppendColumn(h, nc, grid, rows, *c) 1027 if c.Started < stop.Unix() { // There may be concurrency results < stop.Unix() 1028 log.Printf(" %s#%s before %s, stopping...", group.Name, c.ID, stop) 1029 break // Just process the first result < stop.Unix() 1030 } 1031 } 1032 sort.Stable(Rows(grid.Rows)) 1033 cancel() 1034 return grid, nil 1035 } 1036 1037 // Days converts days float into a time.Duration, assuming a 24 hour day. 1038 // 1039 // A day is not always 24 hours due to things like leap-seconds. 1040 // We do not need this level of precision though, so ignore the complexity. 1041 func Days(d float64) time.Duration { 1042 return time.Duration(24*d) * time.Hour // Close enough 1043 } 1044 1045 // ReadConfig reads the config from gcs and unmarshals it into a Configuration struct. 1046 func ReadConfig(ctx context.Context, obj *storage.ObjectHandle) (*config.Configuration, error) { 1047 r, err := obj.NewReader(ctx) 1048 if err != nil { 1049 return nil, fmt.Errorf("failed to open config: %v", err) 1050 } 1051 buf, err := ioutil.ReadAll(r) 1052 if err != nil { 1053 return nil, fmt.Errorf("failed to read config: %v", err) 1054 } 1055 var cfg config.Configuration 1056 if err = proto.Unmarshal(buf, &cfg); err != nil { 1057 return nil, fmt.Errorf("failed to parse: %v", err) 1058 } 1059 return &cfg, nil 1060 } 1061 1062 // Group finds the test group in cfg matching name. 
1063 func Group(cfg config.Configuration, name string) (*config.TestGroup, bool) { 1064 for _, g := range cfg.TestGroups { 1065 if g.Name == name { 1066 return g, true 1067 } 1068 } 1069 return nil, false 1070 } 1071 1072 func main() { 1073 opt := gatherOptions() 1074 if err := opt.validate(); err != nil { 1075 log.Fatalf("Invalid flags: %v", err) 1076 } 1077 if !opt.confirm { 1078 log.Println("--confirm=false (DRY-RUN): will not write to gcs") 1079 } 1080 1081 ctx := context.Background() 1082 client, err := gcs.ClientWithCreds(ctx, opt.creds) 1083 if err != nil { 1084 log.Fatalf("Failed to create storage client: %v", err) 1085 } 1086 1087 cfg, err := ReadConfig(ctx, client.Bucket(opt.config.Bucket()).Object(opt.config.Object())) 1088 if err != nil { 1089 log.Fatalf("Failed to read %s: %v", opt.config, err) 1090 } 1091 log.Printf("Found %d groups", len(cfg.TestGroups)) 1092 1093 groups := make(chan config.TestGroup) 1094 var wg sync.WaitGroup 1095 1096 for i := 0; i < opt.groupConcurrency; i++ { 1097 wg.Add(1) 1098 go func() { 1099 for tg := range groups { 1100 tgp, err := testGroupPath(opt.config, tg.Name) 1101 if err == nil { 1102 err = updateGroup(ctx, client, tg, *tgp, opt.buildConcurrency, opt.confirm) 1103 } 1104 if err != nil { 1105 log.Printf("FAIL: %v", err) 1106 } 1107 } 1108 wg.Done() 1109 }() 1110 } 1111 1112 if opt.group != "" { // Just a specific group 1113 // o := "ci-kubernetes-test-go" 1114 // o = "ci-kubernetes-node-kubelet-stable3" 1115 // gs://kubernetes-jenkins/logs/ci-kubernetes-test-go 1116 // gs://kubernetes-jenkins/pr-logs/pull-ingress-gce-e2e 1117 o := opt.group 1118 if tg, ok := Group(*cfg, o); !ok { 1119 log.Fatalf("Failed to find %s in %s", o, opt.config) 1120 } else { 1121 groups <- *tg 1122 } 1123 } else { // All groups 1124 for _, tg := range cfg.TestGroups { 1125 groups <- *tg 1126 } 1127 } 1128 close(groups) 1129 wg.Wait() 1130 } 1131 1132 func updateGroup(ctx context.Context, client *storage.Client, tg config.TestGroup, gridPath 
gcs.Path, concurrency int, write bool) error { 1133 o := tg.Name 1134 1135 var tgPath gcs.Path 1136 if err := tgPath.Set("gs://" + tg.GcsPrefix); err != nil { 1137 return fmt.Errorf("group %s has an invalid gcs_prefix %s: %v", o, tg.GcsPrefix, err) 1138 } 1139 1140 g := state.Grid{} 1141 g.Columns = append(g.Columns, &state.Column{Build: "first", Started: 1}) 1142 builds, err := listBuilds(ctx, client, tgPath) 1143 if err != nil { 1144 return fmt.Errorf("failed to list %s builds: %v", o, err) 1145 } 1146 grid, err := ReadBuilds(ctx, tg, builds, 50, Days(7), concurrency) 1147 if err != nil { 1148 return err 1149 } 1150 buf, err := marshalGrid(*grid) 1151 if err != nil { 1152 return fmt.Errorf("failed to marshal %s grid: %v", o, err) 1153 } 1154 tgp := gridPath 1155 if !write { 1156 log.Printf(" Not writing %s (%d bytes) to %s", o, len(buf), tgp) 1157 } else { 1158 log.Printf(" Writing %s (%d bytes) to %s", o, len(buf), tgp) 1159 if err := gcs.Upload(ctx, client, tgp, buf); err != nil { 1160 return fmt.Errorf("upload %s to %s failed: %v", o, tgp, err) 1161 } 1162 } 1163 log.Printf("WROTE: %s, %dx%d grid (%s, %d bytes)", tg.Name, len(grid.Columns), len(grid.Rows), tgp, len(buf)) 1164 return nil 1165 } 1166 1167 // marhshalGrid serializes a state proto into zlib-compressed bytes. 1168 func marshalGrid(grid state.Grid) ([]byte, error) { 1169 buf, err := proto.Marshal(&grid) 1170 if err != nil { 1171 return nil, fmt.Errorf("proto encoding failed: %v", err) 1172 } 1173 var zbuf bytes.Buffer 1174 zw := zlib.NewWriter(&zbuf) 1175 if _, err = zw.Write(buf); err != nil { 1176 return nil, fmt.Errorf("zlib compression failed: %v", err) 1177 } 1178 if err = zw.Close(); err != nil { 1179 return nil, fmt.Errorf("zlib closing failed: %v", err) 1180 } 1181 return zbuf.Bytes(), nil 1182 }