github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/testgrid/cmd/updater/main.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
import (
	"bytes"
	"compress/zlib"
	"context"
	"encoding/json"
	"encoding/xml"
	"errors"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/url"
	"path"
	"regexp"
	"runtime"
	"sort"
	"strings"
	"sync"
	"time"
	"unicode/utf8"

	"k8s.io/test-infra/testgrid/config"
	"k8s.io/test-infra/testgrid/state"
	"k8s.io/test-infra/testgrid/util/gcs"

	"cloud.google.com/go/storage"
	"github.com/golang/protobuf/proto"
	"google.golang.org/api/iterator"

	"vbom.ml/util/sortorder"
)
    50  
// options configures the updater.
//
// The fields are populated from command line flags by gatherOptions and
// checked (and defaulted) by validate.
type options struct {
	config           gcs.Path // gs://path/to/config/proto
	creds            string   // --gcp-service-account: /path/to/gcp/creds
	confirm          bool     // --confirm: upload data if set
	group            string   // --test-group: only update the named group if set
	groupConcurrency int      // --group-concurrency: validate replaces 0 with 4 * runtime.NumCPU()
	buildConcurrency int      // --build-concurrency: validate replaces 0 with 4 * runtime.NumCPU()
}
    60  
    61  // validate ensures sane options
    62  func (o *options) validate() error {
    63  	if o.config.String() == "" {
    64  		return errors.New("empty --config")
    65  	}
    66  	if o.config.Bucket() == "k8s-testgrid" { // TODO(fejta): remove
    67  		return fmt.Errorf("--config=%s cannot start with gs://k8s-testgrid", o.config)
    68  	}
    69  	if o.groupConcurrency == 0 {
    70  		o.groupConcurrency = 4 * runtime.NumCPU()
    71  	}
    72  	if o.buildConcurrency == 0 {
    73  		o.buildConcurrency = 4 * runtime.NumCPU()
    74  	}
    75  
    76  	return nil
    77  }
    78  
    79  // gatherOptions reads options from flags
    80  func gatherOptions() options {
    81  	o := options{}
    82  	flag.Var(&o.config, "config", "gs://path/to/config.pb")
    83  	flag.StringVar(&o.creds, "gcp-service-account", "", "/path/to/gcp/creds (use local creds if empty")
    84  	flag.BoolVar(&o.confirm, "confirm", false, "Upload data if set")
    85  	flag.StringVar(&o.group, "test-group", "", "Only update named group if set")
    86  	flag.IntVar(&o.groupConcurrency, "group-concurrency", 0, "Manually define the number of groups to concurrently update if non-zero")
    87  	flag.IntVar(&o.buildConcurrency, "build-concurrency", 0, "Manually define the number of builds to concurrently read if non-zero")
    88  	flag.Parse()
    89  	return o
    90  }
    91  
    92  // testGroupPath() returns the path to a test_group proto given this proto
    93  func testGroupPath(g gcs.Path, name string) (*gcs.Path, error) {
    94  	u, err := url.Parse(name)
    95  	if err != nil {
    96  		return nil, fmt.Errorf("invalid url %s: %v", name, err)
    97  	}
    98  	np, err := g.ResolveReference(u)
    99  	if err == nil && np.Bucket() != g.Bucket() {
   100  		return nil, fmt.Errorf("testGroup %s should not change bucket", name)
   101  	}
   102  	return np, nil
   103  }
   104  
// Build points to a build stored under a particular gcs prefix.
type Build struct {
	Bucket  *storage.BucketHandle // Bucket the build's objects are read from
	Context context.Context       // Parent context for every read of this build
	Prefix  string                // Object prefix of the build; file names such as started.json are appended directly
	number  *int                  // NOTE(review): not referenced anywhere in the visible code
}

// String returns the build's prefix, which is what error messages print.
func (b Build) String() string {
	return b.Prefix
}
   116  
// Started holds the started.json values of the build.
//
// It is filled by JSON-decoding the started.json object of a build.
type Started struct {
	Timestamp   int64             `json:"timestamp"` // epoch seconds
	RepoVersion string            `json:"repo-version"`
	Node        string            `json:"node"`
	Pull        string            `json:"pull"`
	Repos       map[string]string `json:"repos"` // {repo: branch_or_pull} map
}
   125  
// Finished holds the finished.json values of the build
//
// It is filled by JSON-decoding the finished.json object of a build.
type Finished struct {
	// Timestamp is epoch seconds
	Timestamp  int64    `json:"timestamp"`
	Passed     bool     `json:"passed"`
	JobVersion string   `json:"job-version"`
	Metadata   Metadata `json:"metadata"`
	// running is never decoded from JSON; ReadBuild sets it when the
	// finished.json object does not exist, i.e. the job has not completed.
	running bool
}
   135  
   136  // Metadata holds the finished.json values in the metadata key.
   137  //
   138  // Metadata values can either be string or string map of strings
   139  //
   140  // TODO(fejta): figure out which of these we want and document them
   141  // Special values: infra-commit, repos, repo, repo-commit, others
   142  type Metadata map[string]interface{}
   143  
   144  // String returns the name key if its value is a string.
   145  func (m Metadata) String(name string) (*string, bool) {
   146  	if v, ok := m[name]; !ok {
   147  		return nil, false
   148  	} else if t, good := v.(string); !good {
   149  		return nil, true
   150  	} else {
   151  		return &t, true
   152  	}
   153  }
   154  
   155  // Meta returns the name key if its value is a child object.
   156  func (m Metadata) Meta(name string) (*Metadata, bool) {
   157  	if v, ok := m[name]; !ok {
   158  		return nil, true
   159  	} else if t, good := v.(Metadata); !good {
   160  		return nil, false
   161  	} else {
   162  		return &t, true
   163  	}
   164  }
   165  
   166  // ColumnMetadata returns the subset of values in the map that are strings.
   167  func (m Metadata) ColumnMetadata() ColumnMetadata {
   168  	bm := ColumnMetadata{}
   169  	for k, v := range m {
   170  		if s, ok := v.(string); ok {
   171  			bm[k] = s
   172  		}
   173  		// TODO(fejta): handle sub items
   174  	}
   175  	return bm
   176  }
   177  
// JunitSuites holds a <testsuites/> list of JunitSuite results
//
// It is the first shape extractRows tries when decoding a junit artifact.
type JunitSuites struct {
	XMLName xml.Name     `xml:"testsuites"`
	Suites  []JunitSuite `xml:"testsuite"` // One entry per nested <testsuite/> element
}
   183  
// JunitSuite holds <testsuite/> results
//
// extractRows also decodes a junit artifact directly into this type when the
// document root is a single <testsuite/> rather than <testsuites/>.
type JunitSuite struct {
	XMLName  xml.Name      `xml:"testsuite"`
	Name     string        `xml:"name,attr"` // Prefixed onto each test case name when non-empty
	Time     float64       `xml:"time,attr"` // Seconds
	Failures int           `xml:"failures,attr"`
	Tests    int           `xml:"tests,attr"`
	Results  []JunitResult `xml:"testcase"` // One entry per <testcase/> element
	/*
	* <properties><property name="go.version" value="go1.8.3"/></properties>
	 */
}
   196  
   197  // JunitResult holds <testcase/> results
   198  type JunitResult struct {
   199  	Name      string  `xml:"name,attr"`
   200  	Time      float64 `xml:"time,attr"`
   201  	ClassName string  `xml:"classname,attr"`
   202  	Failure   *string `xml:"failure,omitempty"`
   203  	Output    *string `xml:"system-out,omitempty"`
   204  	Error     *string `xml:"system-err,omitempty"`
   205  	Skipped   *string `xml:"skipped,omitempty"`
   206  }
   207  
   208  // Message extracts the message for the junit test case.
   209  //
   210  // Will use the first non-empty <failure/>, <skipped/>, <output/> value.
   211  func (jr JunitResult) Message() string {
   212  	const max = 140
   213  	var msg string
   214  	switch {
   215  	case jr.Failure != nil && *jr.Failure != "":
   216  		msg = *jr.Failure
   217  	case jr.Skipped != nil && *jr.Skipped != "":
   218  		msg = *jr.Skipped
   219  	case jr.Output != nil && *jr.Output != "":
   220  		msg = *jr.Output
   221  	}
   222  	l := len(msg)
   223  	if max == 0 || l <= max {
   224  		return msg
   225  	}
   226  	h := max / 2
   227  	return msg[:h] + "..." + msg[l-h-1:]
   228  }
   229  
   230  // Row converts the junit result into a Row result, prepending the suite name.
   231  func (jr JunitResult) Row(suite string) (string, Row) {
   232  	n := jr.Name
   233  	if suite != "" {
   234  		n = suite + "." + n
   235  	}
   236  	r := Row{
   237  		Metrics: map[string]float64{},
   238  		Metadata: map[string]string{
   239  			"Tests name": n,
   240  		},
   241  	}
   242  	if jr.Time > 0 {
   243  		r.Metrics[elapsedKey] = jr.Time
   244  	}
   245  	if msg := jr.Message(); msg != "" {
   246  		r.Message = msg
   247  	}
   248  	switch {
   249  	case jr.Failure != nil:
   250  		r.Result = state.Row_FAIL
   251  		if r.Message != "" {
   252  			r.Icon = "F"
   253  		}
   254  	case jr.Skipped != nil:
   255  		r.Result = state.Row_PASS_WITH_SKIPS
   256  		if r.Message != "" {
   257  			r.Icon = "S"
   258  		}
   259  	default:
   260  		r.Result = state.Row_PASS
   261  	}
   262  	return n, r
   263  }
   264  
   265  func unmarshalXML(buf []byte, i interface{}) error {
   266  	reader := bytes.NewReader(buf)
   267  	dec := xml.NewDecoder(reader)
   268  	dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   269  		switch charset {
   270  		case "UTF-8", "utf8", "":
   271  			// utf8 is not recognized by golang, but our coalesce.py writes a utf8 doc, which python accepts.
   272  			return input, nil
   273  		default:
   274  			return nil, fmt.Errorf("unknown charset: %s", charset)
   275  		}
   276  	}
   277  	return dec.Decode(i)
   278  }
   279  
   280  func extractRows(buf []byte, meta map[string]string) (map[string][]Row, error) {
   281  	var suites JunitSuites
   282  	// Try to parse it as a <testsuites/> object
   283  	err := unmarshalXML(buf, &suites)
   284  	if err != nil {
   285  		// Maybe it is a <testsuite/> object instead
   286  		suites.Suites = append([]JunitSuite(nil), JunitSuite{})
   287  		ie := unmarshalXML(buf, &suites.Suites[0])
   288  		if ie != nil {
   289  			// Nope, it just doesn't parse
   290  			return nil, fmt.Errorf("not valid testsuites: %v nor testsuite: %v", err, ie)
   291  		}
   292  	}
   293  	rows := map[string][]Row{}
   294  	for _, suite := range suites.Suites {
   295  		for _, sr := range suite.Results {
   296  			if sr.Skipped != nil && len(*sr.Skipped) == 0 {
   297  				continue
   298  			}
   299  
   300  			n, r := sr.Row(suite.Name)
   301  			for k, v := range meta {
   302  				r.Metadata[k] = v
   303  			}
   304  			rows[n] = append(rows[n], r)
   305  		}
   306  	}
   307  	return rows, nil
   308  }
   309  
// ColumnMetadata holds key => value mapping of metadata info.
//
// Metadata.ColumnMetadata builds one from the string-valued entries of a
// build's finished.json metadata.
type ColumnMetadata map[string]string
   312  
// Column represents a build run, which includes one or more row results and metadata.
type Column struct {
	ID       string           // Base name of the build's gcs prefix
	Started  int64            // Timestamp from started.json, epoch seconds
	Finished int64            // Timestamp from finished.json, epoch seconds; 0 while the build has not finished
	Passed   bool             // Passed value from finished.json
	Rows     map[string][]Row // Results keyed by row name; ReadBuild always adds an "Overall" entry
	Metadata ColumnMetadata   // String-valued entries of the finished.json metadata
}
   322  
// Row holds results for a piece of a build run, such as a test result.
type Row struct {
	Result   state.Row_Result   // Outcome of this piece of the run
	Metrics  map[string]float64 // Numeric measurements by name, e.g. elapsedKey
	Metadata map[string]string  // Values available to name formatting, including "Tests name"
	Message  string             // Text shown for the cell, may be empty
	Icon     string             // Short marker shown for the cell, may be empty
}
   331  
   332  // Overall calculates the generated-overall row value for the current column
   333  func (br Column) Overall() Row {
   334  	r := Row{
   335  		Metadata: map[string]string{"Tests name": "Overall"},
   336  	}
   337  	switch {
   338  	case br.Finished > 0:
   339  		// Completed, did we pass?
   340  		if br.Passed {
   341  			r.Result = state.Row_PASS // Yep
   342  		} else {
   343  			r.Result = state.Row_FAIL
   344  		}
   345  		r.Metrics = map[string]float64{
   346  			elapsedKey: float64(br.Finished - br.Started),
   347  		}
   348  	case time.Now().Add(-24*time.Hour).Unix() > br.Started:
   349  		// Timed out
   350  		r.Result = state.Row_FAIL
   351  		r.Message = "Testing did not complete within 24 hours"
   352  		r.Icon = "T"
   353  	default:
   354  		r.Result = state.Row_RUNNING
   355  		r.Message = "Still running; has not finished..."
   356  		r.Icon = "R"
   357  	}
   358  	return r
   359  }
   360  
   361  // AppendMetric adds the value at index to metric.
   362  //
   363  // Handles the details of sparse-encoding the results.
   364  // Indices must be monotonically increasing for the same metric.
   365  func AppendMetric(metric *state.Metric, idx int32, value float64) {
   366  	if l := int32(len(metric.Indices)); l == 0 || metric.Indices[l-2]+metric.Indices[l-1] != idx {
   367  		// If we append V to idx 9 and metric.Indices = [3, 4] then the last filled index is 3+4-1=7
   368  		// So that means we have holes in idx 7 and 8, so start a new group.
   369  		metric.Indices = append(metric.Indices, idx, 1)
   370  	} else {
   371  		metric.Indices[l-1]++ // Expand the length of the current filled list
   372  	}
   373  	metric.Values = append(metric.Values, value)
   374  }
   375  
   376  // FindMetric returns the first metric with the specified name.
   377  func FindMetric(row *state.Row, name string) *state.Metric {
   378  	for _, m := range row.Metrics {
   379  		if m.Name == name {
   380  			return m
   381  		}
   382  	}
   383  	return nil
   384  }
   385  
   386  var noResult = Row{Result: state.Row_NO_RESULT}
   387  
   388  // AppendResult adds the rowResult column to the row.
   389  //
   390  // Handles the details like missing fields and run-length-encoding the result.
   391  func AppendResult(row *state.Row, rowResult Row, count int) {
   392  	latest := int32(rowResult.Result)
   393  	n := len(row.Results)
   394  	switch {
   395  	case n == 0, row.Results[n-2] != latest:
   396  		row.Results = append(row.Results, latest, int32(count))
   397  	default:
   398  		row.Results[n-1] += int32(count)
   399  	}
   400  
   401  	for i := 0; i < count; i++ { // TODO(fejta): update server to allow empty cellids
   402  		row.CellIds = append(row.CellIds, "")
   403  	}
   404  
   405  	// Javascript client expects no result cells to skip icons/messages
   406  	// TODO(fejta): reconsider this
   407  	if rowResult.Result != state.Row_NO_RESULT {
   408  		for i := 0; i < count; i++ {
   409  			row.Messages = append(row.Messages, rowResult.Message)
   410  			row.Icons = append(row.Icons, rowResult.Icon)
   411  		}
   412  	}
   413  }
   414  
// nameConfig describes how Row.Format renders a row name.
type nameConfig struct {
	format string   // fmt.Sprintf format string
	parts  []string // Metadata keys whose values fill the format, in order
}
   419  
   420  func makeNameConfig(tnc *config.TestNameConfig) nameConfig {
   421  	if tnc == nil {
   422  		return nameConfig{
   423  			format: "%s",
   424  			parts:  []string{"Tests name"},
   425  		}
   426  	}
   427  	nc := nameConfig{
   428  		format: tnc.NameFormat,
   429  		parts:  make([]string, len(tnc.NameElements)),
   430  	}
   431  	for i, e := range tnc.NameElements {
   432  		nc.parts[i] = e.TargetConfig
   433  	}
   434  	return nc
   435  }
   436  
   437  // Format renders any requested metadata into the name
   438  func (r Row) Format(config nameConfig, meta map[string]string) string {
   439  	parsed := make([]interface{}, len(config.parts))
   440  	for i, p := range config.parts {
   441  		if v, ok := r.Metadata[p]; ok {
   442  			parsed[i] = v
   443  			continue
   444  		}
   445  		parsed[i] = meta[p] // "" if missing
   446  	}
   447  	return fmt.Sprintf(config.format, parsed...)
   448  }
   449  
// AppendColumn adds the build column to the grid.
//
// This handles details like:
// * rows appearing/disappearing in the middle of the run.
// * adding auto metadata like duration, commit as well as any user-added metadata
// * extracting build metadata into the appropriate column header
// * Ensuring row names are unique and formatted with metadata
//
// headers lists the metadata keys whose values become the column's Extra
// entries, format controls how row names are rendered, and rows indexes the
// rows already present in grid by name. Both grid and rows are updated in
// place.
func AppendColumn(headers []string, format nameConfig, grid *state.Grid, rows map[string]*state.Row, build Column) {
	c := state.Column{
		Build:   build.ID,
		Started: float64(build.Started * 1000), // NOTE(review): presumably epoch seconds => milliseconds, confirm against state.Column
	}
	// Fill one Extra value per header, in header order
	for _, h := range headers {
		if build.Finished == 0 {
			// Metadata is only read for finished builds, so leave the header blank
			c.Extra = append(c.Extra, "")
			continue
		}
		trunc := 0    // Maximum length of the value, 0 means unlimited
		var ah string // Alternate key to try when h is missing
		if h == "Commit" { // TODO(fejta): fix, jobs use explicit key, support truncation
			h = "repo-commit"
			trunc = 9
			ah = "job-version"
		}
		v, ok := build.Metadata[h]
		if !ok {
			// TODO(fejta): fix, make jobs use one or the other
			if ah == "" {
				log.Printf("  %s metadata missing %s", c.Build, h)
				v = "missing"
			} else {
				if av, ok := build.Metadata[ah]; ok {
					// Keep what follows the first "+", or the whole value if there is none
					parts := strings.SplitN(av, "+", 2)
					v = parts[len(parts)-1]
				} else {
					// v stays empty in this case
					log.Printf("  %s metadata missing both keys %s and alternate %s", c.Build, h, ah)
				}
			}
		}
		if trunc > 0 && trunc < len(v) {
			v = v[0:trunc]
		}
		c.Extra = append(c.Extra, v)
	}
	grid.Columns = append(grid.Columns, &c)

	// Start by assuming every known row is absent from this build; rows that
	// do show up are removed from missing below.
	missing := map[string]*state.Row{}
	for name, row := range rows {
		missing[name] = row
	}

	// Names already used within this column
	found := map[string]bool{}

	for target, results := range build.Rows {
		for _, br := range results {
			prefix := br.Format(format, build.Metadata)
			name := prefix
			// Ensure each name is unique
			// If we have multiple results with the same name foo
			// then append " [n]" to the name so we wind up with:
			//   foo
			//   foo [1]
			//   foo [2]
			//   etc
			for idx := 1; found[name]; idx++ {
				// found[name] exists, so try foo [n+1]
				name = fmt.Sprintf("%s [%d]", prefix, idx)
			}
			// hooray, name not in found
			found[name] = true
			delete(missing, name)

			// Does this row already exist?
			r, ok := rows[name]
			if !ok { // New row
				r = &state.Row{
					Name: name,
					Id:   target,
				}
				rows[name] = r
				grid.Rows = append(grid.Rows, r)
				if n := len(grid.Columns); n > 1 {
					// Add missing entries for more recent builds (aka earlier columns)
					AppendResult(r, noResult, n-1)
				}
			}

			AppendResult(r, br, 1)
			for k, v := range br.Metrics {
				m := FindMetric(r, k)
				if m == nil {
					m = &state.Metric{Name: k}
					r.Metrics = append(r.Metrics, m)
				}
				// NOTE(review): the index is the number of stored messages, which
				// AppendResult does not grow for no-result columns, so it can differ
				// from the column position. Confirm this is what the reader expects.
				AppendMetric(m, int32(len(r.Messages)), v)
			}
		}
	}

	// Rows that exist in the grid but not in this build get a no-result cell
	for _, row := range missing {
		AppendResult(row, noResult, 1)
	}
}
   553  
// elapsedKey is the metric name under which a row's duration in seconds is stored.
const elapsedKey = "seconds-elapsed"
   555  
   556  // junit_CONTEXT_TIMESTAMP_THREAD.xml
   557  var re = regexp.MustCompile(`.+/junit(_[^_]+)?(_\d+-\d+)?(_\d+)?\.xml$`)
   558  
   559  // dropPrefix removes the _ in _CONTEXT to help keep the regexp simple
   560  func dropPrefix(name string) string {
   561  	if len(name) == 0 {
   562  		return name
   563  	}
   564  	return name[1:]
   565  }
   566  
   567  // ValidateName checks whether the basename matches a junit file.
   568  //
   569  // Expected format: junit_context_20180102-1256-07.xml
   570  // Results in {
   571  //   "Context": "context",
   572  //   "Timestamp": "20180102-1256",
   573  //   "Thread": "07",
   574  // }
   575  func ValidateName(name string) map[string]string {
   576  	mat := re.FindStringSubmatch(name)
   577  	if mat == nil {
   578  		return nil
   579  	}
   580  	return map[string]string{
   581  		"Context":   dropPrefix(mat[1]),
   582  		"Timestamp": dropPrefix(mat[2]),
   583  		"Thread":    dropPrefix(mat[3]),
   584  	}
   585  
   586  }
   587  
   588  // ReadBuild asynchronously downloads the files in build from gcs and convert them into a build.
   589  func ReadBuild(build Build) (*Column, error) {
   590  	var wg sync.WaitGroup                                             // Each subtask does wg.Add(1), then we wg.Wait() for them to finish
   591  	ctx, cancel := context.WithTimeout(build.Context, 30*time.Second) // Allows aborting after first error
   592  	ec := make(chan error)                                            // Receives errors from anyone
   593  
   594  	// Download started.json, send to sc
   595  	wg.Add(1)
   596  	sc := make(chan Started) // Receives started.json result
   597  	go func() {
   598  		defer wg.Done()
   599  		started, err := func() (Started, error) {
   600  			var started Started
   601  			s := build.Bucket.Object(build.Prefix + "started.json")
   602  			sr, err := s.NewReader(ctx)
   603  			if err != nil {
   604  				return started, fmt.Errorf("build has not started")
   605  			}
   606  			if err = json.NewDecoder(sr).Decode(&started); err != nil {
   607  				return started, fmt.Errorf("could not decode started.json: %v", err)
   608  			}
   609  			return started, nil
   610  		}()
   611  		if err != nil {
   612  			select {
   613  			case <-ctx.Done():
   614  			case ec <- err:
   615  			}
   616  			return
   617  		}
   618  		select {
   619  		case <-ctx.Done():
   620  		case sc <- started:
   621  		}
   622  	}()
   623  
   624  	// Download finished.json, send to fc
   625  	wg.Add(1)
   626  	fc := make(chan Finished) // Receives finished.json result
   627  	go func() {
   628  		defer wg.Done()
   629  		finished, err := func() (Finished, error) {
   630  			f := build.Bucket.Object(build.Prefix + "finished.json")
   631  			fr, err := f.NewReader(ctx)
   632  			var finished Finished
   633  			if err == storage.ErrObjectNotExist { // Job has not (yet) completed
   634  				finished.running = true
   635  				return finished, nil
   636  			} else if err != nil {
   637  				return finished, fmt.Errorf("could not open %s: %v", f, err)
   638  			}
   639  			if err = json.NewDecoder(fr).Decode(&finished); err != nil {
   640  				return finished, fmt.Errorf("could not decode finished.json: %v", err)
   641  			}
   642  			return finished, nil
   643  		}()
   644  		if err != nil {
   645  			select {
   646  			case <-ctx.Done():
   647  			case ec <- err:
   648  			}
   649  			return
   650  		}
   651  		select {
   652  		case <-ctx.Done():
   653  		case fc <- finished:
   654  		}
   655  	}()
   656  
   657  	// List artifacts, send to ac channel
   658  	wg.Add(1)
   659  	ac := make(chan string) // Receives names of arifacts
   660  	go func() {
   661  		defer wg.Done()
   662  		defer close(ac) // No more artifacts
   663  		err := func() error {
   664  			pref := build.Prefix + "artifacts/"
   665  			ai := build.Bucket.Objects(ctx, &storage.Query{Prefix: pref})
   666  			for {
   667  				a, err := ai.Next()
   668  				if err == iterator.Done {
   669  					break
   670  				}
   671  				if err != nil {
   672  					return fmt.Errorf("failed to list %s: %v", pref, err)
   673  				}
   674  				select {
   675  				case <-ctx.Done():
   676  					return fmt.Errorf("interrupted listing %s", pref)
   677  				case ac <- a.Name: // Added
   678  				}
   679  			}
   680  			return nil
   681  		}()
   682  		if err != nil {
   683  			select {
   684  			case <-ctx.Done():
   685  			case ec <- err:
   686  			}
   687  		}
   688  	}()
   689  
   690  	// Download each artifact, send row map to rc
   691  	// With parallelism: 60s without: 220s
   692  	wg.Add(1)
   693  	rc := make(chan map[string][]Row)
   694  	go func() {
   695  		defer wg.Done()
   696  		defer close(rc) // No more rows
   697  		var awg sync.WaitGroup
   698  		for a := range ac {
   699  			select { // Should we stop?
   700  			case <-ctx.Done(): // Yes
   701  				return
   702  			default: // No, keep going
   703  			}
   704  			meta := ValidateName(a)
   705  			if meta == nil { // Not junit
   706  				continue
   707  			}
   708  			awg.Add(1)
   709  			// Read each artifact in a new thread
   710  			go func(ap string, meta map[string]string) {
   711  				defer awg.Done()
   712  				err := func() error {
   713  					ar, err := build.Bucket.Object(ap).NewReader(ctx)
   714  					if err != nil {
   715  						return fmt.Errorf("could not read %s: %v", ap, err)
   716  					}
   717  					if r := ar.Remain(); r > 50e6 {
   718  						return fmt.Errorf("too large: %s is %d > 50M", ap, r)
   719  					}
   720  					buf, err := ioutil.ReadAll(ar)
   721  					if err != nil {
   722  						return fmt.Errorf("partial read of %s: %v", ap, err)
   723  					}
   724  
   725  					select { // Keep going?
   726  					case <-ctx.Done(): // No, cancelled
   727  						return errors.New("aborted artifact read")
   728  					default: // Yes, acquire lock
   729  						// TODO(fejta): consider sync.Map
   730  						rows, err := extractRows(buf, meta)
   731  						if err != nil {
   732  							return fmt.Errorf("failed to parse %s: %v", ap, err)
   733  						}
   734  						rc <- rows
   735  					}
   736  					return nil
   737  				}()
   738  				if err == nil {
   739  					return
   740  				}
   741  				select {
   742  				case <-ctx.Done():
   743  				case ec <- err:
   744  				}
   745  			}(a, meta)
   746  		}
   747  		awg.Wait()
   748  	}()
   749  
   750  	// Append each row into the column
   751  	rows := map[string][]Row{}
   752  	wg.Add(1)
   753  	go func() {
   754  		defer wg.Done()
   755  		for r := range rc {
   756  			select { // Should we continue
   757  			case <-ctx.Done(): // No, aborted
   758  				return
   759  			default: // Yes
   760  			}
   761  			for t, rs := range r {
   762  				rows[t] = append(rows[t], rs...)
   763  			}
   764  		}
   765  	}()
   766  
   767  	// Wait for everyone to complete their work
   768  	go func() {
   769  		wg.Wait()
   770  		select {
   771  		case <-ctx.Done():
   772  			return
   773  		case ec <- nil:
   774  		}
   775  	}()
   776  	var finished *Finished
   777  	var started *Started
   778  	for { // Wait until we receive started and finished and/or an error
   779  		select {
   780  		case err := <-ec:
   781  			if err != nil {
   782  				cancel()
   783  				return nil, fmt.Errorf("failed to read %s: %v", build, err)
   784  			}
   785  			break
   786  		case s := <-sc:
   787  			started = &s
   788  		case f := <-fc:
   789  			finished = &f
   790  		}
   791  		if started != nil && finished != nil {
   792  			break
   793  		}
   794  	}
   795  	br := Column{
   796  		ID:      path.Base(build.Prefix),
   797  		Started: started.Timestamp,
   798  	}
   799  	// Has the build finished?
   800  	if finished.running { // No
   801  		cancel()
   802  		br.Rows = map[string][]Row{
   803  			"Overall": {br.Overall()},
   804  		}
   805  		return &br, nil
   806  	}
   807  	br.Finished = finished.Timestamp
   808  	br.Metadata = finished.Metadata.ColumnMetadata()
   809  	br.Passed = finished.Passed
   810  	or := br.Overall()
   811  	br.Rows = map[string][]Row{
   812  		"Overall": {or},
   813  	}
   814  	select {
   815  	case <-ctx.Done():
   816  		cancel()
   817  		return nil, fmt.Errorf("interrupted reading %s", build)
   818  	case err := <-ec:
   819  		if err != nil {
   820  			cancel()
   821  			return nil, fmt.Errorf("failed to read %s: %v", build, err)
   822  		}
   823  	}
   824  
   825  	for t, rs := range rows {
   826  		br.Rows[t] = append(br.Rows[t], rs...)
   827  	}
   828  	if or.Result == state.Row_FAIL { // Ensure failing build has a failing row
   829  		ft := false
   830  		for n, rs := range br.Rows {
   831  			if n == "Overall" {
   832  				continue
   833  			}
   834  			for _, r := range rs {
   835  				if r.Result == state.Row_FAIL {
   836  					ft = true // Failing test, huzzah!
   837  					break
   838  				}
   839  			}
   840  			if ft {
   841  				break
   842  			}
   843  		}
   844  		if !ft { // Nope, add the F icon and an explanatory message
   845  			br.Rows["Overall"][0].Icon = "F"
   846  			br.Rows["Overall"][0].Message = "Build failed outside of test results"
   847  		}
   848  	}
   849  
   850  	cancel()
   851  	return &br, nil
   852  }
   853  
   854  // Builds is a slice of builds.
   855  type Builds []Build
   856  
   857  func (b Builds) Len() int      { return len(b) }
   858  func (b Builds) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
   859  func (b Builds) Less(i, j int) bool {
   860  	return sortorder.NaturalLess(b[i].Prefix, b[j].Prefix)
   861  }
   862  
   863  // listBuilds lists and sorts builds under path, sending them to the builds channel.
   864  func listBuilds(ctx context.Context, client *storage.Client, path gcs.Path) (Builds, error) {
   865  	log.Printf("LIST: %s", path)
   866  	p := path.Object()
   867  	if !strings.HasSuffix(p, "/") {
   868  		p += "/"
   869  	}
   870  	bkt := client.Bucket(path.Bucket())
   871  	it := bkt.Objects(ctx, &storage.Query{
   872  		Delimiter: "/",
   873  		Prefix:    p,
   874  	})
   875  	var all Builds
   876  	for {
   877  		objAttrs, err := it.Next()
   878  		if err == iterator.Done {
   879  			break
   880  		}
   881  		if err != nil {
   882  			return nil, fmt.Errorf("failed to list objects: %v", err)
   883  		}
   884  
   885  		// if this is a link under directory, resolve the build value
   886  		if link := objAttrs.Metadata["link"]; len(link) > 0 {
   887  			// links created by bootstrap.py have a space
   888  			link = strings.TrimSpace(link)
   889  			u, err := url.Parse(link)
   890  			if err != nil {
   891  				return nil, fmt.Errorf("could not parse link for key %s: %v", objAttrs.Name, err)
   892  			}
   893  			if !strings.HasSuffix(u.Path, "/") {
   894  				u.Path += "/"
   895  			}
   896  			var linkPath gcs.Path
   897  			if err := linkPath.SetURL(u); err != nil {
   898  				return nil, fmt.Errorf("could not make GCS path for key %s: %v", objAttrs.Name, err)
   899  			}
   900  			all = append(all, Build{
   901  				Bucket:  bkt,
   902  				Context: ctx,
   903  				Prefix:  linkPath.Object(),
   904  			})
   905  			continue
   906  		}
   907  
   908  		if len(objAttrs.Prefix) == 0 {
   909  			continue
   910  		}
   911  
   912  		all = append(all, Build{
   913  			Bucket:  bkt,
   914  			Context: ctx,
   915  			Prefix:  objAttrs.Prefix,
   916  		})
   917  	}
   918  	// Expect builds to be in monotonically increasing order.
   919  	// So build9 should be followed by build10 or build888 but not build8
   920  	sort.Sort(sort.Reverse(all))
   921  	return all, nil
   922  }
   923  
   924  // Headers returns the list of ColumnHeader ConfigurationValues for this group.
   925  func Headers(group config.TestGroup) []string {
   926  	var extra []string
   927  	for _, h := range group.ColumnHeader {
   928  		extra = append(extra, h.ConfigurationValue)
   929  	}
   930  	return extra
   931  }
   932  
   933  // Rows is a slice of Row pointers
   934  type Rows []*state.Row
   935  
   936  func (r Rows) Len() int      { return len(r) }
   937  func (r Rows) Swap(i, j int) { r[i], r[j] = r[j], r[i] }
   938  func (r Rows) Less(i, j int) bool {
   939  	return sortorder.NaturalLess(r[i].Name, r[j].Name)
   940  }
   941  
   942  // ReadBuilds will asynchronously construct a Grid for the group out of the specified builds.
   943  func ReadBuilds(parent context.Context, group config.TestGroup, builds Builds, max int, dur time.Duration, concurrency int) (*state.Grid, error) {
   944  	// Spawn build readers
   945  	if concurrency == 0 {
   946  		return nil, fmt.Errorf("zero readers for %s", group.Name)
   947  	}
   948  	ctx, cancel := context.WithCancel(parent)
   949  	var stop time.Time
   950  	if dur != 0 {
   951  		stop = time.Now().Add(-dur)
   952  	}
   953  	lb := len(builds)
   954  	if lb > max {
   955  		log.Printf("  Truncating %d %s results to %d", lb, group.Name, max)
   956  		lb = max
   957  	}
   958  	cols := make([]*Column, lb)
   959  	log.Printf("UPDATE: %s since %s (%d)", group.Name, stop, stop.Unix())
   960  	ec := make(chan error)
   961  	old := make(chan int)
   962  	var wg sync.WaitGroup
   963  
   964  	// Send build indices to readers
   965  	indices := make(chan int)
   966  	wg.Add(1)
   967  	go func() {
   968  		defer wg.Done()
   969  		defer close(indices)
   970  		for i := range builds[:lb] {
   971  			select {
   972  			case <-ctx.Done():
   973  				return
   974  			case <-old:
   975  				return
   976  			case indices <- i:
   977  			}
   978  		}
   979  	}()
   980  
   981  	// Concurrently receive indices and read builds
   982  	for i := 0; i < concurrency; i++ {
   983  		wg.Add(1)
   984  		go func() {
   985  			defer wg.Done()
   986  			for {
   987  				select {
   988  				case <-ctx.Done():
   989  					return
   990  				case i, open := <-indices:
   991  					if !open {
   992  						return
   993  					}
   994  					b := builds[i]
   995  					c, err := ReadBuild(b)
   996  					if err != nil {
   997  						ec <- err
   998  						return
   999  					}
  1000  					cols[i] = c
  1001  					if c.Started < stop.Unix() {
  1002  						select {
  1003  						case <-ctx.Done():
  1004  						case old <- i:
  1005  							log.Printf("STOP: %d %s started at %d < %d", i, b.Prefix, c.Started, stop.Unix())
  1006  						default: // Someone else may have already reported an old result
  1007  						}
  1008  					}
  1009  				}
  1010  			}
  1011  		}()
  1012  	}
  1013  
  1014  	// Wait for everyone to finish
  1015  	go func() {
  1016  		wg.Wait()
  1017  		select {
  1018  		case <-ctx.Done():
  1019  		case ec <- nil: // No error
  1020  		}
  1021  	}()
  1022  
  1023  	// Determine if we got an error
  1024  	select {
  1025  	case <-ctx.Done():
  1026  		cancel()
  1027  		return nil, fmt.Errorf("interrupted reading %s", group.Name)
  1028  	case err := <-ec:
  1029  		if err != nil {
  1030  			cancel()
  1031  			return nil, fmt.Errorf("error reading %s: %v", group.Name, err)
  1032  		}
  1033  	}
  1034  
  1035  	// Add the columns into a grid message
  1036  	grid := &state.Grid{}
  1037  	rows := map[string]*state.Row{} // For fast target => row lookup
  1038  	h := Headers(group)
  1039  	nc := makeNameConfig(group.TestNameConfig)
  1040  	for _, c := range cols {
  1041  		select {
  1042  		case <-ctx.Done():
  1043  			cancel()
  1044  			return nil, fmt.Errorf("interrupted appending columns to %s", group.Name)
  1045  		default:
  1046  		}
  1047  		if c == nil {
  1048  			continue
  1049  		}
  1050  		AppendColumn(h, nc, grid, rows, *c)
  1051  		if c.Started < stop.Unix() { // There may be concurrency results < stop.Unix()
  1052  			log.Printf("  %s#%s before %s, stopping...", group.Name, c.ID, stop)
  1053  			break // Just process the first result < stop.Unix()
  1054  		}
  1055  	}
  1056  	sort.Stable(Rows(grid.Rows))
  1057  	cancel()
  1058  	return grid, nil
  1059  }
  1060  
  1061  // Days converts days float into a time.Duration, assuming a 24 hour day.
  1062  //
  1063  // A day is not always 24 hours due to things like leap-seconds.
  1064  // We do not need this level of precision though, so ignore the complexity.
  1065  func Days(d float64) time.Duration {
  1066  	return time.Duration(24*d) * time.Hour // Close enough
  1067  }
  1068  
  1069  // ReadConfig reads the config from gcs and unmarshals it into a Configuration struct.
  1070  func ReadConfig(ctx context.Context, obj *storage.ObjectHandle) (*config.Configuration, error) {
  1071  	r, err := obj.NewReader(ctx)
  1072  	if err != nil {
  1073  		return nil, fmt.Errorf("failed to open config: %v", err)
  1074  	}
  1075  	buf, err := ioutil.ReadAll(r)
  1076  	if err != nil {
  1077  		return nil, fmt.Errorf("failed to read config: %v", err)
  1078  	}
  1079  	var cfg config.Configuration
  1080  	if err = proto.Unmarshal(buf, &cfg); err != nil {
  1081  		return nil, fmt.Errorf("failed to parse: %v", err)
  1082  	}
  1083  	return &cfg, nil
  1084  }
  1085  
  1086  // Group finds the test group in cfg matching name.
  1087  func Group(cfg config.Configuration, name string) (*config.TestGroup, bool) {
  1088  	for _, g := range cfg.TestGroups {
  1089  		if g.Name == name {
  1090  			return g, true
  1091  		}
  1092  	}
  1093  	return nil, false
  1094  }
  1095  
  1096  func main() {
  1097  	opt := gatherOptions()
  1098  	if err := opt.validate(); err != nil {
  1099  		log.Fatalf("Invalid flags: %v", err)
  1100  	}
  1101  	if !opt.confirm {
  1102  		log.Println("--confirm=false (DRY-RUN): will not write to gcs")
  1103  	}
  1104  
  1105  	ctx := context.Background()
  1106  	client, err := gcs.ClientWithCreds(ctx, opt.creds)
  1107  	if err != nil {
  1108  		log.Fatalf("Failed to create storage client: %v", err)
  1109  	}
  1110  
  1111  	cfg, err := ReadConfig(ctx, client.Bucket(opt.config.Bucket()).Object(opt.config.Object()))
  1112  	if err != nil {
  1113  		log.Fatalf("Failed to read %s: %v", opt.config, err)
  1114  	}
  1115  	log.Printf("Found %d groups", len(cfg.TestGroups))
  1116  
  1117  	groups := make(chan config.TestGroup)
  1118  	var wg sync.WaitGroup
  1119  
  1120  	for i := 0; i < opt.groupConcurrency; i++ {
  1121  		wg.Add(1)
  1122  		go func() {
  1123  			for tg := range groups {
  1124  				tgp, err := testGroupPath(opt.config, tg.Name)
  1125  				if err == nil {
  1126  					err = updateGroup(ctx, client, tg, *tgp, opt.buildConcurrency, opt.confirm)
  1127  				}
  1128  				if err != nil {
  1129  					log.Printf("FAIL: %v", err)
  1130  				}
  1131  			}
  1132  			wg.Done()
  1133  		}()
  1134  	}
  1135  
  1136  	if opt.group != "" { // Just a specific group
  1137  		// o := "ci-kubernetes-test-go"
  1138  		// o = "ci-kubernetes-node-kubelet-stable3"
  1139  		// gs://kubernetes-jenkins/logs/ci-kubernetes-test-go
  1140  		// gs://kubernetes-jenkins/pr-logs/pull-ingress-gce-e2e
  1141  		o := opt.group
  1142  		if tg, ok := Group(*cfg, o); !ok {
  1143  			log.Fatalf("Failed to find %s in %s", o, opt.config)
  1144  		} else {
  1145  			groups <- *tg
  1146  		}
  1147  	} else { // All groups
  1148  		for _, tg := range cfg.TestGroups {
  1149  			groups <- *tg
  1150  		}
  1151  	}
  1152  	close(groups)
  1153  	wg.Wait()
  1154  }
  1155  
  1156  func updateGroup(ctx context.Context, client *storage.Client, tg config.TestGroup, gridPath gcs.Path, concurrency int, write bool) error {
  1157  	o := tg.Name
  1158  
  1159  	var tgPath gcs.Path
  1160  	if err := tgPath.Set("gs://" + tg.GcsPrefix); err != nil {
  1161  		return fmt.Errorf("group %s has an invalid gcs_prefix %s: %v", o, tg.GcsPrefix, err)
  1162  	}
  1163  
  1164  	g := state.Grid{}
  1165  	g.Columns = append(g.Columns, &state.Column{Build: "first", Started: 1})
  1166  	builds, err := listBuilds(ctx, client, tgPath)
  1167  	if err != nil {
  1168  		return fmt.Errorf("failed to list %s builds: %v", o, err)
  1169  	}
  1170  	grid, err := ReadBuilds(ctx, tg, builds, 50, Days(7), concurrency)
  1171  	if err != nil {
  1172  		return err
  1173  	}
  1174  	buf, err := marshalGrid(*grid)
  1175  	if err != nil {
  1176  		return fmt.Errorf("failed to marshal %s grid: %v", o, err)
  1177  	}
  1178  	tgp := gridPath
  1179  	if !write {
  1180  		log.Printf("  Not writing %s (%d bytes) to %s", o, len(buf), tgp)
  1181  	} else {
  1182  		log.Printf("  Writing %s (%d bytes) to %s", o, len(buf), tgp)
  1183  		if err := gcs.Upload(ctx, client, tgp, buf); err != nil {
  1184  			return fmt.Errorf("upload %s to %s failed: %v", o, tgp, err)
  1185  		}
  1186  	}
  1187  	log.Printf("WROTE: %s, %dx%d grid (%s, %d bytes)", tg.Name, len(grid.Columns), len(grid.Rows), tgp, len(buf))
  1188  	return nil
  1189  }
  1190  
  1191  // marhshalGrid serializes a state proto into zlib-compressed bytes.
  1192  func marshalGrid(grid state.Grid) ([]byte, error) {
  1193  	buf, err := proto.Marshal(&grid)
  1194  	if err != nil {
  1195  		return nil, fmt.Errorf("proto encoding failed: %v", err)
  1196  	}
  1197  	var zbuf bytes.Buffer
  1198  	zw := zlib.NewWriter(&zbuf)
  1199  	if _, err = zw.Write(buf); err != nil {
  1200  		return nil, fmt.Errorf("zlib compression failed: %v", err)
  1201  	}
  1202  	if err = zw.Close(); err != nil {
  1203  		return nil, fmt.Errorf("zlib closing failed: %v", err)
  1204  	}
  1205  	return zbuf.Bytes(), nil
  1206  }