github.com/GoogleCloudPlatform/testgrid@v0.0.174/pkg/updater/read.go (about)

     1  /*
     2  Copyright 2020 The TestGrid Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package updater
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"path"
    25  	"sort"
    26  	"strings"
    27  	"sync"
    28  	"time"
    29  
    30  	"cloud.google.com/go/storage"
    31  	configpb "github.com/GoogleCloudPlatform/testgrid/pb/config"
    32  	evalpb "github.com/GoogleCloudPlatform/testgrid/pb/custom_evaluator"
    33  	statepb "github.com/GoogleCloudPlatform/testgrid/pb/state"
    34  	statuspb "github.com/GoogleCloudPlatform/testgrid/pb/test_status"
    35  	"github.com/GoogleCloudPlatform/testgrid/util/gcs"
    36  	"github.com/fvbommel/sortorder"
    37  	"github.com/sirupsen/logrus"
    38  )
    39  
    40  // hintStarted returns the maximum hint
    41  func hintStarted(cols []InflatedColumn) string {
    42  	var hint string
    43  	for i, col := range cols {
    44  		if newHint := col.Column.Hint; i == 0 || sortorder.NaturalLess(hint, newHint) {
    45  			hint = newHint
    46  		}
    47  	}
    48  	return hint
    49  }
    50  
    51  func gcsColumnReader(client gcs.Client, buildTimeout time.Duration, readResult *resultReader, enableIgnoreSkip bool) ColumnReader {
    52  	return func(ctx context.Context, parentLog logrus.FieldLogger, tg *configpb.TestGroup, oldCols []InflatedColumn, stop time.Time, receivers chan<- InflatedColumn) error {
    53  		tgPaths, err := groupPaths(tg)
    54  		if err != nil {
    55  			return fmt.Errorf("group path: %w", err)
    56  		}
    57  
    58  		since := hintStarted(oldCols)
    59  		log := parentLog.WithField("since", since)
    60  
    61  		log.Trace("Listing builds...")
    62  		listBuildsStart := time.Now()
    63  		builds, err := listBuilds(ctx, client, since, tgPaths...)
    64  		if errors.Is(err, storage.ErrBucketNotExist) {
    65  			log.WithError(err).Info("Bucket does not exist")
    66  			return nil
    67  		}
    68  		if err != nil {
    69  			return fmt.Errorf("list builds: %w", err)
    70  		}
    71  		log.WithField("listBuilds", time.Since(listBuildsStart)).WithField("total", len(builds)).Debug("Listed builds")
    72  
    73  		readColumns(ctx, client, log, tg, builds, stop, buildTimeout, receivers, readResult, enableIgnoreSkip)
    74  		return nil
    75  	}
    76  }
    77  
// resultReaderPool starts `concurrency` worker goroutines that service
// readResult calls until poolCtx is canceled, and returns a resultReader
// whose read function enqueues work onto the shared pool.
func resultReaderPool(poolCtx context.Context, log *logrus.Entry, concurrency int) *resultReader {

	// request carries one readResult call's inputs; res and err are filled
	// in by a worker, and wg signals the requester once they are set.
	type request struct {
		ctx    context.Context
		client gcs.Downloader
		build  gcs.Build
		stop   time.Time
		res    *gcsResult
		err    error
		wg     sync.WaitGroup
	}

	ch := make(chan *request, concurrency)

	var wg sync.WaitGroup
	wg.Add(concurrency)
	log = log.WithField("concurrency", concurrency)
	log.Info("Starting up result reader pool")

	// Workers: drain ch until it is closed, performing one read at a time.
	for i := 0; i < concurrency; i++ {
		go func() {
			defer wg.Done()
			for req := range ch {
				req.res, req.err = readResult(req.ctx, req.client, req.build, req.stop)
				req.wg.Done() // wake the requester waiting in readResultViaPool
			}
		}()
	}

	// Shut the pool down once poolCtx is canceled.
	// NOTE(review): a read enqueued after close(ch) would panic (send on
	// closed channel) — confirm callers stop issuing reads before poolCtx
	// is canceled.
	go func() {
		<-poolCtx.Done()
		log.Info("Shutting down result reader pool")
		close(ch)
		wg.Wait()
		log.Info("Result reader pool stopped")
	}()

	// readResultViaPool enqueues a request and returns a function that
	// blocks until a worker has processed it. If ctx is already done it
	// returns a function that just reports ctx.Err().
	readResultViaPool := func(ctx context.Context, client gcs.Downloader, build gcs.Build, stop time.Time) func() (*gcsResult, error) {

		req := &request{
			ctx:    ctx,
			client: client,
			build:  build,
			stop:   stop,
		}
		req.wg.Add(1)
		select {
		case <-ctx.Done():
			return func() (*gcsResult, error) { return nil, ctx.Err() }
		case ch <- req: // wait for request to get onto the queue
			return func() (*gcsResult, error) {
				req.wg.Wait()
				return req.res, req.err
			}
		}
	}

	return &resultReader{
		lock: &sync.Mutex{},
		read: readResultViaPool,
	}
}
   140  
// resultReader reads build results, typically via a shared worker pool
// (see resultReaderPool).
type resultReader struct {
	// lock serializes access for callers that need exclusive use of the
	// reader (see readColumns).
	lock sync.Locker
	// read enqueues a build download and returns a function that blocks
	// until the result (or error) is available.
	read func(context.Context, gcs.Downloader, gcs.Build, time.Time) func() (*gcsResult, error)
}
   145  
// readColumns will list, download and process builds into inflatedColumns.
//
// Builds are enqueued in reverse slice order, one at a time; each result —
// or a synthetic error column when the download fails — is sent to
// receivers. Returns early when ctx is canceled.
//
// NOTE(review): buildTimeout is currently unused in this function body.
func readColumns(ctx context.Context, client gcs.Downloader, log logrus.FieldLogger, group *configpb.TestGroup, builds []gcs.Build, stop time.Time, buildTimeout time.Duration, receivers chan<- InflatedColumn, readResult *resultReader, enableIgnoreSkip bool) {
	if len(builds) == 0 {
		return
	}

	// Cancel any in-flight reads once we return.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	nameCfg := makeNameConfig(group)
	// Configured column-header keys, resolved per column from build metadata.
	var heads []string
	for _, h := range group.ColumnHeader {
		heads = append(heads, h.ConfigurationValue)
	}

	// resp pairs a build with a deferred accessor for its downloaded result.
	type resp struct {
		build gcs.Build
		res   func() (*gcsResult, error)
	}

	ch := make(chan resp)
	var wg sync.WaitGroup

	// Consumer goroutine: converts each downloaded result into an
	// InflatedColumn and forwards it to receivers.
	wg.Add(1)
	go func() {
		defer wg.Done()
		// TODO(fejta): restore inter-build concurrency
		var failures int // since last good column
		var extra []string
		var started float64
		for resp := range ch {
			b := resp.build
			log := log.WithField("build", b)
			result, err := resp.res() // blocks until the pooled read finishes
			id := path.Base(b.Path.Object())
			var col InflatedColumn
			if err != nil {
				// Synthesize a column so the failure is visible in the grid.
				failures++
				log.WithError(err).Trace("Failed to read build")
				if extra == nil {
					extra = make([]string, len(heads))
				}
				// Place each consecutive failure slightly after the last
				// good column's start time so ordering stays stable.
				when := started + 0.01*float64(failures)
				var ancientErr *ancientError
				var noStartErr *noStartError
				if errors.As(err, &ancientErr) {
					col = ancientColumn(id, when, extra, ancientErr.Error())
				} else if errors.As(err, &noStartErr) {
					col = noStartColumn(id, when, extra, noStartErr.Error())
				} else {
					msg := fmt.Sprintf("Failed to download %s: %s", b, err.Error())
					col = erroredColumn(id, when, extra, msg)
				}
			} else {
				opts := makeOptions(group)
				if !enableIgnoreSkip {
					// Feature gate: force ignoreSkip off when disabled.
					opts.ignoreSkip = false
				}
				col = convertResult(log, nameCfg, id, heads, *result, opts)
				log.WithField("rows", len(col.Cells)).Debug("Read result")
				failures = 0
				extra = col.Column.Extra
				started = col.Column.Started
			}

			select {
			case <-ctx.Done():
				return
			case receivers <- col:
			}
		}
	}()
	// Runs last: wait for the consumer to drain ch (closed below) before
	// returning.
	defer wg.Wait()

	defer close(ch)
	// Hold the shared reader lock for larger groups so one group cannot
	// monopolize the download pool.
	// NOTE(review): the threshold of 2 looks arbitrary — confirm intent.
	if len(builds) > 2 {
		readResult.lock.Lock()
		defer readResult.lock.Unlock()
	}
	// Producer: enqueue reads in reverse order. readResult.read returns
	// immediately with a function that waits for the pooled download.
	for i := len(builds) - 1; i >= 0; i-- {
		b := builds[i]
		r := resp{
			build: b,
			res:   readResult.read(ctx, client, b, stop),
		}
		select {
		case <-ctx.Done():
			return
		case ch <- r:
		}
	}
}
   238  
   239  func ancientColumn(id string, when float64, extra []string, msg string) InflatedColumn {
   240  	return InflatedColumn{
   241  		Column: &statepb.Column{
   242  			Build:   id,
   243  			Hint:    id,
   244  			Started: when,
   245  			Extra:   extra,
   246  		},
   247  		Cells: map[string]Cell{
   248  			overallRow: {
   249  				Message: msg,
   250  				Result:  statuspb.TestStatus_UNKNOWN,
   251  			},
   252  		},
   253  	}
   254  }
   255  
   256  func noStartColumn(id string, when float64, extra []string, msg string) InflatedColumn {
   257  	return InflatedColumn{
   258  		Column: &statepb.Column{
   259  			Build:   id,
   260  			Hint:    id,
   261  			Started: when,
   262  			Extra:   extra,
   263  		},
   264  		Cells: map[string]Cell{
   265  			overallRow: {
   266  				Message: msg,
   267  				Result:  statuspb.TestStatus_RUNNING,
   268  			},
   269  		},
   270  	}
   271  }
   272  
   273  func erroredColumn(id string, when float64, extra []string, msg string) InflatedColumn {
   274  	return InflatedColumn{
   275  		Column: &statepb.Column{
   276  			Build:   id,
   277  			Hint:    id,
   278  			Started: when,
   279  			Extra:   extra,
   280  		},
   281  		Cells: map[string]Cell{
   282  			overallRow: {
   283  				Message: msg,
   284  				Result:  statuspb.TestStatus_TOOL_FAIL,
   285  			},
   286  		},
   287  	}
   288  }
   289  
// groupOptions captures per-test-group settings used when converting a
// downloaded result into a column (see makeOptions for how each field is
// derived from the TestGroup config).
type groupOptions struct {
	merge          bool   // from !TestGroup.DisableMergedStatus
	analyzeProwJob bool   // from !TestGroup.DisableProwjobAnalysis
	addCellID      bool   // true when BuildOverrideStrftime is set
	metricKey      string // TestGroup.ShortTextMetric
	buildKey       string // TestGroup.BuildOverrideConfigurationValue
	userKey        string // TestGroup.UserProperty
	annotations    []*configpb.TestGroup_TestAnnotation
	rules          []*evalpb.Rule // custom evaluator rule set
	ignoreSkip     bool           // TestGroup.IgnoreSkip
}
   301  
   302  func makeOptions(group *configpb.TestGroup) groupOptions {
   303  	return groupOptions{
   304  		merge:          !group.DisableMergedStatus,
   305  		analyzeProwJob: !group.DisableProwjobAnalysis,
   306  		addCellID:      group.BuildOverrideStrftime != "",
   307  		metricKey:      group.ShortTextMetric,
   308  		buildKey:       group.BuildOverrideConfigurationValue,
   309  		userKey:        group.UserProperty,
   310  		annotations:    group.TestAnnotations,
   311  		rules:          group.GetCustomEvaluatorRuleSet().GetRules(),
   312  		ignoreSkip:     group.GetIgnoreSkip(),
   313  	}
   314  }
   315  
// Sentinel part names understood by nameConfig.render.
const (
	testsName = "Tests name" // rendered as the test case name
	jobName   = "Job name"   // rendered as the job name
)
   320  
// nameConfig describes how to format a row (test) name from its parts.
type nameConfig struct {
	format   string   // fmt format string, one verb per part
	parts    []string // jobName, testsName, or a metadata key
	multiJob bool     // true when the group covers multiple GCS prefixes
}
   326  
   327  // render the metadata into the expect test name format.
   328  //
   329  // Argument order determines precedence.
   330  func (nc nameConfig) render(job, test string, metadatas ...map[string]string) string {
   331  	parsed := make([]interface{}, len(nc.parts))
   332  	for i, p := range nc.parts {
   333  		var s string
   334  		switch p {
   335  		case jobName:
   336  			s = job
   337  		case testsName:
   338  			s = test
   339  		default:
   340  			for _, metadata := range metadatas {
   341  				v, present := metadata[p]
   342  				if present {
   343  					s = v
   344  					break
   345  				}
   346  			}
   347  		}
   348  		parsed[i] = s
   349  	}
   350  	return fmt.Sprintf(nc.format, parsed...)
   351  }
   352  
   353  func makeNameConfig(group *configpb.TestGroup) nameConfig {
   354  	nameCfg := convertNameConfig(group.TestNameConfig)
   355  	if strings.Contains(gcsPrefix(group), ",") {
   356  		nameCfg.multiJob = true
   357  		ensureJobName(&nameCfg)
   358  	}
   359  	return nameCfg
   360  }
   361  
   362  func firstFilled(strs ...string) string {
   363  	for _, s := range strs {
   364  		if s != "" {
   365  			return s
   366  		}
   367  	}
   368  	return ""
   369  }
   370  
   371  func convertNameConfig(tnc *configpb.TestNameConfig) nameConfig {
   372  	if tnc == nil {
   373  		return nameConfig{
   374  			format: "%s",
   375  			parts:  []string{testsName},
   376  		}
   377  	}
   378  	nc := nameConfig{
   379  		format: tnc.NameFormat,
   380  		parts:  make([]string, len(tnc.NameElements)),
   381  	}
   382  	for i, e := range tnc.NameElements {
   383  		// TODO(fejta): build_target = true
   384  		// TODO(fejta): tags = 'SOMETHING'
   385  		nc.parts[i] = firstFilled(e.TargetConfig, e.TestProperty)
   386  	}
   387  	return nc
   388  }
   389  
   390  func ensureJobName(nc *nameConfig) {
   391  	for _, p := range nc.parts {
   392  		if p == jobName {
   393  			return
   394  		}
   395  	}
   396  	nc.format = "%s." + nc.format
   397  	nc.parts = append([]string{jobName}, nc.parts...)
   398  }
   399  
// ancientError reports a build that started before the cutoff time.
type ancientError struct {
	msg string // human-readable explanation, returned by Error
}

// Error returns the message describing why the build is too old.
func (e *ancientError) Error() string {
	return e.msg
}
   407  
// noStartError reports a build whose started.json timestamp is zero.
type noStartError struct{}

// Error implements the error interface with a fixed message.
func (e *noStartError) Error() string {
	return "Start timestamp for this job is 0."
}
   413  
   414  // readResult will download all GCS artifacts in parallel.
   415  //
   416  // Specifically download the following files:
   417  // * started.json
   418  // * finished.json
   419  // * any junit.xml files under the artifacts directory.
   420  func readResult(parent context.Context, client gcs.Downloader, build gcs.Build, stop time.Time) (*gcsResult, error) {
   421  	ctx, cancel := context.WithCancel(parent) // Allows aborting after first error
   422  	defer cancel()
   423  	result := gcsResult{
   424  		job:   build.Job(),
   425  		build: build.Build(),
   426  	}
   427  	ec := make(chan error) // Receives errors from anyone
   428  
   429  	var lock sync.Mutex
   430  	addMalformed := func(s ...string) {
   431  		lock.Lock()
   432  		defer lock.Unlock()
   433  		result.malformed = append(result.malformed, s...)
   434  	}
   435  
   436  	var work int
   437  
   438  	// Download podinfo.json
   439  	work++
   440  	go func() {
   441  		pi, err := build.PodInfo(ctx, client)
   442  		switch {
   443  		case errors.Is(err, io.EOF):
   444  			addMalformed("podinfo.json")
   445  			err = nil
   446  		case err != nil:
   447  			err = fmt.Errorf("podinfo: %w", err)
   448  		case pi != nil:
   449  			result.podInfo = *pi
   450  		}
   451  		select {
   452  		case <-ctx.Done():
   453  		case ec <- err:
   454  		}
   455  	}()
   456  
   457  	// Download started.json
   458  	work++
   459  	go func() {
   460  		s, err := build.Started(ctx, client)
   461  		switch {
   462  		case errors.Is(err, io.EOF):
   463  			addMalformed("started.json")
   464  			err = nil
   465  		case err != nil:
   466  			err = fmt.Errorf("started: %w", err)
   467  		case time.Unix(s.Timestamp, 0).Before(stop):
   468  			err = &ancientError{fmt.Sprintf("build too old; started %v before %v)", s.Timestamp, stop.Unix())}
   469  			if s.Timestamp == 0 {
   470  				err = &noStartError{}
   471  			}
   472  		default:
   473  			result.started = *s
   474  		}
   475  		select {
   476  		case <-ctx.Done():
   477  		case ec <- err:
   478  		}
   479  	}()
   480  
   481  	// Download finished.json
   482  	work++
   483  	go func() {
   484  		f, err := build.Finished(ctx, client)
   485  		switch {
   486  		case errors.Is(err, io.EOF):
   487  			addMalformed("finished.json")
   488  			err = nil
   489  		case err != nil:
   490  			err = fmt.Errorf("finished: %w", err)
   491  		default:
   492  			result.finished = *f
   493  		}
   494  		select {
   495  		case <-ctx.Done():
   496  		case ec <- err:
   497  		}
   498  	}()
   499  
   500  	// Download suites
   501  	work++
   502  	go func() {
   503  		suites, err := readSuites(ctx, client, build)
   504  		if err != nil {
   505  			err = fmt.Errorf("suites: %w", err)
   506  		}
   507  		var problems []string
   508  		for _, s := range suites {
   509  			if s.Err != nil {
   510  				p := strings.TrimPrefix(s.Path, build.Path.String())
   511  				problems = append(problems, fmt.Sprintf("%s: %s", p, s.Err))
   512  			} else {
   513  				result.suites = append(result.suites, s)
   514  			}
   515  		}
   516  		if len(problems) > 0 {
   517  			addMalformed(problems...)
   518  		}
   519  
   520  		select {
   521  		case <-ctx.Done():
   522  		case ec <- err:
   523  		}
   524  	}()
   525  
   526  	for ; work > 0; work-- {
   527  		select {
   528  		case <-ctx.Done():
   529  			return nil, fmt.Errorf("timeout: %w", ctx.Err())
   530  		case err := <-ec:
   531  			if err != nil {
   532  				return nil, err
   533  			}
   534  		}
   535  	}
   536  	sort.Slice(result.malformed, func(i, j int) bool {
   537  		return result.malformed[i] < result.malformed[j]
   538  	})
   539  	return &result, nil
   540  }
   541  
// readSuites asynchronously lists and downloads junit.xml files
//
// Three goroutines cooperate: one lists artifact names into artifacts, one
// downloads/parses up to max suites into suitesChan, and one collects the
// parsed suites, sending nil on ec when the stream is fully drained.
func readSuites(parent context.Context, client gcs.Downloader, build gcs.Build) ([]gcs.SuitesMeta, error) {
	ctx, cancel := context.WithCancel(parent)
	defer cancel()
	ec := make(chan error)

	// List artifact names under the build.
	artifacts := make(chan string, 1)
	go func() {
		defer close(artifacts) // No more artifacts
		if err := build.Artifacts(ctx, client, artifacts); err != nil {
			select {
			case <-ctx.Done():
			case ec <- fmt.Errorf("list: %w", err):
			}
		}
	}()

	// Download and parse junit suites from the listed artifacts.
	suitesChan := make(chan gcs.SuitesMeta, 1)
	go func() {
		defer close(suitesChan) // No more rows
		const max = 1000
		if err := build.Suites(ctx, client, artifacts, suitesChan, max); err != nil {
			select {
			case <-ctx.Done():
			case ec <- fmt.Errorf("download: %w", err):
			}
		}
	}()

	// Append parsed suites; signal completion with a nil error.
	var suites []gcs.SuitesMeta
	go func() {
		for suite := range suitesChan {
			suites = append(suites, suite)
		}
		select {
		case <-ctx.Done():
		case ec <- nil:
		}
	}()

	// Return on the first ec message: an error from list/download, or nil
	// from the collector once everything is gathered.
	// NOTE(review): if the lister fails after closing artifacts, its error
	// races against the collector's nil (the download loop ends when
	// artifacts closes), so a list failure may be dropped and partial
	// suites returned — confirm whether this is intended.
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case err := <-ec:
		if err != nil {
			return nil, err
		}
	}
	return suites, nil
}