sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/gerrit/adapter/adapter.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package adapter implements a controller that interacts with gerrit instances
package adapter

import (
	"context"
	"fmt"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/andygrunwald/go-gerrit"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"

	prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
	prowv1 "sigs.k8s.io/prow/pkg/client/clientset/versioned/typed/prowjobs/v1"
	"sigs.k8s.io/prow/pkg/config"
	reporter "sigs.k8s.io/prow/pkg/crier/reporters/gerrit"
	"sigs.k8s.io/prow/pkg/gerrit/client"
	"sigs.k8s.io/prow/pkg/gerrit/source"
	"sigs.k8s.io/prow/pkg/io"
	"sigs.k8s.io/prow/pkg/kube"
	"sigs.k8s.io/prow/pkg/pjutil"
)

const (
	inRepoConfigRetries = 2
	inRepoConfigFailed  = "Unable to get inRepoConfig. This could be due to a merge conflict (please resolve it), an inRepoConfig parsing error (incorrect formatting) in the .prow directory or .prow.yaml file, or a flake. For possible flakes, try again with /test all"
)

var gerritMetrics = struct {
	processingResults           *prometheus.CounterVec
	inrepoconfigResults         *prometheus.CounterVec
	triggerLatency              *prometheus.HistogramVec
	triggerHelpLatency          *prometheus.HistogramVec
	changeProcessDuration       *prometheus.HistogramVec
	processSingleChangeDuration *prometheus.HistogramVec
	changeSyncDuration          *prometheus.HistogramVec
	gerritRepoQueryDuration     *prometheus.HistogramVec
	pickupChangeLatency         *prometheus.HistogramVec
	jobCreationDuration         *prometheus.HistogramVec
}{
	processingResults: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "gerrit_processing_results",
		Help: "Count of change processing by instance, repo, and result (ERROR or SUCCESS).",
	}, []string{
		"org",
		"repo",
		"result",
	}),
	inrepoconfigResults: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "gerrit_inrepoconfig_results",
		Help: "Count of retrieving inrepoconfigs by instance, repo, and result (ERROR or SUCCESS).",
	}, []string{
		"org",
		"repo",
		"result",
	}),
	triggerLatency: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_trigger_latency",
		Help:    "Histogram of seconds between triggering event and ProwJob creation time.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{
		"org",
		// We would normally omit 'repo' to avoid excessive cardinality due to the number of buckets, but we need the data.
		// Hopefully this isn't excessive enough to cause metric scraping issues.
		"repo",
	}),
	triggerHelpLatency: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_trigger_help_latency",
		Help:    "Histogram of seconds between triggering event (help) and ProwJob creation time.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 60, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{
		"org",
	}),
	processSingleChangeDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_process_single_change_duration",
		Help:    "Histogram of seconds spent processing a single gerrit change, by instance and repo.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600},
	}, []string{
		"org",
		"repo",
	}),
	changeProcessDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_instance_process_duration",
		Help:    "Histogram of seconds spent processing changes, by instance and repo. This measures the portion of a sync after we've queried for changes.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{
		"org", "repo",
	}),
	changeSyncDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_instance_change_sync_duration",
		Help:    "Histogram of seconds spent syncing changes from a single gerrit instance or repo. Includes gerrit_repo_query_duration and gerrit_instance_process_duration.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{"org", "repo"}),
	gerritRepoQueryDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_repo_query_duration",
		Help:    "Histogram of seconds spent querying a repo's changes. Includes time spent for rate limiting ourselves.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 240},
	}, []string{"org", "repo", "result"}),
	pickupChangeLatency: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_pickup_change_latency",
		Help:    "Histogram of seconds a query result had to wait after it was retrieved from the Gerrit API but before it was picked up for processing by a worker thread.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 240},
	}, []string{"org", "repo"}),
	jobCreationDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_job_creation_duration",
		Help:    "Histogram of seconds spent creating a ProwJob object in the K8s API server of the Prow service cluster, by instance and repo.",
		Buckets: []float64{0.1, 0.2, 0.5, 0.75, 1, 2, 5, 7.5, 10, 15, 20},
	}, []string{
		"org",
		"repo",
	}),
}

func init() {
	prometheus.MustRegister(gerritMetrics.processingResults)
	prometheus.MustRegister(gerritMetrics.inrepoconfigResults)
	prometheus.MustRegister(gerritMetrics.triggerLatency)
	prometheus.MustRegister(gerritMetrics.triggerHelpLatency)
	prometheus.MustRegister(gerritMetrics.processSingleChangeDuration)
	prometheus.MustRegister(gerritMetrics.changeProcessDuration)
	prometheus.MustRegister(gerritMetrics.changeSyncDuration)
	prometheus.MustRegister(gerritMetrics.gerritRepoQueryDuration)
	prometheus.MustRegister(gerritMetrics.pickupChangeLatency)
	prometheus.MustRegister(gerritMetrics.jobCreationDuration)
}

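// prowJobClient is the narrow slice of the ProwJob client that the controller
// needs; declaring it as a local interface makes it straightforward to
// substitute a fake in unit tests.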
type prowJobClient interface {
	Create(context.Context, *prowapi.ProwJob, metav1.CreateOptions) (*prowapi.ProwJob, error)
}

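// gerritClient abstracts the Gerrit API operations used by the controller so
// that the real client can be swapped out (e.g. for a fake) in tests.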
type gerritClient interface {
	ApplyGlobalConfig(orgRepoConfigGetter func() *config.GerritOrgRepoConfigs, lastSyncTracker *client.SyncTime, cookiefilePath, tokenPathOverride string, additionalFunc func())
	Authenticate(cookiefilePath, tokenPath string)
	QueryChangesForProject(instance, project string, lastUpdate time.Time, rateLimit int, additionalFilters ...string) ([]gerrit.ChangeInfo, error)
	GetBranchRevision(instance, project, branch string) (string, error)
	SetReview(instance, id, revision, message string, labels map[string]string) error
	Account(instance string) (*gerrit.AccountInfo, error)
	HasRelatedChanges(instance, id, revision string) (bool, error)
}

// Controller manages gerrit changes.
type Controller struct {
	config                      config.Getter
	prowJobClient               prowJobClient
	gc                          gerritClient
	tracker                     LastSyncTracker
	projectsOptOutHelp          map[string]sets.Set[string]
	lock                        sync.RWMutex
	cookieFilePath              string
	configAgent                 *config.Agent
	inRepoConfigGetter          config.InRepoConfigGetter
	inRepoConfigFailuresTracker map[string]bool
	projectsWithWorker          map[string]bool
	latestMux                   sync.Mutex
	workerPoolSize              int
}

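// LastSyncTracker tracks and persists the last time changes were synced for
// each instance and project, so that progress survives controller restarts.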
type LastSyncTracker interface {
	Current() client.LastSyncState
	Update(client.LastSyncState) error
}

// NewController returns a new gerrit controller client
func NewController(ctx context.Context, prowJobClient prowv1.ProwJobInterface, op io.Opener,
	ca *config.Agent, cookiefilePath, tokenPathOverride, lastSyncFallback string, workerPoolSize int, maxQPS, maxBurst int, ircg config.InRepoConfigGetter) *Controller {

	cfg := ca.Config
	projectsOptOutHelpMap := map[string]sets.Set[string]{}
	if cfg().Gerrit.OrgReposConfig != nil {
		projectsOptOutHelpMap = cfg().Gerrit.OrgReposConfig.OptOutHelpRepos()
	}
	lastSyncTracker := client.NewSyncTime(lastSyncFallback, op, ctx)

	if err := lastSyncTracker.Init(cfg().Gerrit.OrgReposConfig.AllRepos()); err != nil {
		logrus.WithError(err).Fatal("Error initializing lastSyncFallback.")
	}
	gerritClient, err := client.NewClient(nil, maxQPS, maxBurst)
	if err != nil {
		logrus.WithError(err).Fatal("Error creating gerrit client.")
	}
	c := &Controller{
		prowJobClient:               prowJobClient,
		config:                      cfg,
		gc:                          gerritClient,
		tracker:                     lastSyncTracker,
		projectsOptOutHelp:          projectsOptOutHelpMap,
		cookieFilePath:              cookiefilePath,
		configAgent:                 ca,
		inRepoConfigGetter:          ircg,
		inRepoConfigFailuresTracker: map[string]bool{},
		projectsWithWorker:          make(map[string]bool),
		workerPoolSize:              workerPoolSize,
	}

	// ApplyGlobalConfig reads Gerrit configurations from the global Gerrit
	// config. It completely overrides previously configured Gerrit hosts and
	// projects, and also authenticates with Gerrit along the way.
	orgRepoConfigGetter := func() *config.GerritOrgRepoConfigs {
		return cfg().Gerrit.OrgReposConfig
	}
	c.gc.ApplyGlobalConfig(orgRepoConfigGetter, lastSyncTracker, cookiefilePath, tokenPathOverride, func() {
		orgReposConfig := orgRepoConfigGetter()
		if orgReposConfig == nil {
			return
		}
		c.lock.Lock()
		// Updates a map, lock to make sure it's thread safe.
		c.projectsOptOutHelp = orgReposConfig.OptOutHelpRepos()
		c.lock.Unlock()
	})

	// Authenticate creates a goroutine for rotating token secrets when called
	// the first time; afterwards it authenticates only once. ApplyGlobalConfig
	// calls Authenticate only when a global Gerrit config is present, so
	// calling it here is required for the case where Gerrit repos are defined
	// as command line args (which is going to be deprecated).
	c.gc.Authenticate(cookiefilePath, tokenPathOverride)

	return c
}

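// Change pairs a Gerrit change with its instance and records when it was
// queued for processing, so that pickup latency can be measured.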
type Change struct {
	changeInfo gerrit.ChangeInfo
	instance   string
	created    time.Time
}

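// processChange is a worker loop: it consumes changes from changeChan,
// triggers jobs for each change when warranted, and records the most recent
// update time seen per instance and project.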
func (c *Controller) processChange(latest client.LastSyncState, changeChan <-chan Change, log *logrus.Entry, wg *sync.WaitGroup, lastProjectSyncTime time.Time) {
	for changeStruct := range changeChan {
		change := changeStruct.changeInfo
		instance := changeStruct.instance
		gerritMetrics.pickupChangeLatency.WithLabelValues(instance, change.Project).Observe(time.Since(changeStruct.created).Seconds())

		log := log.WithFields(logrus.Fields{
			"branch":   change.Branch,
			"change":   change.Number,
			"repo":     change.Project,
			"revision": change.CurrentRevision,
		})

		now := time.Now()

		result := client.ResultSuccess
		if c.shouldTriggerJobs(change, lastProjectSyncTime) {
			if err := c.triggerJobs(log, instance, change); err != nil {
				result = client.ResultError
				log.WithError(err).Info("Failed to trigger jobs based on change")
			}
		} else {
			log.Info("Skipped triggering jobs for this change.")
		}
		gerritMetrics.processingResults.WithLabelValues(instance, change.Project, result).Inc()

		c.latestMux.Lock()
		lastTime, ok := latest[instance][change.Project]
		if !ok || lastTime.Before(change.Updated.Time) {
			lastTime = change.Updated.Time
			latest[instance][change.Project] = lastTime
		}
		c.latestMux.Unlock()
		wg.Done()

		gerritMetrics.processSingleChangeDuration.WithLabelValues(instance, change.Project).Observe(time.Since(now).Seconds())
	}
}

func (c *Controller) processSingleProject(instance, project string) {
	// Assumes the passed in instance was already normalized with https:// prefix.
	log := logrus.WithFields(logrus.Fields{"host": instance, "repo": project})
	tracker := c.tracker.Current()
	syncTime := time.Now()
	if projects, ok := tracker[instance]; ok {
		if t, ok := projects[project]; ok {
			syncTime = t
		}
	}
	latest := tracker.DeepCopy()

	now := time.Now()
	defer func() {
		gerritMetrics.changeSyncDuration.WithLabelValues(instance, project).Observe(time.Since(now).Seconds())
	}()

	timeQueryChangesForProject := time.Now()

	// Don't return early on error; the client has already logged it, and the
	// failure is recorded in queryResult below.
	changes, err := c.gc.QueryChangesForProject(instance, project, syncTime, c.config().Gerrit.RateLimit)
	queryResult := func() string {
		if err == nil {
			return client.ResultSuccess
		}
		return client.ResultError
	}()
	log = log.WithFields(logrus.Fields{
		"lastUpdate":    syncTime.String(),
		"queryStart":    timeQueryChangesForProject.String(),
		"queryDuration": time.Since(timeQueryChangesForProject).String(),
		"changeCount":   len(changes),
		"result":        queryResult,
	})
	gerritMetrics.gerritRepoQueryDuration.WithLabelValues(instance, project, queryResult).Observe(time.Since(timeQueryChangesForProject).Seconds())
	checkAndLogQuery(log, changes)

	if len(changes) == 0 {
		return
	}

	timeProcessChangesForProject := time.Now()
	var wg sync.WaitGroup
	wg.Add(len(changes))
	changeChan := make(chan Change)

	poolSize := c.workerPoolSize
	if poolSize > len(changes) {
		poolSize = len(changes)
	}
	for i := 0; i < poolSize; i++ {
		go c.processChange(latest, changeChan, log, &wg, syncTime)
	}
	// We need to call time.Now() outside this loop since <- will block
	// while there are no more available worker threads, possibly causing
	// time.Now() to be called later than intended.
	timeChangesCreated := time.Now()
	for _, change := range changes {
		changeChan <- Change{changeInfo: change, instance: instance, created: timeChangesCreated}
	}
	wg.Wait()
	gerritMetrics.changeProcessDuration.WithLabelValues(instance, project).Observe(time.Since(timeProcessChangesForProject).Seconds())
	close(changeChan)
	if err := c.tracker.Update(latest); err != nil {
		log.WithError(err).Error("Failed to update last sync tracker.")
	}
}

func checkAndLogQuery(log *logrus.Entry, changes []gerrit.ChangeInfo) {
	seen := sets.NewInt()
	for _, change := range changes {
		if seen.Has(change.Number) {
			log.WithField("change", change.Number).Error("Gerrit API bug! Received multiple updates for a change from a single query.")
		}
		seen.Insert(change.Number)
	}
	log.Infof("Query returned changes: %v", seen.List())
}

// Sync looks for newly made gerrit changes
// and creates prowjobs according to specs
func (c *Controller) Sync() {
	// Identify projects without worker threads.
	id := func(instance, project string) string { return fmt.Sprintf("%s/%s", instance, project) }
	needsWorker := map[string][]string{}
	needsWorkerCount := map[string]int{}
	for instance, projects := range c.config().Gerrit.OrgReposConfig.AllRepos() {
		for project := range projects {
			if _, ok := c.projectsWithWorker[id(instance, project)]; ok {
				// The worker thread is already up for this project, nothing needs
				// to be done.
				continue
			}
			needsWorker[instance] = append(needsWorker[instance], project)
			needsWorkerCount[instance]++
		}
	}
	// First time seeing these projects, spin up worker threads for them.
	staggerPosition := 0
	for instance, projects := range needsWorker {
		staggerIncrement := c.config().Gerrit.TickInterval.Duration / time.Duration(needsWorkerCount[instance])
		for _, project := range projects {
			c.projectsWithWorker[id(instance, project)] = true
			logrus.WithFields(logrus.Fields{"instance": instance, "repo": project}).Info("Starting worker for project.")
			go func(instance, project string, staggerPosition int) {
				// Stagger new worker threads across the loop period to reduce load on the Gerrit API and Git server.
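				// For example, with a single instance, a 10 minute tick
				// interval, and 5 new projects, staggerIncrement is 2 minutes
				// and the workers take their first run after delays of 0, 2,
				// 4, 6, and 8 minutes.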
				napTime := staggerIncrement * time.Duration(staggerPosition)
				time.Sleep(napTime)

				// Now start the repo worker thread.
				previousRun := time.Now()
				for {
					timeDiff := time.Until(previousRun.Add(c.config().Gerrit.TickInterval.Duration))
					if timeDiff > 0 {
						time.Sleep(timeDiff)
					}
					previousRun = time.Now()
					c.processSingleProject(instance, project)
				}
			}(instance, project, staggerPosition)
			staggerPosition++
		}
	}
}

// CreateRefs creates refs for a presubmit job from given changes.
//
// Passed in instance must contain https:// prefix.
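//
// For example (the change number here is illustrative): for instance
// https://android-review.googlesource.com and project platform/build, a
// change numbered 123456 yields CloneURI
// https://android-review.googlesource.com/platform/build, RepoLink
// https://android.googlesource.com/platform/build, and pull link
// https://android-review.googlesource.com/c/platform/build/+/123456.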
func CreateRefs(instance, project, branch, baseSHA string, changes ...client.ChangeInfo) (prowapi.Refs, error) {
	var refs prowapi.Refs
	cloneURI := source.CloneURIFromOrgRepo(instance, project)

	// Something like https://android.googlesource.com
	codeHost := source.EnsureCodeURL(instance)

	refs = prowapi.Refs{
		Org:      instance, // Something like android-review.googlesource.com
		Repo:     project,  // Something like platform/build
		BaseRef:  branch,
		BaseSHA:  baseSHA,
		CloneURI: cloneURI, // Something like https://android-review.googlesource.com/platform/build
		RepoLink: fmt.Sprintf("%s/%s", codeHost, project),
		BaseLink: fmt.Sprintf("%s/%s/+/%s", codeHost, project, baseSHA),
	}
	for _, change := range changes {
		rev, ok := change.Revisions[change.CurrentRevision]
		if !ok {
			return prowapi.Refs{}, fmt.Errorf("cannot find current revision for change %v", change.ID)
		}
		refs.Pulls = append(refs.Pulls, prowapi.Pull{
			Number:     change.Number,
			Author:     rev.Commit.Author.Name,
			SHA:        change.CurrentRevision,
			Ref:        rev.Ref,
			Link:       fmt.Sprintf("%s/c/%s/+/%d", instance, change.Project, change.Number),
			CommitLink: fmt.Sprintf("%s/%s/+/%s", codeHost, change.Project, change.CurrentRevision),
			AuthorLink: fmt.Sprintf("%s/q/%s", instance, rev.Commit.Author.Email),
		})
	}
	return refs, nil
}

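// LabelsAndAnnotations builds the labels and annotations for a ProwJob from
// the job's own labels and annotations plus the Gerrit metadata Crier needs
// to report results back to the right change.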
func LabelsAndAnnotations(instance string, jobLabels, jobAnnotations map[string]string, changes ...client.ChangeInfo) (labels, annotations map[string]string) {
	labels, annotations = make(map[string]string), make(map[string]string)
	for k, v := range jobLabels {
		labels[k] = v
	}
	for k, v := range jobAnnotations {
		annotations[k] = v
	}
	annotations[kube.GerritInstance] = instance

	// Labels required for Crier reporting back to Gerrit; batch jobs are not
	// expected to report, so only add them when there is a single change.
	if len(changes) == 1 {
		change := changes[0]
		labels[kube.GerritRevision] = change.CurrentRevision
		labels[kube.GerritPatchset] = strconv.Itoa(change.Revisions[change.CurrentRevision].Number)
		if _, ok := labels[kube.GerritReportLabel]; !ok {
			logrus.Debug("Job uses default value of 'Code-Review' for 'prow.k8s.io/gerrit-report-label' label. This default will be removed in March 2022.")
			labels[kube.GerritReportLabel] = client.CodeReview
		}

		annotations[kube.GerritID] = change.ID
	}

	return
}

// failedJobs finds the jobs currently reported as failing (used for retesting).
//
// Failing means the job is complete and not passing.
// It scans messages for prow reports, which list jobs and whether they passed.
// A job is included in the set if the latest report has it failing.
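//
// For example, if an earlier report marks job-A as FAILURE and a later report
// for the same revision marks job-A as SUCCESS, job-A is not in the returned
// set; only jobs whose most recent report is FAILURE, ERROR, or ABORTED are.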
func failedJobs(account int, revision int, messages ...gerrit.ChangeMessageInfo) sets.Set[string] {
	failures := sets.Set[string]{}
	times := map[string]time.Time{}
	for _, message := range messages {
		if message.Author.AccountID != account { // Ignore reports from other accounts
			continue
		}
		if message.RevisionNumber != revision { // Ignore reports for old commits
			continue
		}
		// TODO(fejta): parse triggered job reports and remove from failure set.
		// (alternatively, refactor this whole process to rely less on fragile string parsing)
		report := reporter.ParseReport(message.Message)
		if report == nil {
			continue
		}
		for _, job := range report.Jobs {
			name := job.Name
			if latest, present := times[name]; present && message.Date.Before(latest) {
				continue
			}
			times[name] = message.Date.Time
			if job.State == prowapi.FailureState || job.State == prowapi.ErrorState || job.State == prowapi.AbortedState {
				failures.Insert(name)
			} else {
				failures.Delete(name)
			}
		}
	}
	return failures
}

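// handleInRepoConfigError reports an inrepoconfig retrieval failure back to
// Gerrit (at most once per instance, change, and revision) and clears the
// failure record once retrieval succeeds again.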
func (c *Controller) handleInRepoConfigError(err error, instance string, change gerrit.ChangeInfo) error {
	key := fmt.Sprintf("%s%s%s", instance, change.ID, change.CurrentRevision)
	if err != nil {
		// Only report back to Gerrit if we have not reported previously.
		// If any new `/test` commands are given and fail for the same reason, we won't post another error message,
		// which can be confusing to users. This behavior is to prevent us from reporting the failure again
		// on unrelated comments (including the error message itself!), but we don't need this behavior if
		// we don't process irrelevant comments, which is the case when AllowedPresubmitTriggerRe is specified.
		skipIrrelevantComments := c.config().Gerrit.AllowedPresubmitTriggerReRawString != ""
		if _, alreadyReported := c.inRepoConfigFailuresTracker[key]; !alreadyReported || skipIrrelevantComments {
			msg := fmt.Sprintf("%s: %v", inRepoConfigFailed, err)
			if setReviewWerr := c.gc.SetReview(instance, change.ID, change.CurrentRevision, msg, nil); setReviewWerr != nil {
				return fmt.Errorf("failed to get inRepoConfig and failed to set Review to notify user: %v and %v", err, setReviewWerr)
			}
			// The boolean value here is meaningless as we use the tracker as a
			// set data structure, not as a hashmap where values actually
			// matter. We just use a bool for simplicity.
			c.inRepoConfigFailuresTracker[key] = true
		}

		// We do not want to return an error for this change: if we are unable to get the inRepoConfig we simply do not process it, which is expected behavior.
		return nil
	}

	// If we are passing now, remove any record of previous failures in our
	// tracker to allow future failures to send an error message back to Gerrit
	// (through this same function).
	delete(c.inRepoConfigFailuresTracker, key)
	return nil
}

// shouldTriggerJobs returns true if we should trigger jobs for the given
// change.
func (c *Controller) shouldTriggerJobs(change client.ChangeInfo, lastProjectSyncTime time.Time) bool {
	// Do not skip postsubmit jobs.
	if change.Status == client.Merged {
		return true
	}
	revision := change.Revisions[change.CurrentRevision]
	if revision.Created.After(lastProjectSyncTime) {
		return true
	}

	for _, message := range currentMessages(change, lastProjectSyncTime) {
		if c.messageContainsJobTriggeringCommand(message) {
			return true
		}
		if indicatesChangeFromDraftToActiveState(message.Message) {
			return true
		}
	}

	return false
}

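// messageContainsJobTriggeringCommand reports whether a comment contains
// /retest, /test all, or a command matching the configured
// AllowedPresubmitTriggerRe.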
func (c *Controller) messageContainsJobTriggeringCommand(message gerrit.ChangeMessageInfo) bool {
	return pjutil.RetestRe.MatchString(message.Message) ||
		pjutil.TestAllRe.MatchString(message.Message) ||
		c.configAgent.Config().Gerrit.IsAllowedPresubmitTrigger(message.Message)
}

// triggerJobs creates new presubmit/postsubmit prowjobs based off the gerrit change.
func (c *Controller) triggerJobs(logger logrus.FieldLogger, instance string, change client.ChangeInfo) error {
	cloneURI := source.CloneURIFromOrgRepo(instance, change.Project)
	baseSHA, err := c.gc.GetBranchRevision(instance, change.Project, change.Branch)
	if err != nil {
		return fmt.Errorf("GetBranchRevision: %w", err)
	}

	type triggeredJob struct {
		name   string
		report bool
	}
	var triggeredJobs []triggeredJob
	triggerTimes := map[string]time.Time{}

	refs, err := CreateRefs(instance, change.Project, change.Branch, baseSHA, change)
	if err != nil {
		return fmt.Errorf("createRefs from %s at %s: %w", cloneURI, baseSHA, err)
	}

	type jobSpec struct {
		spec        prowapi.ProwJobSpec
		labels      map[string]string
		annotations map[string]string
	}
	var jobSpecs []jobSpec
	baseSHAGetter := func() (string, error) { return baseSHA, nil }
	var hasRelatedChanges *bool
	// This headSHAGetter returns the empty string instead of the head SHA in cases where we can be certain that the change does not
	// modify the inrepo config. This allows multiple changes to share a ProwYAML cache entry so long as they don't touch the inrepo config themselves.
	headSHAGetter := func() (string, error) {
		changes, err := client.ChangedFilesProvider(&change)()
		if err != nil {
			// This is a best effort optimization; log the error, but just use CurrentRevision in this case.
			logger.WithError(err).Info("Failed to get changed files for the purpose of prowYAML cache optimization. Skipping optimization.")
			return change.CurrentRevision, nil
		}
		if config.ContainsInRepoConfigPath(changes) {
			return change.CurrentRevision, nil
		}
		if hasRelatedChanges == nil {
			if res, err := c.gc.HasRelatedChanges(instance, change.ChangeID, change.CurrentRevision); err != nil {
				logger.WithError(err).Info("Failed to get related changes for the purpose of prowYAML cache optimization. Skipping optimization.")
				return change.CurrentRevision, nil
			} else {
				hasRelatedChanges = &res
			}
		}
		if *hasRelatedChanges {
			// If the change is part of a chain, the commit may include files not identified by the API.
			// So we can't easily check if the change includes inrepo config file changes.
			return change.CurrentRevision, nil
		}
		// If we know the change doesn't touch the inrepo config itself, we don't need to check out the head commits.
		// This is particularly useful because it lets multiple changes share a ProwYAML cache entry so long as they don't touch the inrepo config themselves.
		return "", nil
	}

	switch change.Status {
	case client.Merged:
		var postsubmits []config.Postsubmit
		// The Gerrit server might be unavailable intermittently; retry inrepoconfig
		// processing for increased reliability.
		for attempt := 0; attempt < inRepoConfigRetries; attempt++ {
			postsubmits, err = c.inRepoConfigGetter.GetPostsubmits(cloneURI, change.Branch, baseSHAGetter, headSHAGetter)
			// Break if there was no error, or if there was a merge conflict.
			if err == nil {
				gerritMetrics.inrepoconfigResults.WithLabelValues(instance, change.Project, client.ResultSuccess).Inc()
				break
			}
			if strings.Contains(err.Error(), "Merge conflict in") {
				break
			}
		}
		// Postsubmit jobs are triggered only once. Still try to fall back on
		// static jobs if we failed to retrieve inrepoconfig jobs.
		if err != nil {
			gerritMetrics.inrepoconfigResults.WithLabelValues(instance, change.Project, client.ResultError).Inc()

			// Report the error back to Gerrit. handleInRepoConfigError is
			// responsible for not sending the same message again and again on
			// the same commit.
			if postErr := c.handleInRepoConfigError(err, instance, change); postErr != nil {
				logger.WithError(postErr).Error("Failed reporting inrepoconfig processing error back to Gerrit.")
			}
			// Static postsubmit jobs are included as part of the output from
			// inRepoConfigCache.GetPostsubmits, so fall back to static only
			// when inrepoconfig retrieval failed.
			postsubmits = append(postsubmits, c.config().GetPostsubmitsStatic(cloneURI)...)
		}

		for _, postsubmit := range postsubmits {
			if shouldRun, err := postsubmit.ShouldRun(change.Branch, client.ChangedFilesProvider(&change)); err != nil {
				return fmt.Errorf("failed to determine if postsubmit %q should run: %w", postsubmit.Name, err)
			} else if shouldRun {
				if change.Submitted != nil {
					triggerTimes[postsubmit.Name] = change.Submitted.Time
				}
				jobSpecs = append(jobSpecs, jobSpec{
					spec:        pjutil.PostsubmitSpec(postsubmit, refs),
					labels:      postsubmit.Labels,
					annotations: postsubmit.Annotations,
				})
			}
		}
	case client.New:
		var presubmits []config.Presubmit
		// The Gerrit server might be unavailable intermittently; retry inrepoconfig
		// processing for increased reliability.
		for attempt := 0; attempt < inRepoConfigRetries; attempt++ {
			presubmits, err = c.inRepoConfigGetter.GetPresubmits(cloneURI, change.Branch, baseSHAGetter, headSHAGetter)
			if err == nil {
				break
			}
		}
		if err != nil {
			// Report the error back to Gerrit. handleInRepoConfigError is
			// responsible for not sending the same message again and again on
			// the same commit.
			if postErr := c.handleInRepoConfigError(err, instance, change); postErr != nil {
				logger.WithError(postErr).Error("Failed reporting inrepoconfig processing error back to Gerrit.")
			}
			// There is no need to keep going when we failed to get
			// inrepoconfig jobs. Imagine this scenario:
			// - Commit #abc triggered static job job-A and inrepoconfig jobs
			// job-B and job-C.
			// - Both job-B and job-C failed.
			// - Commit #def was pushed. Inrepoconfig failed, falling back to
			// triggering static job job-A.
			// - job-A passed.
			// - Prow would make a decision on the result of job-A and ignore
			// the rest. (Yes, this is a Prow bug, which should not be a
			// problem when each prowjob is reported to an individual Gerrit
			// Check.)
			// So, long story short: kicking off partial prowjobs is worse than
			// kicking off nothing.
			return err
		}

		account, err := c.gc.Account(instance)
		if err != nil {
			// This would happen if authenticateOnce hasn't registered this instance yet.
			return fmt.Errorf("account not found for %q: %w", instance, err)
		}

		lastUpdate, ok := c.tracker.Current()[instance][change.Project]
		if !ok {
			lastUpdate = time.Now()
			logger.WithField("lastUpdate", lastUpdate).Warn("lastUpdate not found, falling back to now")
		}

		revision := change.Revisions[change.CurrentRevision]
		failedJobs := failedJobs(account.AccountID, revision.Number, change.Messages...)
		failed, all := presubmitContexts(failedJobs, presubmits, logger)
		messages := currentMessages(change, lastUpdate)
		logger.WithField("failed", len(failed)).Debug("Failed jobs parsed from previous comments.")
		filters := []pjutil.Filter{
			messageFilter(messages, failed, all, triggerTimes, logger),
		}
		// Automatically trigger the Prow jobs if the revision is new and the
		// change is not WorkInProgress.
		if revision.Created.Time.After(lastUpdate) && !change.WorkInProgress {
			filters = append(filters, &timeAnnotationFilter{
				Filter:       pjutil.NewTestAllFilter(),
				eventTime:    revision.Created.Time,
				triggerTimes: triggerTimes,
			})
		}
		toTrigger, err := pjutil.FilterPresubmits(pjutil.NewAggregateFilter(filters), client.ChangedFilesProvider(&change), change.Branch, presubmits, logger)
		if err != nil {
			return fmt.Errorf("filter presubmits: %w", err)
		}
		// At this point triggerTimes should be properly populated as a side effect of FilterPresubmits.

		// Reply with help information on how to run the presubmit Prow jobs if requested.
		for _, msg := range messages {
			needsHelp, note := pjutil.ShouldRespondWithHelp(msg.Message, len(toTrigger))
			// Lock for projectsOptOutHelp, which is a map.
			c.lock.RLock()
			optedOut := isProjectOptOutHelp(c.projectsOptOutHelp, instance, change.Project)
			c.lock.RUnlock()
			if needsHelp && !optedOut {
				runWithTestAllNames, optionalJobsCommands, requiredJobsCommands, err := pjutil.AvailablePresubmits(client.ChangedFilesProvider(&change), change.Branch, presubmits, logger.WithField("help", true))
				if err != nil {
					return err
				}
				message := pjutil.HelpMessage(instance, change.Project, change.Branch, note, runWithTestAllNames, optionalJobsCommands, requiredJobsCommands)
				if err := c.gc.SetReview(instance, change.ID, change.CurrentRevision, message, nil); err != nil {
					return err
				}
				gerritMetrics.triggerHelpLatency.WithLabelValues(instance).Observe(time.Since(msg.Date.Time).Seconds())
				// Only respond to the first message that requests help information.
				break
			}
		}

		for _, presubmit := range toTrigger {
			jobSpecs = append(jobSpecs, jobSpec{
				spec:        pjutil.PresubmitSpec(presubmit, refs),
				labels:      presubmit.Labels,
				annotations: presubmit.Annotations,
			})
		}
	}

	schedulerEnabled := c.config().Scheduler.Enabled

	for _, jSpec := range jobSpecs {
		labels, annotations := LabelsAndAnnotations(instance, jSpec.labels, jSpec.annotations, change)

		pj := pjutil.NewProwJob(jSpec.spec, labels, annotations, pjutil.RequireScheduling(schedulerEnabled))

		logger := logger.WithField("prowjob", pj.Name)
		timeBeforeCreate := time.Now()
		if _, err := c.prowJobClient.Create(context.TODO(), &pj, metav1.CreateOptions{}); err != nil {
			logger.WithError(err).Error("Failed to create ProwJob")
			continue
		}
		gerritMetrics.jobCreationDuration.WithLabelValues(instance, change.Project).Observe(time.Since(timeBeforeCreate).Seconds())
		logger.Info("Triggered new job")
		if eventTime, ok := triggerTimes[pj.Spec.Job]; ok {
			gerritMetrics.triggerLatency.WithLabelValues(instance, change.Project).Observe(time.Since(eventTime).Seconds())
		}
		triggeredJobs = append(triggeredJobs, triggeredJob{
			name:   jSpec.spec.Job,
			report: jSpec.spec.Report,
		})
	}

	if len(triggeredJobs) == 0 {
		return nil
	}

	// Comment back to gerrit if Report is set for any of the jobs.
	var reportingJobs int
	var jobList string
	for _, job := range triggeredJobs {
		if job.report {
			jobList += fmt.Sprintf("\n  * Name: %s", job.name)
			reportingJobs++
		}
	}

	if reportingJobs > 0 {
		message := fmt.Sprintf("Triggered %d prow jobs (%d suppressed reporting): ", len(triggeredJobs), len(triggeredJobs)-reportingJobs)
		// If we have a Deck URL, link to all results for the CL; otherwise list the triggered jobs.
		link, err := deckLinkForPR(c.config().Gerrit.DeckURL, refs, change.Status)
		if err != nil {
			logger.WithError(err).Error("Failed to generate link to job results on Deck.")
		}
		if link != "" && err == nil {
			message = message + link
		} else {
			message = message + jobList
		}
		if err := c.gc.SetReview(instance, change.ID, change.CurrentRevision, message, nil); err != nil {
			return err
		}
	}

	return nil
}

// isProjectOptOutHelp returns whether the project has opted out of receiving
// help information about how to run presubmit tests on its changes.
func isProjectOptOutHelp(projectsOptOutHelp map[string]sets.Set[string], instance, project string) bool {
	ps, ok := projectsOptOutHelp[instance]
	if !ok {
		return false
	}
	return ps.Has(project)
}

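// deckLinkForPR returns a Deck URL filtered to the given change's ProwJobs,
// or the empty string when no Deck URL is configured or the change is merged.
//
// As an illustrative example (the values here are hypothetical): a Deck URL
// of https://prow.example.com with org example.org and repo repo for pull 123
// yields https://prow.example.com?pull=123&repo=example.org%2Frepo, since
// url.Values.Encode sorts the query keys and escapes the slash.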
func deckLinkForPR(deckURL string, refs prowapi.Refs, changeStatus string) (string, error) {
	if deckURL == "" || changeStatus == client.Merged {
		return "", nil
	}

	parsed, err := url.Parse(deckURL)
	if err != nil {
		return "", fmt.Errorf("failed to parse gerrit.deck_url (impossible: this should have been caught at load time): %w", err)
	}
	query := parsed.Query()
	query.Set("repo", fmt.Sprintf("%s/%s", refs.Org, refs.Repo))
	if len(refs.Pulls) != 1 {
		return "", fmt.Errorf("impossible: triggered jobs for a Gerrit change, but refs.Pulls did not contain exactly one entry")
	}
	query.Set("pull", strconv.Itoa(refs.Pulls[0].Number))
	parsed.RawQuery = query.Encode()
	return parsed.String(), nil
}