sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/gerrit/adapter/adapter.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package adapter implements a controller that interacts with gerrit instances
package adapter

import (
	"context"
	"fmt"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/andygrunwald/go-gerrit"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/sets"

	prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
	prowv1 "sigs.k8s.io/prow/pkg/client/clientset/versioned/typed/prowjobs/v1"
	"sigs.k8s.io/prow/pkg/config"
	reporter "sigs.k8s.io/prow/pkg/crier/reporters/gerrit"
	"sigs.k8s.io/prow/pkg/gerrit/client"
	"sigs.k8s.io/prow/pkg/gerrit/source"
	"sigs.k8s.io/prow/pkg/io"
	"sigs.k8s.io/prow/pkg/kube"
	"sigs.k8s.io/prow/pkg/pjutil"
)

const (
	inRepoConfigRetries = 2
	inRepoConfigFailed  = "Unable to get inRepoConfig. This could be due to a merge conflict (please resolve it), an inRepoConfig parsing error (incorrect formatting) in the .prow directory or .prow.yaml file, or a flake. For possible flakes, try again with /test all"
)

var gerritMetrics = struct {
	processingResults           *prometheus.CounterVec
	inrepoconfigResults         *prometheus.CounterVec
	triggerLatency              *prometheus.HistogramVec
	triggerHelpLatency          *prometheus.HistogramVec
	changeProcessDuration       *prometheus.HistogramVec
	processSingleChangeDuration *prometheus.HistogramVec
	changeSyncDuration          *prometheus.HistogramVec
	gerritRepoQueryDuration     *prometheus.HistogramVec
	pickupChangeLatency         *prometheus.HistogramVec
	jobCreationDuration         *prometheus.HistogramVec
}{
	processingResults: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "gerrit_processing_results",
		Help: "Count of change processing by instance, repo, and result (ERROR or SUCCESS).",
	}, []string{
		"org",
		"repo",
		"result",
	}),
	inrepoconfigResults: prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "gerrit_inrepoconfig_results",
		Help: "Count of retrieving inrepoconfigs by instance, repo, and result (ERROR or SUCCESS).",
	}, []string{
		"org",
		"repo",
		"result",
	}),
	triggerLatency: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_trigger_latency",
		Help:    "Histogram of seconds between triggering event and ProwJob creation time.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{
		"org",
		// We would normally omit 'repo' to avoid excessive cardinality due to the number of buckets, but we need the data.
		// Hopefully this isn't excessive enough to cause metric scraping issues.
		"repo",
	}),
	triggerHelpLatency: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_trigger_help_latency",
		Help:    "Histogram of seconds between triggering event (help) and ProwJob creation time.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 60, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{
		"org",
	}),
	processSingleChangeDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_process_single_change_duration",
		Help:    "Histogram of seconds spent processing a single gerrit change, by instance and repo.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600},
	}, []string{
		"org",
		"repo",
	}),
	changeProcessDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_instance_process_duration",
		Help:    "Histogram of seconds spent processing changes, by instance and repo. This measures the portion of a sync after we've queried for changes.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{
		"org", "repo",
	}),
	changeSyncDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_instance_change_sync_duration",
		Help:    "Histogram of seconds spent syncing changes from a single gerrit instance or repo. Includes gerrit_repo_query_duration and gerrit_instance_process_duration.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 180, 300, 450, 600, 750, 900, 1050, 1200},
	}, []string{"org", "repo"}),
	gerritRepoQueryDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_repo_query_duration",
		Help:    "Histogram of seconds spent querying a repo's changes. Includes time spent rate limiting ourselves.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 240},
	}, []string{"org", "repo", "result"}),
	pickupChangeLatency: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_pickup_change_latency",
		Help:    "Histogram of seconds a query result had to wait after it was retrieved from the Gerrit API but before it was picked up for processing by a worker thread.",
		Buckets: []float64{0.5, 1, 2, 5, 10, 20, 30, 45, 60, 90, 120, 240},
	}, []string{"org", "repo"}),
	jobCreationDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "gerrit_job_creation_duration",
		Help:    "Histogram of seconds spent creating a ProwJob object in the K8s API server of the Prow service cluster, by instance and repo.",
		Buckets: []float64{0.1, 0.2, 0.5, 0.75, 1, 2, 5, 7.5, 10, 15, 20},
	}, []string{
		"org",
		"repo",
	}),
}

func init() {
	prometheus.MustRegister(gerritMetrics.processingResults)
	prometheus.MustRegister(gerritMetrics.inrepoconfigResults)
	prometheus.MustRegister(gerritMetrics.triggerLatency)
	prometheus.MustRegister(gerritMetrics.triggerHelpLatency)
	prometheus.MustRegister(gerritMetrics.processSingleChangeDuration)
	prometheus.MustRegister(gerritMetrics.changeProcessDuration)
	prometheus.MustRegister(gerritMetrics.changeSyncDuration)
	prometheus.MustRegister(gerritMetrics.gerritRepoQueryDuration)
	prometheus.MustRegister(gerritMetrics.pickupChangeLatency)
	prometheus.MustRegister(gerritMetrics.jobCreationDuration)
}

type prowJobClient interface {
	Create(context.Context, *prowapi.ProwJob, metav1.CreateOptions) (*prowapi.ProwJob, error)
}

type gerritClient interface {
	ApplyGlobalConfig(orgRepoConfigGetter func() *config.GerritOrgRepoConfigs, lastSyncTracker *client.SyncTime, cookiefilePath, tokenPathOverride string, additionalFunc func())
	Authenticate(cookiefilePath, tokenPath string)
	QueryChangesForProject(instance, project string, lastUpdate time.Time, rateLimit int, additionalFilters ...string) ([]gerrit.ChangeInfo, error)
	GetBranchRevision(instance, project, branch string) (string, error)
	SetReview(instance, id, revision, message string, labels map[string]string) error
	Account(instance string) (*gerrit.AccountInfo, error)
	HasRelatedChanges(instance, id, revision string) (bool, error)
}

// Controller manages gerrit changes.
type Controller struct {
	config                      config.Getter
	prowJobClient               prowJobClient
	gc                          gerritClient
	tracker                     LastSyncTracker
	projectsOptOutHelp          map[string]sets.Set[string]
	lock                        sync.RWMutex
	cookieFilePath              string
	configAgent                 *config.Agent
	inRepoConfigGetter          config.InRepoConfigGetter
	inRepoConfigFailuresTracker map[string]bool
	projectsWithWorker          map[string]bool
	latestMux                   sync.Mutex
	workerPoolSize              int
}

type LastSyncTracker interface {
	Current() client.LastSyncState
	Update(client.LastSyncState) error
}

// NewController returns a new gerrit controller client.
func NewController(ctx context.Context, prowJobClient prowv1.ProwJobInterface, op io.Opener,
	ca *config.Agent, cookiefilePath, tokenPathOverride, lastSyncFallback string, workerPoolSize int, maxQPS, maxBurst int, ircg config.InRepoConfigGetter) *Controller {

	cfg := ca.Config
	projectsOptOutHelpMap := map[string]sets.Set[string]{}
	if cfg().Gerrit.OrgReposConfig != nil {
		projectsOptOutHelpMap = cfg().Gerrit.OrgReposConfig.OptOutHelpRepos()
	}
	lastSyncTracker := client.NewSyncTime(lastSyncFallback, op, ctx)

	if err := lastSyncTracker.Init(cfg().Gerrit.OrgReposConfig.AllRepos()); err != nil {
		logrus.WithError(err).Fatal("Error initializing lastSyncFallback.")
	}
	gerritClient, err := client.NewClient(nil, maxQPS, maxBurst)
	if err != nil {
		logrus.WithError(err).Fatal("Error creating gerrit client.")
	}
	c := &Controller{
		prowJobClient:               prowJobClient,
		config:                      cfg,
		gc:                          gerritClient,
		tracker:                     lastSyncTracker,
		projectsOptOutHelp:          projectsOptOutHelpMap,
		cookieFilePath:              cookiefilePath,
		configAgent:                 ca,
		inRepoConfigGetter:          ircg,
		inRepoConfigFailuresTracker: map[string]bool{},
		projectsWithWorker:          make(map[string]bool),
		workerPoolSize:              workerPoolSize,
	}

	// ApplyGlobalConfig reads gerrit configurations from the global gerrit
	// config. It completely overrides previously configured gerrit hosts and
	// projects, and also authenticates to gerrit as a side effect.
	orgRepoConfigGetter := func() *config.GerritOrgRepoConfigs {
		return cfg().Gerrit.OrgReposConfig
	}
	c.gc.ApplyGlobalConfig(orgRepoConfigGetter, lastSyncTracker, cookiefilePath, tokenPathOverride, func() {
		orgReposConfig := orgRepoConfigGetter()
		if orgReposConfig == nil {
			return
		}
		c.lock.Lock()
		// Updates a map; lock to make sure it's thread safe.
		c.projectsOptOutHelp = orgReposConfig.OptOutHelpRepos()
		c.lock.Unlock()
	})

	// Authenticate creates a goroutine for rotating token secrets when called
	// the first time; afterwards it only authenticates once.
	// ApplyGlobalConfig calls Authenticate only when a global gerrit config is
	// present, so calling it here is required for cases where gerrit repos are
	// defined as a command line arg (which is going to be deprecated).
	c.gc.Authenticate(cookiefilePath, tokenPathOverride)

	return c
}
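// A minimal usage sketch (hypothetical wiring, not part of this package's
// API): callers typically construct the controller once and then call Sync
// on a tick, letting Sync spin up per-project workers. Names such as
// pjClient, opener, configAgent, and ircg are assumed to come from the
// caller's setup code.
//
//	ctrl := NewController(ctx, pjClient, opener, configAgent,
//		"/etc/gerrit/cookiefile", "", "gs://my-bucket/gerrit-last-sync",
//		10 /* workerPoolSize */, 5 /* maxQPS */, 10 /* maxBurst */, ircg)
//	for range time.Tick(configAgent.Config().Gerrit.TickInterval.Duration) {
//		ctrl.Sync()
//	}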
type Change struct {
	changeInfo gerrit.ChangeInfo
	instance   string
	created    time.Time
}

func (c *Controller) processChange(latest client.LastSyncState, changeChan <-chan Change, log *logrus.Entry, wg *sync.WaitGroup, lastProjectSyncTime time.Time) {
	for changeStruct := range changeChan {
		change := changeStruct.changeInfo
		instance := changeStruct.instance
		gerritMetrics.pickupChangeLatency.WithLabelValues(instance, change.Project).Observe(float64(time.Since(changeStruct.created).Seconds()))

		log := log.WithFields(logrus.Fields{
			"branch":   change.Branch,
			"change":   change.Number,
			"repo":     change.Project,
			"revision": change.CurrentRevision,
		})

		now := time.Now()

		result := client.ResultSuccess
		if c.shouldTriggerJobs(change, lastProjectSyncTime) {
			if err := c.triggerJobs(log, instance, change); err != nil {
				result = client.ResultError
				log.WithError(err).Info("Failed to trigger jobs based on change")
			}
		} else {
			log.Info("Skipped triggering jobs for this change.")
		}
		gerritMetrics.processingResults.WithLabelValues(instance, change.Project, result).Inc()

		c.latestMux.Lock()
		lastTime, ok := latest[instance][change.Project]
		if !ok || lastTime.Before(change.Updated.Time) {
			lastTime = change.Updated.Time
			latest[instance][change.Project] = lastTime
		}
		c.latestMux.Unlock()
		wg.Done()

		gerritMetrics.processSingleChangeDuration.WithLabelValues(instance, change.Project).Observe(float64(time.Since(now).Seconds()))
	}
}

func (c *Controller) processSingleProject(instance, project string) {
	// Assumes the passed-in instance was already normalized with the https:// prefix.
	log := logrus.WithFields(logrus.Fields{"host": instance, "repo": project})
	tracker := c.tracker.Current()
	syncTime := time.Now()
	if projects, ok := tracker[instance]; ok {
		if t, ok := projects[project]; ok {
			syncTime = t
		}
	}
	latest := tracker.DeepCopy()

	now := time.Now()
	defer func() {
		gerritMetrics.changeSyncDuration.WithLabelValues(instance, project).Observe(float64(time.Since(now).Seconds()))
	}()

	timeQueryChangesForProject := time.Now()

	// Ignore the error. It is already logged.
	changes, err := c.gc.QueryChangesForProject(instance, project, syncTime, c.config().Gerrit.RateLimit)
	queryResult := func() string {
		if err == nil {
			return client.ResultSuccess
		}
		return client.ResultError
	}()
	log = log.WithFields(logrus.Fields{
		"lastUpdate":    syncTime.String(),
		"queryStart":    timeQueryChangesForProject.String(),
		"queryDuration": time.Since(timeQueryChangesForProject).String(),
		"changeCount":   len(changes),
		"result":        queryResult,
	})
	gerritMetrics.gerritRepoQueryDuration.WithLabelValues(instance, project, queryResult).Observe(float64(time.Since(timeQueryChangesForProject).Seconds()))
	checkAndLogQuery(log, changes)

	if len(changes) == 0 {
		return
	}

	timeProcessChangesForProject := time.Now()
	var wg sync.WaitGroup
	wg.Add(len(changes))
	changeChan := make(chan Change)

	poolSize := c.workerPoolSize
	if poolSize > len(changes) {
		poolSize = len(changes)
	}
	for i := 0; i < poolSize; i++ {
		go c.processChange(latest, changeChan, log, &wg, syncTime)
	}
	// We need to call time.Now() outside the loop below, since <- blocks
	// while no worker threads are available, which could cause time.Now()
	// to be called later than intended.
	timeChangesCreated := time.Now()
	for _, change := range changes {
		changeChan <- Change{changeInfo: change, instance: instance, created: timeChangesCreated}
	}
	wg.Wait()
	gerritMetrics.changeProcessDuration.WithLabelValues(instance, project).Observe(float64(time.Since(timeProcessChangesForProject).Seconds()))
	close(changeChan)
	c.tracker.Update(latest)
}

func checkAndLogQuery(log *logrus.Entry, changes []gerrit.ChangeInfo) {
	seen := sets.NewInt()
	for _, change := range changes {
		if seen.Has(change.Number) {
			log.WithField("change", change.Number).Error("Gerrit API bug! Received multiple updates for a change from a single query.")
		}
		seen.Insert(change.Number)
	}
	log.Infof("Query returned changes: %v", seen.List())
}

// Sync looks for newly made gerrit changes
// and creates prowjobs according to specs.
func (c *Controller) Sync() {
	// Identify projects without worker threads.
	id := func(instance, project string) string { return fmt.Sprintf("%s/%s", instance, project) }
	needsWorker := map[string][]string{}
	needsWorkerCount := map[string]int{}
	for instance, projects := range c.config().Gerrit.OrgReposConfig.AllRepos() {
		for project := range projects {
			if _, ok := c.projectsWithWorker[id(instance, project)]; ok {
				// The worker thread is already up for this project, nothing
				// needs to be done.
				continue
			}
			needsWorker[instance] = append(needsWorker[instance], project)
			needsWorkerCount[instance]++
		}
	}
	// First time seeing these projects, spin up worker threads for them.
	staggerPosition := 0
	for instance, projects := range needsWorker {
		staggerIncrement := c.config().Gerrit.TickInterval.Duration / time.Duration(needsWorkerCount[instance])
		for _, project := range projects {
			c.projectsWithWorker[id(instance, project)] = true
			logrus.WithFields(logrus.Fields{"instance": instance, "repo": project}).Info("Starting worker for project.")
			go func(instance, project string, staggerPosition int) {
				// Stagger new worker threads across the loop period to reduce
				// load on the Gerrit API and Git server.
				napTime := staggerIncrement * time.Duration(staggerPosition)
				time.Sleep(napTime)

				// Now start the repo worker thread.
				previousRun := time.Now()
				for {
					timeDiff := time.Until(previousRun.Add(c.config().Gerrit.TickInterval.Duration))
					if timeDiff > 0 {
						time.Sleep(timeDiff)
					}
					previousRun = time.Now()
					c.processSingleProject(instance, project)
				}
			}(instance, project, staggerPosition)
			staggerPosition++
		}
	}
}
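// Stagger arithmetic, for illustration (hypothetical numbers): with
// TickInterval = 10m and 4 newly discovered projects on an instance,
// staggerIncrement is 2m30s, so those four workers begin their loops
// roughly at +0m, +2m30s, +5m, and +7m30s before settling into the shared
// 10m cadence.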
// CreateRefs creates refs for a presubmit job from the given changes.
//
// The passed-in instance must contain the https:// prefix.
func CreateRefs(instance, project, branch, baseSHA string, changes ...client.ChangeInfo) (prowapi.Refs, error) {
	var refs prowapi.Refs
	cloneURI := source.CloneURIFromOrgRepo(instance, project)

	// Something like https://android.googlesource.com
	codeHost := source.EnsureCodeURL(instance)

	refs = prowapi.Refs{
		Org:      instance, // Something like android-review.googlesource.com
		Repo:     project,  // Something like platform/build
		BaseRef:  branch,
		BaseSHA:  baseSHA,
		CloneURI: cloneURI, // Something like https://android-review.googlesource.com/platform/build
		RepoLink: fmt.Sprintf("%s/%s", codeHost, project),
		BaseLink: fmt.Sprintf("%s/%s/+/%s", codeHost, project, baseSHA),
	}
	for _, change := range changes {
		rev, ok := change.Revisions[change.CurrentRevision]
		if !ok {
			return prowapi.Refs{}, fmt.Errorf("cannot find current revision for change %v", change.ID)
		}
		refs.Pulls = append(refs.Pulls, prowapi.Pull{
			Number:     change.Number,
			Author:     rev.Commit.Author.Name,
			SHA:        change.CurrentRevision,
			Ref:        rev.Ref,
			Link:       fmt.Sprintf("%s/c/%s/+/%d", instance, change.Project, change.Number),
			CommitLink: fmt.Sprintf("%s/%s/+/%s", codeHost, change.Project, change.CurrentRevision),
			AuthorLink: fmt.Sprintf("%s/q/%s", instance, rev.Commit.Author.Email),
		})
	}
	return refs, nil
}
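// A worked example (hypothetical values, extrapolating from the inline
// comments above): for instance "https://android-review.googlesource.com",
// project "platform/build", branch "main", and baseSHA "abc123", the
// resulting refs would look roughly like:
//
//	Org:      "https://android-review.googlesource.com"
//	Repo:     "platform/build"
//	BaseRef:  "main"
//	BaseSHA:  "abc123"
//	CloneURI: "https://android-review.googlesource.com/platform/build"
//	RepoLink: "https://android.googlesource.com/platform/build"
//	BaseLink: "https://android.googlesource.com/platform/build/+/abc123"
//
// with one Pulls entry per change, populated from that change's current
// revision.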
func LabelsAndAnnotations(instance string, jobLabels, jobAnnotations map[string]string, changes ...client.ChangeInfo) (labels, annotations map[string]string) {
	labels, annotations = make(map[string]string), make(map[string]string)
	for k, v := range jobLabels {
		labels[k] = v
	}
	for k, v := range jobAnnotations {
		annotations[k] = v
	}
	annotations[kube.GerritInstance] = instance

	// Labels required for Crier to report back to Gerrit. Batch jobs are not
	// expected to report, so only add these when there is a single change.
	if len(changes) == 1 {
		change := changes[0]
		labels[kube.GerritRevision] = change.CurrentRevision
		labels[kube.GerritPatchset] = strconv.Itoa(change.Revisions[change.CurrentRevision].Number)
		if _, ok := labels[kube.GerritReportLabel]; !ok {
			logrus.Debug("Job uses default value of 'Code-Review' for 'prow.k8s.io/gerrit-report-label' label. This default will be removed in March 2022.")
			labels[kube.GerritReportLabel] = client.CodeReview
		}

		annotations[kube.GerritID] = change.ID
	}

	return
}

// failedJobs finds jobs currently reported as failing (used for retesting).
//
// Failing means the job is complete and not passing. This scans messages for
// prow reports, which list jobs and whether they passed. A job is included in
// the returned set if the latest report has it failing.
func failedJobs(account int, revision int, messages ...gerrit.ChangeMessageInfo) sets.Set[string] {
	failures := sets.Set[string]{}
	times := map[string]time.Time{}
	for _, message := range messages {
		if message.Author.AccountID != account { // Ignore reports from other accounts
			continue
		}
		if message.RevisionNumber != revision { // Ignore reports for old commits
			continue
		}
		// TODO(fejta): parse triggered job reports and remove from failure set.
		// (alternatively, refactor this whole process to rely less on fragile string parsing)
		report := reporter.ParseReport(message.Message)
		if report == nil {
			continue
		}
		for _, job := range report.Jobs {
			name := job.Name
			if latest, present := times[name]; present && message.Date.Before(latest) {
				continue
			}
			times[name] = message.Date.Time
			if job.State == prowapi.FailureState || job.State == prowapi.ErrorState || job.State == prowapi.AbortedState {
				failures.Insert(name)
			} else {
				failures.Delete(name)
			}
		}
	}
	return failures
}
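// Illustration (hypothetical report history): suppose the prow account posted
// two reports on revision 3, an older one marking job-A FAILURE and job-B
// SUCCESS, then a newer one marking job-A SUCCESS. Because the latest report
// per job wins, failedJobs(account, 3, messages...) returns an empty set:
// job-A's failure was superseded and job-B never failed.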
func (c *Controller) handleInRepoConfigError(err error, instance string, change gerrit.ChangeInfo) error {
	key := fmt.Sprintf("%s%s%s", instance, change.ID, change.CurrentRevision)
	if err != nil {
		// Only report back to Gerrit if we have not reported previously.
		// If any new `/test` commands are given and fail for the same reason,
		// we won't post another error message, which can be confusing to
		// users. This behavior is to prevent us from reporting the failure
		// again on unrelated comments (including the error message itself!),
		// but we don't need this behavior if we don't process irrelevant
		// comments, which is the case when AllowedPresubmitTriggerRe is
		// specified.
		skipIrrelevantComments := c.config().Gerrit.AllowedPresubmitTriggerReRawString != ""
		if _, alreadyReported := c.inRepoConfigFailuresTracker[key]; !alreadyReported || skipIrrelevantComments {
			msg := fmt.Sprintf("%s: %v", inRepoConfigFailed, err)
			if setReviewErr := c.gc.SetReview(instance, change.ID, change.CurrentRevision, msg, nil); setReviewErr != nil {
				return fmt.Errorf("failed to get inRepoConfig and failed to set Review to notify user: %v and %v", err, setReviewErr)
			}
			// The boolean value here is meaningless as we use the tracker as
			// a set data structure, not as a hashmap where values actually
			// matter. We just use a bool for simplicity.
			c.inRepoConfigFailuresTracker[key] = true
		}

		// We do not want to return that there was an error processing the
		// change. If we are unable to get inRepoConfig we do not process.
		// This is expected behavior.
		return nil
	}

	// If we are passing now, remove any record of previous failures in our
	// tracker to allow future failures to send an error message back to
	// Gerrit (through this same function).
	delete(c.inRepoConfigFailuresTracker, key)
	return nil
}

// shouldTriggerJobs returns true if we should trigger jobs for the given
// change.
func (c *Controller) shouldTriggerJobs(change client.ChangeInfo, lastProjectSyncTime time.Time) bool {
	// Do not skip postsubmit jobs.
	if change.Status == client.Merged {
		return true
	}
	revision := change.Revisions[change.CurrentRevision]
	if revision.Created.After(lastProjectSyncTime) {
		return true
	}

	for _, message := range currentMessages(change, lastProjectSyncTime) {
		if c.messageContainsJobTriggeringCommand(message) {
			return true
		}
		if indicatesChangeFromDraftToActiveState(message.Message) {
			return true
		}
	}

	return false
}

func (c *Controller) messageContainsJobTriggeringCommand(message gerrit.ChangeMessageInfo) bool {
	return pjutil.RetestRe.MatchString(message.Message) ||
		pjutil.TestAllRe.MatchString(message.Message) ||
		c.configAgent.Config().Gerrit.IsAllowedPresubmitTrigger(message.Message)
}
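// For reference (behavior implied by the checks above, not new logic):
// comments such as "/retest" and "/test all" match prow's standard command
// regexes, while anything else, e.g. a hypothetical "/test my-job", only
// counts as a trigger if it matches the instance's configured
// Gerrit.AllowedPresubmitTriggerRe.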
// triggerJobs creates new presubmit/postsubmit prowjobs based off the gerrit
// changes.
func (c *Controller) triggerJobs(logger logrus.FieldLogger, instance string, change client.ChangeInfo) error {
	cloneURI := source.CloneURIFromOrgRepo(instance, change.Project)
	baseSHA, err := c.gc.GetBranchRevision(instance, change.Project, change.Branch)
	if err != nil {
		return fmt.Errorf("GetBranchRevision: %w", err)
	}

	type triggeredJob struct {
		name   string
		report bool
	}
	var triggeredJobs []triggeredJob
	triggerTimes := map[string]time.Time{}

	refs, err := CreateRefs(instance, change.Project, change.Branch, baseSHA, change)
	if err != nil {
		return fmt.Errorf("createRefs from %s at %s: %w", cloneURI, baseSHA, err)
	}

	type jobSpec struct {
		spec        prowapi.ProwJobSpec
		labels      map[string]string
		annotations map[string]string
	}
	var jobSpecs []jobSpec
	baseSHAGetter := func() (string, error) { return baseSHA, nil }
	var hasRelatedChanges *bool
	// This headSHAGetter returns the empty string instead of the head SHA in
	// cases where we can be certain that the change does not modify
	// inrepoconfig. This allows multiple changes to share a ProwYAML cache
	// entry so long as they don't touch the inrepo config themselves.
	headSHAGetter := func() (string, error) {
		changes, err := client.ChangedFilesProvider(&change)()
		if err != nil {
			// This is a best-effort optimization; log the error, but just use
			// CurrentRevision in this case.
			logger.WithError(err).Info("Failed to get changed files for the purpose of prowYAML cache optimization. Skipping optimization.")
			return change.CurrentRevision, nil
		}
		if config.ContainsInRepoConfigPath(changes) {
			return change.CurrentRevision, nil
		}
		if hasRelatedChanges == nil {
			if res, err := c.gc.HasRelatedChanges(instance, change.ChangeID, change.CurrentRevision); err != nil {
				logger.WithError(err).Info("Failed to get related changes for the purpose of prowYAML cache optimization. Skipping optimization.")
				return change.CurrentRevision, nil
			} else {
				hasRelatedChanges = &res
			}
		}
		if *hasRelatedChanges {
			// If the change is part of a chain, the commit may include files
			// not identified by the API, so we can't easily check whether the
			// change includes inrepo config file changes.
			return change.CurrentRevision, nil
		}
		// If we know the change doesn't touch the inrepo config itself, we
		// don't need to check out the head commits. This is particularly
		// useful because it lets multiple changes share a ProwYAML cache
		// entry so long as they don't touch the inrepo config themselves.
		return "", nil
	}

	switch change.Status {
	case client.Merged:
		var postsubmits []config.Postsubmit
		// The Gerrit server might be unavailable intermittently; retry
		// inrepoconfig processing for increased reliability.
		for attempt := 0; attempt < inRepoConfigRetries; attempt++ {
			postsubmits, err = c.inRepoConfigGetter.GetPostsubmits(cloneURI, change.Branch, baseSHAGetter, headSHAGetter)
			// Break if there was no error, or if there was a merge conflict.
			if err == nil {
				gerritMetrics.inrepoconfigResults.WithLabelValues(instance, change.Project, client.ResultSuccess).Inc()
				break
			}
			if strings.Contains(err.Error(), "Merge conflict in") {
				break
			}
		}
		// Postsubmit jobs are triggered only once. Still try to fall back on
		// static jobs if we failed to retrieve inrepoconfig jobs.
		if err != nil {
			gerritMetrics.inrepoconfigResults.WithLabelValues(instance, change.Project, client.ResultError).Inc()

			// Report the error back to Gerrit. handleInRepoConfigError is
			// responsible for not sending the same message again and again on
			// the same commit.
			if postErr := c.handleInRepoConfigError(err, instance, change); postErr != nil {
				logger.WithError(postErr).Error("Failed reporting inrepoconfig processing error back to Gerrit.")
			}
			// Static postsubmit jobs are included as part of the output from
			// inRepoConfigGetter.GetPostsubmits, so fall back to static jobs
			// only when inrepoconfig failed.
			postsubmits = append(postsubmits, c.config().GetPostsubmitsStatic(cloneURI)...)
		}

		for _, postsubmit := range postsubmits {
			if shouldRun, err := postsubmit.ShouldRun(change.Branch, client.ChangedFilesProvider(&change)); err != nil {
				return fmt.Errorf("failed to determine if postsubmit %q should run: %w", postsubmit.Name, err)
			} else if shouldRun {
				if change.Submitted != nil {
					triggerTimes[postsubmit.Name] = change.Submitted.Time
				}
				jobSpecs = append(jobSpecs, jobSpec{
					spec:        pjutil.PostsubmitSpec(postsubmit, refs),
					labels:      postsubmit.Labels,
					annotations: postsubmit.Annotations,
				})
			}
		}
	case client.New:
		var presubmits []config.Presubmit
		// The Gerrit server might be unavailable intermittently; retry
		// inrepoconfig processing for increased reliability.
		for attempt := 0; attempt < inRepoConfigRetries; attempt++ {
			presubmits, err = c.inRepoConfigGetter.GetPresubmits(cloneURI, change.Branch, baseSHAGetter, headSHAGetter)
			if err == nil {
				break
			}
		}
		if err != nil {
			// Report the error back to Gerrit. handleInRepoConfigError is
			// responsible for not sending the same message again and again on
			// the same commit.
			if postErr := c.handleInRepoConfigError(err, instance, change); postErr != nil {
				logger.WithError(postErr).Error("Failed reporting inrepoconfig processing error back to Gerrit.")
			}
			// There is no need to keep going when we failed to get
			// inrepoconfig jobs.
			// Imagine the following scenario:
			// - Commit #abc triggered static job job-A and inrepoconfig jobs
			//   job-B and job-C
			// - Both job-B and job-C failed
			// - Commit #def was pushed. Inrepoconfig failed, falling back to
			//   triggering static job job-A.
			// - job-A passed.
			// - Prow would make a decision based on the result of job-A and
			//   ignore the rest. (Yes, this is a Prow bug; it should not be a
			//   problem once each prowjob is reported to an individual Gerrit
			//   Check.)
			// So, long story short: kicking off partial prowjobs is worse
			// than kicking off nothing.
			return err
		}

		account, err := c.gc.Account(instance)
		if err != nil {
			// This would happen if authenticateOnce hasn't registered this
			// instance yet.
			return fmt.Errorf("account not found for %q: %w", instance, err)
		}

		lastUpdate, ok := c.tracker.Current()[instance][change.Project]
		if !ok {
			lastUpdate = time.Now()
			logger.WithField("lastUpdate", lastUpdate).Warnf("lastUpdate not found, falling back to now")
		}

		revision := change.Revisions[change.CurrentRevision]
		failedJobs := failedJobs(account.AccountID, revision.Number, change.Messages...)
		failed, all := presubmitContexts(failedJobs, presubmits, logger)
		messages := currentMessages(change, lastUpdate)
		logger.WithField("failed", len(failed)).Debug("Failed jobs parsed from previous comments.")
		filters := []pjutil.Filter{
			messageFilter(messages, failed, all, triggerTimes, logger),
		}
		// Automatically trigger the Prow jobs if the revision is new and the
		// change is not in WorkInProgress.
		if revision.Created.Time.After(lastUpdate) && !change.WorkInProgress {
			filters = append(filters, &timeAnnotationFilter{
				Filter:       pjutil.NewTestAllFilter(),
				eventTime:    revision.Created.Time,
				triggerTimes: triggerTimes,
			})
		}
		toTrigger, err := pjutil.FilterPresubmits(pjutil.NewAggregateFilter(filters), client.ChangedFilesProvider(&change), change.Branch, presubmits, logger)
		if err != nil {
			return fmt.Errorf("filter presubmits: %w", err)
		}
		// At this point triggerTimes should be properly populated as a side
		// effect of FilterPresubmits.

		// Reply with help information on how to run the presubmit Prow jobs,
		// if requested.
		for _, msg := range messages {
			needsHelp, note := pjutil.ShouldRespondWithHelp(msg.Message, len(toTrigger))
			// Lock for projectsOptOutHelp, which is a map.
			c.lock.RLock()
			optedOut := isProjectOptOutHelp(c.projectsOptOutHelp, instance, change.Project)
			c.lock.RUnlock()
			if needsHelp && !optedOut {
				runWithTestAllNames, optionalJobsCommands, requiredJobsCommands, err := pjutil.AvailablePresubmits(client.ChangedFilesProvider(&change), change.Branch, presubmits, logger.WithField("help", true))
				if err != nil {
					return err
				}
				message := pjutil.HelpMessage(instance, change.Project, change.Branch, note, runWithTestAllNames, optionalJobsCommands, requiredJobsCommands)
				if err := c.gc.SetReview(instance, change.ID, change.CurrentRevision, message, nil); err != nil {
					return err
				}
				gerritMetrics.triggerHelpLatency.WithLabelValues(instance).Observe(float64(time.Since(msg.Date.Time).Seconds()))
				// Only respond to the first message that requests help
				// information.
				break
			}
		}

		for _, presubmit := range toTrigger {
			jobSpecs = append(jobSpecs, jobSpec{
				spec:        pjutil.PresubmitSpec(presubmit, refs),
				labels:      presubmit.Labels,
				annotations: presubmit.Annotations,
			})
		}
	}

	schedulerEnabled := c.config().Scheduler.Enabled

	for _, jSpec := range jobSpecs {
		labels, annotations := LabelsAndAnnotations(instance, jSpec.labels, jSpec.annotations, change)

		pj := pjutil.NewProwJob(jSpec.spec, labels, annotations, pjutil.RequireScheduling(schedulerEnabled))

		logger := logger.WithField("prowjob", pj.Name)
		timeBeforeCreate := time.Now()
		if _, err := c.prowJobClient.Create(context.TODO(), &pj, metav1.CreateOptions{}); err != nil {
			logger.WithError(err).Errorf("Failed to create ProwJob")
			continue
		}
		gerritMetrics.jobCreationDuration.WithLabelValues(instance, change.Project).Observe(float64(time.Since(timeBeforeCreate).Seconds()))
		logger.Infof("Triggered new job")
		if eventTime, ok := triggerTimes[pj.Spec.Job]; ok {
			gerritMetrics.triggerLatency.WithLabelValues(instance, change.Project).Observe(float64(time.Since(eventTime).Seconds()))
		}
		triggeredJobs = append(triggeredJobs, triggeredJob{
			name:   jSpec.spec.Job,
			report: jSpec.spec.Report,
		})
	}

	if len(triggeredJobs) == 0 {
		return nil
	}

	// Comment back to gerrit if Report is set for any of the jobs.
	var reportingJobs int
	var jobList string
	for _, job := range triggeredJobs {
		if job.report {
			jobList += fmt.Sprintf("\n * Name: %s", job.name)
			reportingJobs++
		}
	}

	if reportingJobs > 0 {
		message := fmt.Sprintf("Triggered %d prow jobs (%d suppressed reporting): ", len(triggeredJobs), len(triggeredJobs)-reportingJobs)
		// If we have a Deck URL, link to all results for the CL; otherwise
		// list the triggered jobs.
		link, err := deckLinkForPR(c.config().Gerrit.DeckURL, refs, change.Status)
		if err != nil {
			logger.WithError(err).Error("Failed to generate link to job results on Deck.")
		}
		if link != "" && err == nil {
			message = message + link
		} else {
			message = message + jobList
		}
		if err := c.gc.SetReview(instance, change.ID, change.CurrentRevision, message, nil); err != nil {
			return err
		}
	}

	return nil
}

// isProjectOptOutHelp returns whether the project has opted out of receiving
// help information about how to run presubmit tests on its changes.
func isProjectOptOutHelp(projectsOptOutHelp map[string]sets.Set[string], instance, project string) bool {
	ps, ok := projectsOptOutHelp[instance]
	if !ok {
		return false
	}
	return ps.Has(project)
}

func deckLinkForPR(deckURL string, refs prowapi.Refs, changeStatus string) (string, error) {
	if deckURL == "" || changeStatus == client.Merged {
		return "", nil
	}

	parsed, err := url.Parse(deckURL)
	if err != nil {
		return "", fmt.Errorf("failed to parse gerrit.deck_url (impossible: this should have been caught at load time): %w", err)
	}
	query := parsed.Query()
	query.Set("repo", fmt.Sprintf("%s/%s", refs.Org, refs.Repo))
	if len(refs.Pulls) != 1 {
		return "", fmt.Errorf("impossible: triggered jobs for a Gerrit change, but refs.Pulls did not contain exactly one pull")
	}
	query.Set("pull", strconv.Itoa(refs.Pulls[0].Number))
	parsed.RawQuery = query.Encode()
	return parsed.String(), nil
}
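// A worked example (hypothetical values): with deckURL
// "https://prow.example.com/", refs.Org "https://foo-review.googlesource.com",
// refs.Repo "bar", and a single pull #12345, deckLinkForPR returns a URL of
// the form
// "https://prow.example.com/?pull=12345&repo=https%3A%2F%2Ffoo-review.googlesource.com%2Fbar"
// (query keys are sorted and percent-encoded by url.Values.Encode).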