sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/tide/tide.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package tide contains a controller for managing a tide pool of PRs. The
// controller will automatically retest PRs in the pool and merge them if they
// pass tests.
package tide

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	utilerrors "k8s.io/apimachinery/pkg/util/errors"

	"github.com/prometheus/client_golang/prometheus"
	githubql "github.com/shurcooL/githubv4"
	"github.com/sirupsen/logrus"
	"k8s.io/apimachinery/pkg/util/sets"
	ctrlruntimeclient "sigs.k8s.io/controller-runtime/pkg/client"

	prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
	"sigs.k8s.io/prow/pkg/config"
	"sigs.k8s.io/prow/pkg/git/v2"
	"sigs.k8s.io/prow/pkg/github"
	"sigs.k8s.io/prow/pkg/io"
	"sigs.k8s.io/prow/pkg/kube"
	"sigs.k8s.io/prow/pkg/pjutil"
	"sigs.k8s.io/prow/pkg/tide/blockers"
	"sigs.k8s.io/prow/pkg/tide/history"
	_ "sigs.k8s.io/prow/pkg/version"
)

// sleep is a package-level hook for mocking out time.Sleep during unit tests.
var sleep = time.Sleep

type githubClient interface {
	CreateStatus(string, string, string, github.Status) error
	GetCombinedStatus(org, repo, ref string) (*github.CombinedStatus, error)
	ListCheckRuns(org, repo, ref string) (*github.CheckRunList, error)
	GetPullRequestChanges(org, repo string, number int) ([]github.PullRequestChange, error)
	GetRef(string, string, string) (string, error)
	GetRepo(owner, name string) (github.FullRepo, error)
	Merge(string, string, int, github.MergeDetails) error
	QueryWithGitHubAppsSupport(ctx context.Context, q interface{}, vars map[string]interface{}, org string) error
}

type contextChecker interface {
	// IsOptional tells whether a context is optional.
	IsOptional(string) bool
	// MissingRequiredContexts tells if required contexts are missing from the list of contexts provided.
	MissingRequiredContexts([]string) []string
}

// syncController knows how to sync PRs and ProwJobs.
type syncController struct {
	ctx           context.Context
	logger        *logrus.Entry
	config        config.Getter
	prowJobClient ctrlruntimeclient.Client
	provider      provider
	pickNewBatch  func(sp subpool, candidates []CodeReviewCommon, maxBatchSize int) ([]CodeReviewCommon, error)

	m     sync.Mutex
	pools []Pool

	// changedFiles caches the names of files changed by PRs.
	// Cache entries expire if they are not used during a sync loop.
	changedFiles *changedFilesAgent

	History *history.History

	// Shared fields with the status controller.
	statusUpdate *statusUpdate
}
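// Both the sleep hook and the githubClient interface above exist to make this
// controller testable. An illustrative sketch (hypothetical test code, not
// part of this file) of how a unit test might stub them:
//
//	func TestBackoffSleeps(t *testing.T) {
//		var slept []time.Duration
//		sleep = func(d time.Duration) { slept = append(slept, d) }
//		defer func() { sleep = time.Sleep }()
//		// ... exercise code that retries with backoff, then assert on slept.
//	}
//
// A fake implementing githubClient can be substituted the same way wherever
// the interface is accepted.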
// Action represents what actions the controller can take. It will take
// exactly one action each sync.
type Action string

// Constants for the various actions the controller might take.
const (
	Wait         Action = "WAIT"
	Trigger      Action = "TRIGGER"
	TriggerBatch Action = "TRIGGER_BATCH"
	Merge        Action = "MERGE"
	MergeBatch   Action = "MERGE_BATCH"
	PoolBlocked  Action = "BLOCKED"
)

// recordableActions is the subset of actions that we keep a historical record of.
// Idle actions are ignored to avoid flooding the records with useless data.
var recordableActions = map[Action]bool{
	Trigger:      true,
	TriggerBatch: true,
	Merge:        true,
	MergeBatch:   true,
}

// Pool represents information about a tide pool. There is one for every
// org/repo/branch combination that has PRs in the pool.
type Pool struct {
	Org    string
	Repo   string
	Branch string

	// PRs with passing tests, pending tests, and missing or failed tests.
	// Note that these results are rolled up. If all tests for a PR are passing
	// except for one pending, it will be in PendingPRs.
	SuccessPRs []CodeReviewCommon
	PendingPRs []CodeReviewCommon
	MissingPRs []CodeReviewCommon

	// Empty if there is no pending batch.
	BatchPending []CodeReviewCommon

	// Which action did we last take, and to what target(s), if any.
	Action   Action
	Target   []CodeReviewCommon
	Blockers []blockers.Blocker
	Error    string

	// All of the TenantIDs associated with PRs in the pool.
	TenantIDs []string
}
// PoolForDeck contains the same data as Pool; the only difference is that it
// holds a minified view of CodeReviewCommon, MinCodeReviewCommon, which is
// good for deck because it serializes to a much smaller representation.
type PoolForDeck struct {
	Org    string
	Repo   string
	Branch string

	// PRs with passing tests, pending tests, and missing or failed tests.
	// Note that these results are rolled up. If all tests for a PR are passing
	// except for one pending, it will be in PendingPRs.
	SuccessPRs []MinCodeReviewCommon
	PendingPRs []MinCodeReviewCommon
	MissingPRs []MinCodeReviewCommon

	// Empty if there is no pending batch.
	BatchPending []MinCodeReviewCommon

	// Which action did we last take, and to what target(s), if any.
	Action   Action
	Target   []MinCodeReviewCommon
	Blockers []blockers.Blocker
	Error    string

	// All of the TenantIDs associated with PRs in the pool.
	TenantIDs []string
}

// PoolToPoolForDeck converts a Pool into its minified form for deck.
func PoolToPoolForDeck(p *Pool) *PoolForDeck {
	crcToMin := func(crcs []CodeReviewCommon) []MinCodeReviewCommon {
		var res []MinCodeReviewCommon
		for _, crc := range crcs {
			res = append(res, MinCodeReviewCommon(crc))
		}
		return res
	}
	pfd := &PoolForDeck{
		Org:          p.Org,
		Repo:         p.Repo,
		Branch:       p.Branch,
		SuccessPRs:   crcToMin(p.SuccessPRs),
		PendingPRs:   crcToMin(p.PendingPRs),
		MissingPRs:   crcToMin(p.MissingPRs),
		BatchPending: crcToMin(p.BatchPending),
		Action:       p.Action,
		Target:       crcToMin(p.Target),
		Blockers:     p.Blockers,
		Error:        p.Error,
		TenantIDs:    p.TenantIDs,
	}
	return pfd
}
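// An illustrative sketch (hypothetical caller, not part of this file) of how
// the minified form might be serialized for deck:
//
//	pfd := PoolToPoolForDeck(&pool)
//	b, err := json.Marshal(pfd)
//	if err != nil {
//		// handle the error
//	}
//	// b now holds the compact JSON that deck's tide page can consume.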
// Prometheus metrics.
var (
	tideMetrics = struct {
		// Per pool.
		pooledPRs    *prometheus.GaugeVec
		updateTime   *prometheus.GaugeVec
		merges       *prometheus.HistogramVec
		poolErrors   *prometheus.CounterVec
		queryResults *prometheus.CounterVec

		// Singletons.
		syncDuration         prometheus.Gauge
		statusUpdateDuration prometheus.Gauge

		// Per controller.
		syncHeartbeat *prometheus.CounterVec
	}{
		pooledPRs: prometheus.NewGaugeVec(prometheus.GaugeOpts{
			Name: "pooledprs",
			Help: "Number of PRs in each Tide pool.",
		}, []string{
			"org",
			"repo",
			"branch",
		}),
		updateTime: prometheus.NewGaugeVec(prometheus.GaugeOpts{
			Name: "updatetime",
			Help: "The last time each subpool was synced. (Used to determine 'pooledprs' freshness.)",
		}, []string{
			"org",
			"repo",
			"branch",
		}),

		merges: prometheus.NewHistogramVec(prometheus.HistogramOpts{
			Name:    "merges",
			Help:    "Histogram of merges where values are the number of PRs merged together.",
			Buckets: []float64{1, 2, 3, 4, 5, 7, 10, 15, 25},
		}, []string{
			"org",
			"repo",
			"branch",
		}),

		poolErrors: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "tidepoolerrors",
			Help: "Count of Tide pool sync errors.",
		}, []string{
			"org",
			"repo",
			"branch",
		}),

		queryResults: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "tidequeryresults",
			Help: "Count of Tide queries by query index, org shard, and result (success/error).",
		}, []string{
			"query_index",
			"org_shard",
			"result",
		}),

		// Use the sync heartbeat counter to monitor for liveness. Use the duration
		// gauges for precise sync duration graphs since the prometheus scrape
		// period is likely much larger than the loop periods.
		syncDuration: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "syncdur",
			Help: "The duration of the last loop of the sync controller.",
		}),
		statusUpdateDuration: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "statusupdatedur",
			Help: "The duration of the last loop of the status update controller.",
		}),

		syncHeartbeat: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "tidesyncheartbeat",
			Help: "Count of Tide syncs per controller.",
		}, []string{
			"controller",
		}),
	}
)

func init() {
	prometheus.MustRegister(tideMetrics.pooledPRs)
	prometheus.MustRegister(tideMetrics.updateTime)
	prometheus.MustRegister(tideMetrics.merges)
	prometheus.MustRegister(tideMetrics.syncDuration)
	prometheus.MustRegister(tideMetrics.statusUpdateDuration)
	prometheus.MustRegister(tideMetrics.syncHeartbeat)
	prometheus.MustRegister(tideMetrics.poolErrors)
	prometheus.MustRegister(tideMetrics.queryResults)
}

type manager interface {
	GetClient() ctrlruntimeclient.Client
	GetFieldIndexer() ctrlruntimeclient.FieldIndexer
}

// Controller wraps the sync and status controllers.
type Controller struct {
	syncCtrl   *syncController
	statusCtrl *statusController
}

// Shutdown signals the statusController to stop working and waits for it to
// finish its last update loop before terminating.
// Controller.Sync() should not be used after this function is called.
func (c *Controller) Shutdown() {
	c.syncCtrl.History.Flush()
	c.statusCtrl.shutdown()
}

func (c *Controller) Sync() error {
	return c.syncCtrl.Sync()
}

func (c *Controller) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	c.syncCtrl.ServeHTTP(w, r)
}

func (c *Controller) History() *history.History {
	return c.syncCtrl.History
}
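// The heartbeat counter registered above is the intended liveness signal: it
// should tick once per loop per controller. A hypothetical Prometheus alert
// expression (illustrative only; the exposed metric name and the threshold
// are assumptions, not part of this file):
//
//	# Fire if the sync controller recorded no heartbeat for 15 minutes.
//	rate(tidesyncheartbeat{controller="sync"}[15m]) == 0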
// NewController makes a Controller out of the given clients.
func NewController(
	ghcSync,
	ghcStatus github.Client,
	mgr manager,
	cfg config.Getter,
	gc git.ClientFactory,
	maxRecordsPerPool int,
	opener io.Opener,
	historyURI,
	statusURI string,
	logger *logrus.Entry,
	usesGitHubAppsAuth bool,
) (*Controller, error) {
	if logger == nil {
		logger = logrus.NewEntry(logrus.StandardLogger())
	}
	hist, err := history.New(maxRecordsPerPool, opener, historyURI)
	if err != nil {
		return nil, fmt.Errorf("error initializing history client from %q: %w", historyURI, err)
	}
	mergeChecker := newMergeChecker(cfg, ghcSync)

	ctx := context.Background()
	// Shared fields.
	statusUpdate := &statusUpdate{
		dontUpdateStatus: &threadSafePRSet{},
		newPoolPending:   make(chan bool),
	}

	sc, err := newStatusController(ctx, logger, ghcStatus, mgr, gc, cfg, opener, statusURI, mergeChecker, usesGitHubAppsAuth, statusUpdate)
	if err != nil {
		return nil, err
	}
	go sc.run()

	provider := newGitHubProvider(logger, ghcSync, gc, cfg, mergeChecker, usesGitHubAppsAuth)
	syncCtrl, err := newSyncController(ctx, logger, mgr, provider, cfg, gc, hist, usesGitHubAppsAuth, statusUpdate)
	if err != nil {
		return nil, err
	}
	return &Controller{syncCtrl: syncCtrl, statusCtrl: sc}, nil
}

func newStatusController(
	ctx context.Context,
	logger *logrus.Entry,
	ghc githubClient,
	mgr manager,
	gc git.ClientFactory,
	cfg config.Getter,
	opener io.Opener,
	statusURI string,
	mergeChecker *mergeChecker,
	usesGitHubAppsAuth bool,
	statusUpdate *statusUpdate,
) (*statusController, error) {
	if err := mgr.GetFieldIndexer().IndexField(ctx, &prowapi.ProwJob{}, indexNamePassingJobs, indexFuncPassingJobs); err != nil {
		return nil, fmt.Errorf("failed to add index for passing jobs to cache: %w", err)
	}
	return &statusController{
		pjClient:           mgr.GetClient(),
		logger:             logger.WithField("controller", "status-update"),
		ghProvider:         newGitHubProvider(logger, ghc, gc, cfg, mergeChecker, usesGitHubAppsAuth),
		ghc:                ghc,
		gc:                 gc,
		usesGitHubAppsAuth: usesGitHubAppsAuth,
		config:             cfg,
		shutDown:           make(chan bool),
		opener:             opener,
		path:               statusURI,
		statusUpdate:       statusUpdate,
	}, nil
}

func newSyncController(
	ctx context.Context,
	logger *logrus.Entry,
	mgr manager,
	provider provider,
	cfg config.Getter,
	gc git.ClientFactory,
	hist *history.History,
	usesGitHubAppsAuth bool,
	statusUpdate *statusUpdate,
) (*syncController, error) {
	if err := mgr.GetFieldIndexer().IndexField(
		ctx,
		&prowapi.ProwJob{},
		cacheIndexName,
		cacheIndexFunc,
	); err != nil {
		return nil, fmt.Errorf("failed to add baseSHA index to cache: %w", err)
	}
	if err := mgr.GetFieldIndexer().IndexField(
		ctx,
		&prowapi.ProwJob{},
		nonFailedBatchByNameBaseAndPullsIndexName,
		nonFailedBatchByNameBaseAndPullsIndexFunc,
	); err != nil {
		return nil, fmt.Errorf("failed to add index for non-failed batches: %w", err)
	}

	return &syncController{
		ctx:           ctx,
		logger:        logger.WithField("controller", "sync"),
		prowJobClient: mgr.GetClient(),
		config:        cfg,
		provider:      provider,
		pickNewBatch:  pickNewBatch(gc, cfg, provider),
		changedFiles: &changedFilesAgent{
			provider:        provider,
			nextChangeCache: make(map[changeCacheKey][]string),
		},
		History:      hist,
		statusUpdate: statusUpdate,
	}, nil
}
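// An illustrative lifecycle sketch (hypothetical main loop; the fixed period
// and error handling are assumptions, not part of this file's API):
//
//	ctrl, err := NewController(ghcSync, ghcStatus, mgr, cfg, gc, 1000, opener, historyURI, statusURI, nil, false)
//	if err != nil {
//		logrus.WithError(err).Fatal("failed to construct tide controller")
//	}
//	defer ctrl.Shutdown()
//	for range time.Tick(time.Minute) { // the sync period is configurable in real deployments
//		if err := ctrl.Sync(); err != nil {
//			logrus.WithError(err).Error("sync failed")
//		}
//	}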
func prKey(pr *CodeReviewCommon) string {
	return fmt.Sprintf("%s#%d", string(pr.NameWithOwner), pr.Number)
}

// newExpectedContext creates a Context in the Expected state.
func newExpectedContext(c string) Context {
	return Context{
		Context:     githubql.String(c),
		State:       githubql.StatusStateExpected,
		Description: githubql.String(""),
	}
}

// contextsToStrings converts a list of Contexts to a list of strings.
func contextsToStrings(contexts []Context) []string {
	var names []string
	for _, c := range contexts {
		names = append(names, string(c.Context))
	}
	// Sorting names improves readability of logs and simplifies unit tests.
	sort.Strings(names)
	return names
}
// Sync runs one sync iteration.
func (c *syncController) Sync() error {
	start := time.Now()
	defer func() {
		duration := time.Since(start)
		c.logger.WithField("duration", duration.String()).Info("Synced")
		tideMetrics.syncDuration.Set(duration.Seconds())
		tideMetrics.syncHeartbeat.WithLabelValues("sync").Inc()
	}()
	defer c.changedFiles.prune()
	c.config().BranchProtectionWarnings(c.logger, c.config().PresubmitsStatic)

	c.logger.Debug("Building tide pool.")
	var queryErrors []error
	prs, err := c.provider.Query()
	if err != nil {
		c.logger.WithError(err).Debug("failed to query GitHub for some PRs")
		queryErrors = append(queryErrors, err)
	}
	c.logger.WithFields(logrus.Fields{
		"duration":       time.Since(start).String(),
		"found_pr_count": len(prs),
	}).Debug("Found (unfiltered) pool PRs.")

	var blocks blockers.Blockers
	if len(prs) > 0 {
		blocks, err = c.provider.blockers()
		if err != nil {
			return fmt.Errorf("failed getting blockers: %w", err)
		}
	}
	// Partition PRs into subpools and filter out non-pool PRs.
	rawPools, err := c.dividePool(prs)
	if err != nil {
		return err
	}
	filteredPools := c.filterSubpools(c.provider.isAllowedToMerge, rawPools)

	// Notify the statusController about the new pool.
	c.statusUpdate.Lock()
	c.statusUpdate.blocks = blocks
	c.statusUpdate.poolPRs = poolPRMap(filteredPools)
	c.statusUpdate.baseSHAs = baseSHAMap(filteredPools)
	c.statusUpdate.requiredContexts = requiredContextsMap(filteredPools)
	select {
	case c.statusUpdate.newPoolPending <- true:
		c.statusUpdate.dontUpdateStatus.reset()
	default:
	}
	c.statusUpdate.Unlock()

	// Sync subpools in parallel.
	poolChan := make(chan Pool, len(filteredPools))
	subpoolsInParallel(
		c.config().Tide.MaxGoroutines,
		filteredPools,
		func(sp *subpool) {
			// blocks.GetApplicable is a noop if blocks was never initialized.
			// This covers both the case where no blocking label is configured
			// and source control systems that don't support blockers yet.
			pool, err := c.syncSubpool(*sp, blocks.GetApplicable(sp.org, sp.repo, sp.branch))
			if err != nil {
				tideMetrics.poolErrors.WithLabelValues(sp.org, sp.repo, sp.branch).Inc()
				sp.log.WithError(err).Error("Error syncing subpool.")
			}
			poolChan <- pool
		},
	)

	close(poolChan)
	pools := make([]Pool, 0, len(poolChan))
	for pool := range poolChan {
		pools = append(pools, pool)
	}
	sortPools(pools)
	c.m.Lock()
	c.pools = pools
	c.m.Unlock()

	c.History.Flush()
	return utilerrors.NewAggregate(queryErrors)
}

func (c *syncController) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	c.m.Lock()
	defer c.m.Unlock()
	b, err := json.Marshal(c.pools)
	if err != nil {
		c.logger.WithError(err).Error("Encoding JSON.")
		b = []byte("[]")
	}
	if _, err = w.Write(b); err != nil {
		c.logger.WithError(err).Error("Writing JSON response.")
	}
}

func subpoolsInParallel(goroutines int, sps map[string]*subpool, process func(*subpool)) {
	// Load the subpools into a channel for use as a work queue.
	queue := make(chan *subpool, len(sps))
	for _, sp := range sps {
		queue <- sp
	}
	close(queue)

	if goroutines > len(queue) {
		goroutines = len(queue)
	}
	wg := &sync.WaitGroup{}
	wg.Add(goroutines)
	for i := 0; i < goroutines; i++ {
		go func() {
			defer wg.Done()
			for sp := range queue {
				process(sp)
			}
		}()
	}
	wg.Wait()
}

// filterSubpools filters non-pool PRs out of the initially identified subpools,
// deleting any pools that become empty.
// See filterSubpool for filtering details.
func (c *syncController) filterSubpools(mergeAllowed func(*CodeReviewCommon) (string, error), raw map[string]*subpool) map[string]*subpool {
	filtered := make(map[string]*subpool)
	var lock sync.Mutex

	subpoolsInParallel(
		c.config().Tide.MaxGoroutines,
		raw,
		func(sp *subpool) {
			if err := c.initSubpoolData(sp); err != nil {
				sp.log.WithError(err).Error("Error initializing subpool.")
				return
			}
			key := poolKey(sp.org, sp.repo, sp.branch)
			if spFiltered := filterSubpool(c.provider, mergeAllowed, sp); spFiltered != nil {
				sp.log.WithField("key", key).WithField("pool", spFiltered).Debug("filtered sub-pool")

				lock.Lock()
				filtered[key] = spFiltered
				lock.Unlock()
			} else {
				sp.log.WithField("key", key).WithField("pool", spFiltered).Debug("filtering sub-pool removed all PRs")
			}
		},
	)
	return filtered
}
// initSubpoolData fetches presubmit jobs and context checkers for the subpool.
func (c *syncController) initSubpoolData(sp *subpool) error {
	var err error
	sp.presubmits, err = c.presubmitsByPull(sp)
	if err != nil {
		return fmt.Errorf("error determining required presubmit prowjobs: %w", err)
	}
	// CloneURI is used by Gerrit to retrieve inrepoconfig; it is not used by
	// GitHub at all.
	// cloneURI is known to be the only reliable way for Gerrit to clone
	// correctly, so it should be safe to assume that cloneURI is identical
	// among jobs.
	var cloneURI string
	for _, presubmits := range sp.presubmits {
		for _, p := range presubmits {
			if p.CloneURI != "" {
				cloneURI = p.CloneURI
				break
			}
		}
	}
	sp.cloneURI = cloneURI

	sp.cc = make(map[int]contextChecker, len(sp.prs))
	for _, pr := range sp.prs {
		sp.cc[pr.Number], err = c.provider.GetTideContextPolicy(sp.org, sp.repo, sp.branch, refGetterFactory(string(sp.sha)), &pr)
		if err != nil {
			return fmt.Errorf("error setting up context checker for pr %d: %w", pr.Number, err)
		}
	}
	return nil
}

// filterSubpool filters PRs from an initially identified subpool, returning the
// filtered subpool.
// If the subpool becomes empty, 'nil' is returned to indicate that the subpool
// should be deleted.
//
// This function works for any source code provider.
func filterSubpool(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool) *subpool {
	var toKeep []CodeReviewCommon
	for _, pr := range sp.prs {
		if !filterPR(provider, mergeAllowed, sp, &pr) {
			toKeep = append(toKeep, pr)
		}
	}
	if len(toKeep) == 0 {
		return nil
	}
	sp.prs = toKeep
	return sp
}
// filterPR indicates if a PR should be filtered out of the subpool.
// Specifically, we filter out PRs that:
//   - Have known merge conflicts or an invalid merge method.
//   - Have failing or missing status contexts.
//   - Have pending required status contexts that are not associated with a
//     ProwJob. (This ensures that the 'tide' context indicates that the pending
//     status is preventing merge. Required ProwJob statuses are allowed to be
//     'pending' because this prevents kicking PRs from the pool when Tide is
//     retesting them.)
//
// This function works for any source code provider.
func filterPR(provider provider, mergeAllowed func(*CodeReviewCommon) (string, error), sp *subpool, pr *CodeReviewCommon) bool {
	log := sp.log.WithFields(pr.logFields())
	// Skip PRs that are known to be unmergeable.
	if reason, err := mergeAllowed(pr); err != nil {
		log.WithError(err).Error("Error checking PR mergeability.")
		return true
	} else if reason != "" {
		log.WithField("reason", reason).Debug("filtering out PR as it is not mergeable")
		return true
	}

	// Filter out PRs with unsuccessful contexts unless the only unsuccessful
	// contexts are pending required prowjobs.
	contexts, err := provider.headContexts(pr)
	if err != nil {
		log.WithError(err).Error("Getting head contexts.")
		return true
	}
	presubmitsHaveContext := func(context string) bool {
		for _, job := range sp.presubmits[pr.Number] {
			if job.Context == context {
				return true
			}
		}
		return false
	}
	for _, ctx := range unsuccessfulContexts(contexts, sp.cc[pr.Number], log) {
		if ctx.State != githubql.StatusStatePending {
			log.WithField("context", ctx.Context).Debug("filtering out PR as unsuccessful context is not pending")
			return true
		}
		if !presubmitsHaveContext(string(ctx.Context)) {
			log.WithField("context", ctx.Context).Debug("filtering out PR as unsuccessful context is not Prow-controlled")
			return true
		}
	}

	return false
}

func baseSHAMap(subpoolMap map[string]*subpool) map[string]string {
	baseSHAs := make(map[string]string, len(subpoolMap))
	for key, sp := range subpoolMap {
		baseSHAs[key] = sp.sha
	}
	return baseSHAs
}

// poolPRMap collects all subpool PRs into a map containing all pooled PRs.
func poolPRMap(subpoolMap map[string]*subpool) map[string]CodeReviewCommon {
	prs := make(map[string]CodeReviewCommon)
	for _, sp := range subpoolMap {
		for _, pr := range sp.prs {
			prs[prKey(&pr)] = pr
		}
	}
	return prs
}

func requiredContextsMap(subpoolMap map[string]*subpool) map[string][]string {
	requiredContextsMap := map[string][]string{}
	for _, sp := range subpoolMap {
		for _, pr := range sp.prs {
			requiredContextsSet := sets.Set[string]{}
			for _, requiredJob := range sp.presubmits[pr.Number] {
				requiredContextsSet.Insert(requiredJob.Context)
			}
			requiredContextsMap[prKey(&pr)] = sets.List(requiredContextsSet)
		}
	}
	return requiredContextsMap
}

type simpleState string

const (
	failureState simpleState = "failure"
	pendingState simpleState = "pending"
	successState simpleState = "success"
)

func toSimpleState(s prowapi.ProwJobState) simpleState {
	if s == prowapi.TriggeredState || s == prowapi.PendingState {
		return pendingState
	} else if s == prowapi.SuccessState {
		return successState
	}
	return failureState
}

// isPassingTests returns whether or not all contexts set on the PR, except for
// the tide pool context, are passing.
func (c *syncController) isPassingTests(log *logrus.Entry, pr *CodeReviewCommon, cc contextChecker) bool {
	log = log.WithFields(pr.logFields())

	contexts, err := c.provider.headContexts(pr)
	if err != nil {
		log.WithError(err).Error("Getting head commit status contexts.")
		// If we can't get the status of the commit, assume that it is failing.
		return false
	}
	unsuccessful := unsuccessfulContexts(contexts, cc, log)
	return len(unsuccessful) == 0
}
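// An illustrative worked example of how the context rules combine in
// unsuccessfulContexts below (context names are hypothetical): optional
// contexts are ignored, non-success contexts are kept, and required-but-
// missing contexts are added back as synthetic Expected contexts.
//
//	// Given required={unit, lint}, optional={coverage}, and head contexts
//	// {unit: SUCCESS, coverage: FAILURE}, unsuccessfulContexts returns
//	// [lint (EXPECTED)]: coverage is skipped as optional, unit passed, and
//	// lint is appended via newExpectedContext because it never reported.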
// unsuccessfulContexts determines which of the contexts that we care about are
// failed. For instance, we do not care about our own context.
// If branch protection is set to only check required checks, we will skip all
// non-required tests. If required tests are missing from the list, they will
// be added to the list of failed contexts.
func unsuccessfulContexts(contexts []Context, cc contextChecker, log *logrus.Entry) []Context {
	var failed []Context
	for _, ctx := range contexts {
		if string(ctx.Context) == statusContext {
			continue
		}
		if cc.IsOptional(string(ctx.Context)) {
			continue
		}
		if ctx.State != githubql.StatusStateSuccess {
			failed = append(failed, ctx)
		}
	}
	for _, c := range cc.MissingRequiredContexts(contextsToStrings(contexts)) {
		failed = append(failed, newExpectedContext(c))
	}

	log.WithFields(logrus.Fields{
		"total_context_count":  len(contexts),
		"context_names":        contextsToStrings(contexts),
		"failed_context_count": len(failed),
		"failed_context_names": contextsToStrings(failed),
	}).Debug("Filtered out failed contexts")
	return failed
}

// hasAllLabels is used by pickHighestPriorityPR. It returns true when wantLabels
// is empty; otherwise it ensures that the PR's labels contain all wantLabels.
func hasAllLabels(pr CodeReviewCommon, wantLabels []string) bool {
	if len(wantLabels) == 0 {
		return true
	}
	prLabels := sets.New[string]()
	if labels := pr.GitHubLabels(); labels != nil {
		for _, l2 := range labels.Nodes {
			prLabels.Insert(string(l2.Name))
		}
	}
	for _, label := range wantLabels {
		altLabels := strings.Split(label, ",")
		if !prLabels.HasAny(altLabels...) {
			return false
		}
	}
	return true
}

func pickHighestPriorityPR(log *logrus.Entry, prs []CodeReviewCommon, cc map[int]contextChecker, isPassingTestsFunc func(*logrus.Entry, *CodeReviewCommon, contextChecker) bool, priorities []config.TidePriority) (bool, CodeReviewCommon) {
	smallestNumber := -1
	var smallestPR CodeReviewCommon
	for _, p := range append(priorities, config.TidePriority{}) {
		for _, pr := range prs {
			// This should only apply to GitHub PRs; for Gerrit this is always true.
			if !hasAllLabels(pr, p.Labels) {
				continue
			}
			if smallestNumber != -1 && pr.Number >= smallestNumber {
				continue
			}
			if !isPassingTestsFunc(log, &pr, cc[pr.Number]) {
				continue
			}
			smallestNumber = pr.Number
			smallestPR = pr
		}
		if smallestNumber > -1 {
			return true, smallestPR
		}
	}
	return false, smallestPR
}
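// A worked example of the label matching above (label values are
// hypothetical): each entry in a priority's Labels must match, but a
// comma-separated entry is satisfied by any one of its alternatives.
//
//	// wantLabels = ["priority/critical-urgent", "kind/bug,kind/regression"]
//	// matches a PR labeled {priority/critical-urgent, kind/regression},
//	// but not one labeled only {priority/critical-urgent}.
//
// Priorities are tried in configured order, with an implicit catch-all
// (empty) priority appended last, so the oldest qualifying PR of the highest
// matching priority wins.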
// accumulateBatch looks at existing batch ProwJobs and, if applicable, returns:
//   - A list of PRs that are part of a batch test that finished successfully.
//   - A list of PRs that are part of a batch test that hasn't finished yet but
//     didn't have any failures so far.
//
// Jobs configured as `run_before_merge` must pass for a batch to be returned
// as successBatch. It's possible that these jobs haven't run yet, in which
// case the batch is considered failed so that takeAction can trigger a new
// batch.
func (c *syncController) accumulateBatch(sp subpool) (successBatch []CodeReviewCommon, pendingBatch []CodeReviewCommon) {
	sp.log.Debug("accumulating PRs for batch testing")
	prNums := make(map[int]CodeReviewCommon)
	for _, pr := range sp.prs {
		prNums[pr.Number] = pr
	}
	type accState struct {
		prs       []CodeReviewCommon
		jobStates map[string]simpleState
		// Are the pull requests in the ref still acceptable? That is, do they
		// still point to the heads of the PRs?
		validPulls bool
	}
	states := make(map[string]*accState)
	for _, pj := range sp.pjs {
		if pj.Spec.Type != prowapi.BatchJob {
			continue
		}
		// First validate the batch job's refs.
		ref := pj.Spec.Refs.String()
		if _, ok := states[ref]; !ok {
			state := &accState{
				jobStates:  make(map[string]simpleState),
				validPulls: true,
			}
			for _, pull := range pj.Spec.Refs.Pulls {
				if pr, ok := prNums[pull.Number]; ok && pr.HeadRefOID == pull.SHA {
					state.prs = append(state.prs, pr)
				} else if !ok {
					state.validPulls = false
					sp.log.WithField("batch", ref).WithFields(pr.logFields()).Debug("batch job invalid, PR left pool")
					break
				} else {
					state.validPulls = false
					sp.log.WithField("batch", ref).WithFields(pr.logFields()).Debug("batch job invalid, PR HEAD changed")
					break
				}
			}
			states[ref] = state
		}
		if !states[ref].validPulls {
			// The batch contains a PR ref that has changed. Skip it.
			continue
		}

		// Batch job refs are valid. Now accumulate job states by batch ref.
		context := pj.Spec.Context
		jobState := toSimpleState(pj.Status.State)
		// Store the best result for this ref+context.
		states[ref].jobStates[context] = getBetterSimpleState(states[ref].jobStates[context], jobState)
	}
	for ref, state := range states {
		if !state.validPulls {
			continue
		}

		// presubmitsForBatch includes jobs that are `run_before_merge`; those
		// jobs are not triggered before entering the tide pool and need to be
		// handled below.
		requiredPresubmits, err := c.presubmitsForBatch(state.prs, sp.org, sp.repo, sp.sha, sp.branch)
		if err != nil {
			sp.log.WithError(err).Error("Error getting presubmits for batch")
			continue
		}

		overallState := successState
		for _, p := range requiredPresubmits {
			if s, ok := state.jobStates[p.Context]; !ok {
				// This can happen for jobs configured as `run_before_merge`, as
				// these jobs are triggered only by tide. There is no need to
				// handle it differently: a new batch is expected in both cases.
				overallState = failureState
				sp.log.WithField("batch", ref).Debugf("batch invalid, required presubmit %s is missing", p.Context)
				break
			} else if s == failureState {
				overallState = failureState
				sp.log.WithField("batch", ref).Debugf("batch invalid, required presubmit %s is not passing", p.Context)
				break
			} else if s == pendingState && overallState == successState {
				overallState = pendingState
			}
		}
		switch overallState {
		// Currently we only consider 1 pending batch and 1 success batch at a time.
		// If more are somehow present they will be ignored.
		case pendingState:
			pendingBatch = state.prs
		case successState:
			successBatch = state.prs
		}
	}
	return successBatch, pendingBatch
}
// prowJobsFromContexts constructs ProwJob objects from all successful presubmit
// contexts that include a baseSHA. This is needed because otherwise we would
// always need retesting for results that are older than sinker's
// max_prowjob_age.
func (c *syncController) prowJobsFromContexts(pr *CodeReviewCommon, baseSHA string) ([]prowapi.ProwJob, error) {
	headContexts, err := c.provider.headContexts(pr)
	if err != nil {
		return nil, fmt.Errorf("failed to get head contexts: %w", err)
	}
	var passingCurrentContexts []string
	for _, headContext := range headContexts {
		if headContext.State != githubql.StatusStateSuccess {
			continue
		}
		if baseSHAForContext := config.BaseSHAFromContextDescription(string(headContext.Description)); baseSHAForContext != "" && baseSHAForContext == baseSHA {
			passingCurrentContexts = append(passingCurrentContexts, string(headContext.Context))
		}
	}

	var prowjobsFromContexts []prowapi.ProwJob
	for _, passingCurrentContext := range passingCurrentContexts {
		prowjobsFromContexts = append(prowjobsFromContexts, prowapi.ProwJob{
			Spec: prowapi.ProwJobSpec{
				Context: passingCurrentContext,
				Refs:    &prowapi.Refs{Pulls: []prowapi.Pull{{Number: pr.Number, SHA: pr.HeadRefOID}}},
				Type:    prowapi.PresubmitJob,
			},
			Status: prowapi.ProwJobStatus{
				State: prowapi.SuccessState,
			},
		})
	}

	return prowjobsFromContexts, nil
}
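// The mechanism above relies on the status reporter embedding the tested
// baseSHA in the commit-status description, so that a green context can stand
// in for a garbage-collected ProwJob. An illustrative trace (the exact
// description format is an assumption here; config.BaseSHAFromContextDescription
// is the source of truth for parsing it):
//
//	// A head context like
//	//   context: "pull-test", state: SUCCESS, description: "... BaseSHA:abc123"
//	// is turned into a synthetic successful presubmit ProwJob for
//	// (PR head, baseSHA=abc123), so accumulate does not demand a retest.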
// accumulate returns the supplied PRs sorted into three buckets based on their
// accumulated state across the presubmits.
func (c *syncController) accumulate(presubmits map[int][]config.Presubmit, prs []CodeReviewCommon, pjs []prowapi.ProwJob, baseSHA string) (successes, pendings, missings []CodeReviewCommon, missingTests map[int][]config.Presubmit) {
	log := c.logger
	missingTests = map[int][]config.Presubmit{}
	for _, pr := range prs {
		if prowjobsFromContexts, err := c.prowJobsFromContexts(&pr, baseSHA); err != nil {
			log.WithError(err).Error("failed to get prowjobs from contexts")
		} else {
			pjs = append(pjs, prowjobsFromContexts...)
		}

		// Accumulate the best result for each job (Passing > Pending > Failing/Unknown).
		// We can ignore the baseSHA here because the subpool only contains ProwJobs
		// with the correct baseSHA.
		psStates := make(map[string]simpleState)
		for _, pj := range pjs {
			if pj.Spec.Type != prowapi.PresubmitJob {
				continue
			}
			if pj.Spec.Refs.Pulls[0].Number != pr.Number {
				continue
			}
			if pj.Spec.Refs.Pulls[0].SHA != pr.HeadRefOID {
				continue
			}

			name := pj.Spec.Context
			psStates[name] = getBetterSimpleState(psStates[name], toSimpleState(pj.Status.State))
		}
		// The overall result for the PR is the worst of the best of all its
		// required presubmits.
		overallState := successState
		for _, ps := range presubmits[pr.Number] {
			if s, ok := psStates[ps.Context]; !ok {
				// No PJ with the correct baseSHA+headSHA exists.
				missingTests[pr.Number] = append(missingTests[pr.Number], ps)
				log.WithFields(pr.logFields()).Debugf("missing presubmit %s", ps.Context)
			} else if s == failureState {
				// A PJ with the correct baseSHA+headSHA exists but failed.
				missingTests[pr.Number] = append(missingTests[pr.Number], ps)
				log.WithFields(pr.logFields()).Debugf("presubmit %s not passing", ps.Context)
			} else if s == pendingState {
				log.WithFields(pr.logFields()).Debugf("presubmit %s pending", ps.Context)
				overallState = pendingState
			}
		}
		if len(missingTests[pr.Number]) > 0 {
			overallState = failureState
		}

		if overallState == successState {
			successes = append(successes, pr)
		} else if overallState == pendingState {
			pendings = append(pendings, pr)
		} else {
			missings = append(missings, pr)
		}
	}
	return
}

func prNumbers(prs []CodeReviewCommon) []int {
	var nums []int
	for _, pr := range prs {
		nums = append(nums, pr.Number)
	}
	return nums
}
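// A worked example of the "worst of the best" rollup in accumulate above
// (job names are hypothetical): for a PR requiring {unit, lint, e2e} with
// ProwJob runs
//
//	unit: failure then success  -> best is success
//	lint: pending               -> best is pending
//	e2e:  no run at all         -> missing
//
// e2e lands in missingTests, which forces overallState to failure, so the PR
// is bucketed into missings even though lint alone would only make it pending.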
// pickNewBatch picks PRs to form a new batch; it is only used by pickBatch.
//
// This function works for any source code provider.
func pickNewBatch(gc git.ClientFactory, cfg config.Getter, provider provider) func(sp subpool, candidates []CodeReviewCommon, maxBatchSize int) ([]CodeReviewCommon, error) {
	return func(sp subpool, candidates []CodeReviewCommon, maxBatchSize int) ([]CodeReviewCommon, error) {
		var res []CodeReviewCommon
		// TODO(chaodaiG): make sure cloning works for gerrit.
		r, err := gc.ClientFor(sp.org, sp.repo)
		if err != nil {
			return nil, err
		}
		defer r.Clean()
		if err := r.Config("user.name", "prow"); err != nil {
			return nil, err
		}
		if err := r.Config("user.email", "prow@localhost"); err != nil {
			return nil, err
		}
		if err := r.Config("commit.gpgsign", "false"); err != nil {
			sp.log.Warningf("Cannot set gpgsign=false in gitconfig: %v", err)
		}
		if err := r.Checkout(sp.sha); err != nil {
			return nil, err
		}

		for _, pr := range candidates {
			mergeMethod := provider.prMergeMethod(&pr)
			if mergeMethod == nil {
				sp.log.WithFields(pr.logFields()).Warnln("Failed to get merge method for PR, will skip.")
				continue
			}
			if ok, err := r.MergeWithStrategy(pr.HeadRefOID, string(*mergeMethod)); err != nil {
				// We failed to abort the merge and our git client is in a bad
				// state; it must be cleaned before we try again.
				return nil, err
			} else if ok {
				res = append(res, pr)
				// TODO: Make this configurable per subpool.
				if maxBatchSize > 0 && len(res) >= maxBatchSize {
					break
				}
			}
		}

		return res, nil
	}
}

type newBatchFunc func(sp subpool, candidates []CodeReviewCommon, maxBatchSize int) ([]CodeReviewCommon, error)
// pickBatch picks PRs to form a batch.
//
// This function works for any source code provider.
func (c *syncController) pickBatch(sp subpool, cc map[int]contextChecker, newBatchFunc newBatchFunc) ([]CodeReviewCommon, []config.Presubmit, error) {
	// BatchSizeLimit is a global option; it works for any source code provider.
	batchLimit := c.config().Tide.BatchSizeLimit(config.OrgRepo{Org: sp.org, Repo: sp.repo})
	if batchLimit < 0 {
		sp.log.Debug("Batch merges disabled by configuration in this repo.")
		return nil, nil, nil
	}

	// We must choose the oldest PRs for the batch.
	sort.Slice(sp.prs, func(i, j int) bool { return sp.prs[i].Number < sp.prs[j].Number })

	var candidates []CodeReviewCommon
	for _, pr := range sp.prs {
		// c.isRetestEligible appends `Commits` to the passed-in PullRequest
		// struct, which is used later to avoid repeated lookups on GitHub.
		if c.isRetestEligible(sp.log, &pr, cc[pr.Number]) {
			candidates = append(candidates, pr)
		}
	}

	log := sp.log.WithField("subpool_pr_count", len(sp.prs))
	if len(candidates) == 0 {
		log.Debug("None of the prs in the subpool was passing tests, no batch will be created")
		return nil, nil, nil
	}
	log.WithField("candidate_count", len(candidates)).Debug("Found PRs with passing tests when picking batch")

	var res []CodeReviewCommon
	// PrioritizeExistingBatches is a global option; it works for any source
	// code provider.
	if c.config().Tide.PrioritizeExistingBatches(config.OrgRepo{Repo: sp.repo, Org: sp.org}) {
		res = pickBatchWithPreexistingTests(sp, candidates, batchLimit)
	}
	// No batch with pre-existing tests found, or prioritize_existing_batches disabled.
	if len(res) == 0 {
		var err error
		res, err = newBatchFunc(sp, candidates, batchLimit)
		if err != nil {
			return nil, nil, err
		}
	}

	// presubmitsForBatch returns jobs that should run via trigger, as well as
	// jobs that are `run_before_merge`.
	presubmits, err := c.presubmitsForBatch(res, sp.org, sp.repo, sp.sha, sp.branch)
	if err != nil {
		return nil, nil, err
	}

	return res, presubmits, nil
}

// isRetestEligible determines retesting eligibility. It allows PRs where all
// mandatory contexts are either passing or pending. Pending ones are only
// allowed if we find a ProwJob that corresponds to them and was created by
// Tide, as that allows us to infer that this job passed in the past.
// We look at the actively running ProwJob rather than a previous successful
// one, because the latter might already be garbage collected.
func (c *syncController) isRetestEligible(log *logrus.Entry, candidate *CodeReviewCommon, cc contextChecker) bool {
	candidateHeadContexts, err := c.provider.headContexts(candidate)
	if err != nil {
		log.WithError(err).WithFields(candidate.logFields()).Debug("failed to get headContexts for batch candidate, ignoring.")
		return false
	}
	var contextNames []string
	for _, headContext := range candidateHeadContexts {
		contextNames = append(contextNames, string(headContext.Context))
	}

	if missedContexts := cc.MissingRequiredContexts(contextNames); len(missedContexts) > 0 {
		return false
	}

	for _, headContext := range candidateHeadContexts {
		if headContext.Context == statusContext || cc.IsOptional(string(headContext.Context)) || headContext.State == githubql.StatusStateSuccess {
			continue
		}
		if headContext.State != githubql.StatusStatePending {
			return false
		}

		// In the case where a status is pending:
		// if the prowjob was triggered by tide, then tide had considered it a
		// good candidate, and we should still consider it as a candidate.
		pjLabels := map[string]string{
			kube.CreatedByTideLabel: "true",
			kube.ProwJobTypeLabel:   string(prowapi.PresubmitJob),
			kube.OrgLabel:           candidate.Org,
			kube.RepoLabel:          candidate.Repo,
			kube.BaseRefLabel:       candidate.BaseRefName,
			kube.PullLabel:          strconv.Itoa(candidate.Number),
			kube.ContextAnnotation:  string(headContext.Context),
		}

		var pjs prowapi.ProwJobList
		if err := c.prowJobClient.List(c.ctx,
			&pjs,
			ctrlruntimeclient.InNamespace(c.config().ProwJobNamespace),
			ctrlruntimeclient.MatchingLabels(pjLabels),
		); err != nil {
			log.WithError(err).Debug("failed to list prowjobs for PR, ignoring")
			return false
		}

		if prowJobListHasProwJobWithMatchingHeadSHA(&pjs, candidate.HeadRefOID) {
			continue
		}

		return false
	}

	return true
}

func prowJobListHasProwJobWithMatchingHeadSHA(pjs *prowapi.ProwJobList, headSHA string) bool {
	for _, pj := range pjs.Items {
		if pj.Spec.Refs != nil && len(pj.Spec.Refs.Pulls) == 1 && pj.Spec.Refs.Pulls[0].SHA == headSHA {
			return true
		}
	}
	return false
}
// setTideStatusSuccess ensures the tide context is set to success.
//
// Used only by mergePRs, referenced by GitHubProvider only.
func setTideStatusSuccess(pr CodeReviewCommon, ghc githubClient, cfg *config.Config, log *logrus.Entry) error {
	// Do not waste API tokens, or risk hitting the 2.5k context limit, by
	// setting the status to success if it is already set to success.
	if prHasSuccessfullTideStatusContext(pr) {
		return nil
	}
	return ghc.CreateStatus(
		pr.Org,
		pr.Repo,
		pr.HeadRefOID,
		github.Status{
			Context:   statusContext,
			State:     "success",
			TargetURL: targetURL(cfg, &pr, log),
		})
}

// prHasSuccessfullTideStatusContext reports whether the PR's head commit
// already has a successful tide status context.
//
// Used only by setTideStatusSuccess, referenced only by GitHubProvider.
func prHasSuccessfullTideStatusContext(pr CodeReviewCommon) bool {
	commits := pr.GitHubCommits()
	if commits == nil {
		return false
	}
	for _, commit := range commits.Nodes {
		if string(commit.Commit.OID) != pr.HeadRefOID {
			continue
		}
		for _, context := range commit.Commit.Status.Contexts {
			if strings.EqualFold(string(context.Context), statusContext) {
				return strings.EqualFold(string(context.State), string(githubql.StatusStateSuccess))
			}
		}
	}

	return false
}
// tryMerge attempts one merge and returns a bool indicating whether we should
// try to merge the remaining PRs, and possibly an error.
//
// tryMerge is used by mergePRs only, referenced by GitHubProvider only.
func tryMerge(mergeFunc func() error) (bool, error) {
	var err error
	const maxRetries = 3
	backoff := time.Second * 4
	for retry := 0; retry < maxRetries; retry++ {
		if err = mergeFunc(); err == nil {
			// Successful merge!
			return true, nil
		}
		// TODO: Add a config option to abort batches if a PR in the batch
		// cannot be merged for any reason. This would skip merging
		// not just the changed PR, but also the other PRs in the batch.
		// This shouldn't be the default behavior as merging batches is high
		// priority and this is unlikely to be problematic.
		// Note: We would also need to be able to roll back any merges for the
		// batch that were already successfully completed before the failure.
		// Ref: https://github.com/kubernetes/test-infra/issues/10621
		if _, ok := err.(github.ModifiedHeadError); ok {
			// This is a possible source of incorrect behavior. If someone
			// modifies their PR as we try to merge it in a batch then we
			// end up in an untested state. This is unlikely to cause any
			// real problems.
			return true, fmt.Errorf("PR was modified: %w", err)
		} else if _, ok = err.(github.UnmergablePRBaseChangedError); ok {
			// GitHub complained that the base branch was modified. This is a
			// strange error because the API doesn't even allow the request to
			// specify the base branch sha, only the head sha.
			// We suspect that github is complaining because we are making the
			// merge requests too rapidly and it cannot recompute mergability
			// in time. https://github.com/kubernetes/test-infra/issues/5171
			// We handle this by sleeping for a few seconds before trying to
			// merge again.
			err = fmt.Errorf("base branch was modified: %w", err)
			if retry+1 < maxRetries {
				sleep(backoff)
				backoff *= 2
			}
		} else if _, ok = err.(github.UnauthorizedToPushError); ok {
			// GitHub let us know that the token used cannot push to the branch.
			// Even if the robot is set up to have write access to the repo, an
			// overzealous branch protection setting will not allow the robot to
			// push to a specific branch.
			// We won't be able to merge the other PRs.
			return false, fmt.Errorf("branch needs to be configured to allow this robot to push: %w", err)
		} else if _, ok = err.(github.MergeCommitsForbiddenError); ok {
			// GitHub let us know that the merge method configured for this repo
			// is not allowed by other repo settings, so we should let the admins
			// know that the configuration needs to be updated.
			// We won't be able to merge the other PRs.
			return false, fmt.Errorf("Tide needs to be configured to use the 'rebase' merge method for this repo or the repo needs to allow merge commits: %w", err)
		} else if _, ok = err.(github.UnmergablePRError); ok {
			return true, fmt.Errorf("PR is unmergable. Do the Tide merge requirements match the GitHub settings for the repo? %w", err)
		} else {
			return true, err
		}
	}
	// We ran out of retries. Return the last transient error.
	return true, err
}
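// The retry policy above only loops for the base-branch-modified case; every
// other error type returns immediately. With maxRetries=3 and a 4s starting
// backoff, the worst-case schedule is: attempt, sleep 4s, attempt, sleep 8s,
// attempt, then give up and surface the last error (the final doubling to 16s
// is computed but never slept, because the loop exits before the next retry).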
func (c *syncController) trigger(sp subpool, presubmits []config.Presubmit, prs []CodeReviewCommon) error {
	refs, err := c.provider.refsForJob(sp, prs)
	if err != nil {
		return fmt.Errorf("failed creating refs: %w", err)
	}

	// If PRs require the same job, we only want to trigger it once.
	// If multiple required jobs have the same context, we assume the
	// same shard will be run to provide those contexts.
	triggeredContexts := sets.New[string]()
	enableScheduling := c.config().Scheduler.Enabled
	for _, ps := range presubmits {
		if triggeredContexts.Has(ps.Context) {
			continue
		}
		triggeredContexts.Insert(ps.Context)
		var spec prowapi.ProwJobSpec
		if len(prs) == 1 {
			spec = pjutil.PresubmitSpec(ps, refs)
		} else {
			if c.nonFailedBatchForJobAndRefsExists(ps.Name, &refs) {
				continue
			}
			spec = pjutil.BatchSpec(ps, refs)
		}
		labels, annotations := c.provider.labelsAndAnnotations(sp.org, ps.Labels, ps.Annotations, prs...)
		pj := pjutil.NewProwJob(spec, labels, annotations, pjutil.RequireScheduling(enableScheduling))
		pj.Namespace = c.config().ProwJobNamespace
		log := c.logger.WithFields(pjutil.ProwJobFields(&pj))
		start := time.Now()
		if pj.Labels == nil {
			pj.Labels = map[string]string{}
		}
		pj.Labels[kube.CreatedByTideLabel] = "true"
		if err := c.prowJobClient.Create(c.ctx, &pj); err != nil {
			log.WithField("duration", time.Since(start).String()).Debug("Failed to create ProwJob on the cluster.")
			return fmt.Errorf("failed to create a ProwJob for job: %q, PRs: %v: %w", spec.Job, prNumbers(prs), err)
		}
		log.WithField("duration", time.Since(start).String()).Debug("Created ProwJob on the cluster.")
	}
	return nil
}

// nonFailedBatchForJobAndRefsExists reports whether a non-failed batch ProwJob
// already exists for the given job name and refs.
func (c *syncController) nonFailedBatchForJobAndRefsExists(jobName string, refs *prowapi.Refs) bool {
	pjs := &prowapi.ProwJobList{}
	if err := c.prowJobClient.List(c.ctx,
		pjs,
		ctrlruntimeclient.MatchingFields{nonFailedBatchByNameBaseAndPullsIndexName: nonFailedBatchByNameBaseAndPullsIndexKey(jobName, refs)},
		ctrlruntimeclient.InNamespace(c.config().ProwJobNamespace),
	); err != nil {
		c.logger.WithError(err).Error("Failed to list non-failed batches")
		return false
	}

	return len(pjs.Items) > 0
}

func (c *syncController) takeAction(sp subpool, batchPending, successes, pendings, missings, batchMerges []CodeReviewCommon, missingSerialTests map[int][]config.Presubmit) (Action, []CodeReviewCommon, error) {
	var merged []CodeReviewCommon
	var err error
	defer func() {
		if len(merged) > 0 {
			tideMetrics.merges.WithLabelValues(sp.org, sp.repo, sp.branch).Observe(float64(len(merged)))
		}
	}()

	// Merge the batch!
	if len(batchMerges) > 0 {
		merged, err = c.provider.mergePRs(sp, batchMerges, c.statusUpdate.dontUpdateStatus)
		return MergeBatch, batchMerges, err
	}
	// Do not merge PRs while waiting for a batch to complete. We don't want to
	// invalidate the old batch result.
	if len(successes) > 0 && len(batchPending) == 0 {
		if ok, pr := pickHighestPriorityPR(sp.log, successes, sp.cc, c.isPassingTests, c.config().Tide.Priority); ok {
			merged, err = c.provider.mergePRs(sp, []CodeReviewCommon{pr}, c.statusUpdate.dontUpdateStatus)
			return Merge, []CodeReviewCommon{pr}, err
		}
	}
	// If no presubmits are configured, just wait.
	if len(sp.presubmits) == 0 {
		return Wait, nil, nil
	}
	// If we have no batch, trigger one.
	if len(sp.prs) > 1 && len(batchPending) == 0 {
		batch, presubmits, err := c.pickBatch(sp, sp.cc, c.pickNewBatch)
		if err != nil {
			return Wait, nil, err
		}
		if len(batch) > 1 {
			return TriggerBatch, batch, c.trigger(sp, presubmits, batch)
		}
	}
	// If we have no serial jobs pending or successful, trigger one.
	if len(missings) > 0 && len(pendings) == 0 && len(successes) == 0 {
		if ok, pr := pickHighestPriorityPR(sp.log, missings, sp.cc, c.isRetestEligible, c.config().Tide.Priority); ok {
			return Trigger, []CodeReviewCommon{pr}, c.trigger(sp, missingSerialTests[pr.Number], []CodeReviewCommon{pr})
		}
	}
	return Wait, nil, nil
}
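// The decision order in takeAction above is deliberate and worth spelling
// out: a passing batch merges first (MergeBatch), then a passing single PR
// (Merge, only when no batch is pending, so its result is not invalidated),
// then a new batch is triggered for a multi-PR pool (TriggerBatch), then a
// single retest (Trigger, only when nothing is pending or already passing),
// and otherwise the pool waits. Exactly one action is taken per sync,
// matching the Action contract declared near the top of this file.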
// changedFilesAgent queries and caches the names of files changed by PRs.
// Cache entries expire if they are not used during a sync loop.
type changedFilesAgent struct {
	provider    provider
	changeCache map[changeCacheKey][]string
	// nextChangeCache caches file change info that is relevant this sync for use next sync.
	// This becomes the new changeCache when prune() is called at the end of each sync.
	nextChangeCache map[changeCacheKey][]string
	sync.RWMutex
}

type changeCacheKey struct {
	org, repo string
	number    int
	sha       string
}

// prChanges gets the files changed by the PR, either from the cache or by
// querying GitHub.
func (c *changedFilesAgent) prChanges(pr *CodeReviewCommon) config.ChangedFilesProvider {
	return func() ([]string, error) {
		cacheKey := changeCacheKey{
			org:    pr.Org,
			repo:   pr.Repo,
			number: pr.Number,
			sha:    pr.HeadRefOID,
		}

		c.RLock()
		changedFiles, ok := c.changeCache[cacheKey]
		if ok {
			c.RUnlock()
			c.Lock()
			c.nextChangeCache[cacheKey] = changedFiles
			c.Unlock()
			return changedFiles, nil
		}
		if changedFiles, ok = c.nextChangeCache[cacheKey]; ok {
			c.RUnlock()
			return changedFiles, nil
		}
		c.RUnlock()

		// We need to query the changes from GitHub.
		changes, err := c.provider.GetChangedFiles(
			pr.Org,
			pr.Repo,
			pr.Number,
		)
		if err != nil {
			return nil, fmt.Errorf("error getting PR changes for #%d: %w", pr.Number, err)
		}

		changedFiles = make([]string, 0, len(changes))
		changedFiles = append(changedFiles, changes...)
		c.Lock()
		c.nextChangeCache[cacheKey] = changedFiles
		c.Unlock()
		return changedFiles, nil
	}
}

func (c *changedFilesAgent) batchChanges(prs []CodeReviewCommon) config.ChangedFilesProvider {
	return func() ([]string, error) {
		result := sets.Set[string]{}
		for _, pr := range prs {
			changes, err := c.prChanges(&pr)()
			if err != nil {
				return nil, err
			}

			result.Insert(changes...)
		}

		return sets.List(result), nil
	}
}

// prune removes any cached file changes that were not used since the last prune.
func (c *changedFilesAgent) prune() {
	c.Lock()
	defer c.Unlock()
	c.changeCache = c.nextChangeCache
	c.nextChangeCache = make(map[changeCacheKey][]string)
}

func refGetterFactory(ref string) config.RefGetter {
	return func() (string, error) {
		return ref, nil
	}
}
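// changedFilesAgent above implements a simple generational cache: every
// lookup copies its entry into nextChangeCache, and prune() (deferred at the
// top of Sync) promotes nextChangeCache to changeCache and starts an empty
// next generation. A minimal standalone sketch of the pattern (hypothetical
// types, not part of this file):
//
//	type genCache struct {
//		cur, next map[string][]string
//	}
//
//	func (g *genCache) get(k string) ([]string, bool) {
//		v, ok := g.cur[k]
//		if ok {
//			g.next[k] = v // mark as used this generation
//		}
//		return v, ok
//	}
//
//	func (g *genCache) prune() {
//		g.cur, g.next = g.next, map[string][]string{}
//	}
//
// Entries untouched for one full generation are dropped automatically, with
// no per-entry timestamps or TTL bookkeeping.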
// presubmitsByPull creates a map of pr -> requiredPresubmits and filters out
// all PRs for which we failed to determine the required presubmits (this can
// happen if inrepoconfig is enabled).
func (c *syncController) presubmitsByPull(sp *subpool) (map[int][]config.Presubmit, error) {
	presubmits := make(map[int][]config.Presubmit, len(sp.prs))

	// filteredPRs contains all PRs for which we were able to get the presubmits.
	var filteredPRs []CodeReviewCommon

	for _, pr := range sp.prs {
		log := c.logger.WithField("base-sha", sp.sha).WithFields(pr.logFields())
		requireManuallyTriggeredJobs := requireManuallyTriggeredJobs(c.config(), sp.org, sp.repo, pr.BaseRefName)
		presubmitsForPull, err := c.provider.GetPresubmits(sp.org+"/"+sp.repo, pr.BaseRefName, refGetterFactory(sp.sha), refGetterFactory(pr.HeadRefOID))
		if err != nil {
			log.WithError(err).Debug("Failed to get presubmits for PR, excluding from subpool")
			continue
		}
		filteredPRs = append(filteredPRs, pr)
		log.WithField("num_possible_presubmit", len(presubmitsForPull)).Debug("Found possible presubmits")

		for _, ps := range presubmitsForPull {
			if !c.provider.jobIsRequiredByTide(&ps, &pr) {
				continue
			}

			// Only keep the jobs that are required for this PR. Order of
			// filters:
			//   - Brancher
			//   - RunBeforeMerge
			//   - Files changed
			forceRun := (requireManuallyTriggeredJobs && ps.ContextRequired() && ps.NeedsExplicitTrigger()) || ps.RunBeforeMerge
			shouldRun, err := ps.ShouldRun(sp.branch, c.changedFiles.prChanges(&pr), forceRun, false)
			if err != nil {
				return nil, err
			}
			if !shouldRun {
				log.WithField("context", ps.Context).Debug("Presubmit excluded by ps.ShouldRun")
				continue
			}

			presubmits[pr.Number] = append(presubmits[pr.Number], ps)
		}
		log.WithField("required-presubmit-count", len(presubmits[pr.Number])).Debug("Determined required presubmits for PR.")
	}

	sp.prs = filteredPRs
	return presubmits, nil
}
// presubmitsForBatch filters presubmit jobs from a repo based on the PRs in the
// pool.
//
// Aside from jobs that should run based on triggers, jobs that are configured
// as `run_before_merge` are also returned.
func (c *syncController) presubmitsForBatch(prs []CodeReviewCommon, org, repo, baseSHA, baseBranch string) ([]config.Presubmit, error) {
	log := c.logger.WithFields(logrus.Fields{"repo": repo, "org": org, "base-sha": baseSHA, "base-branch": baseBranch})

	if len(prs) == 0 {
		log.Debug("No PRs, skip looking for presubmits for batch.")
		return nil, errors.New("no PRs are provided")
	}

	var headRefGetters []config.RefGetter
	for _, pr := range prs {
		headRefGetters = append(headRefGetters, refGetterFactory(pr.HeadRefOID))
	}

	presubmits, err := c.provider.GetPresubmits(org+"/"+repo, baseBranch, refGetterFactory(baseSHA), headRefGetters...)
	if err != nil {
		return nil, fmt.Errorf("failed to get presubmits for batch: %w", err)
	}
	log.Debugf("Found %d possible presubmits for batch", len(presubmits))

	requireManuallyTriggeredJobs := requireManuallyTriggeredJobs(c.config(), org, repo, baseBranch)

	var result []config.Presubmit
	for _, ps := range presubmits {
		// The PR argument matters only for Gerrit, where the required "label"
		// is extracted from the PR. We assume that the submission requirement
		// for a given label is consistent across all PRs from the same repo at
		// a given point in time, which should be a safe assumption.
		if !c.provider.jobIsRequiredByTide(&ps, &prs[0]) {
			continue
		}

		forceRun := (requireManuallyTriggeredJobs && ps.ContextRequired() && ps.NeedsExplicitTrigger()) || ps.RunBeforeMerge
		shouldRun, err := ps.ShouldRun(baseBranch, c.changedFiles.batchChanges(prs), forceRun, false)
		if err != nil {
			return nil, err
		}
		if !shouldRun {
			log.WithField("context", ps.Context).Debug("Presubmit excluded by ps.ShouldRun")
			continue
		}

		result = append(result, ps)
	}

	log.Debugf("After filtering, %d presubmits remained for batch", len(result))
	return result, nil
}

func (c *syncController) syncSubpool(sp subpool, blocks []blockers.Blocker) (Pool, error) {
	sp.log.WithField("num_prs", len(sp.prs)).WithField("num_prowjobs", len(sp.pjs)).Info("Syncing subpool")
	successes, pendings, missings, missingSerialTests := c.accumulate(sp.presubmits, sp.prs, sp.pjs, sp.sha)
	batchMerge, batchPending := c.accumulateBatch(sp)
	sp.log.WithFields(logrus.Fields{
		"prs-passing":   prNumbers(successes),
		"prs-pending":   prNumbers(pendings),
		"prs-missing":   prNumbers(missings),
		"batch-passing": prNumbers(batchMerge),
		"batch-pending": prNumbers(batchPending),
	}).Info("Subpool accumulated.")

	tenantIDs := sp.TenantIDs()
	var act Action
	var targets []CodeReviewCommon
	var err error
	var errorString string
	if len(blocks) > 0 {
		act = PoolBlocked
	} else {
		act, targets, err = c.takeAction(sp, batchPending, successes, pendings, missings, batchMerge, missingSerialTests)
		if err != nil {
			errorString = err.Error()
		}
		if recordableActions[act] {
			c.History.Record(
				poolKey(sp.org, sp.repo, sp.branch),
				string(act),
				sp.sha,
				errorString,
				prMeta(targets...),
				tenantIDs,
			)
		}
	}

	sp.log.WithFields(logrus.Fields{
		"action":  string(act),
		"targets": prNumbers(targets),
	}).Info("Subpool synced.")
	tideMetrics.pooledPRs.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(len(sp.prs)))
	tideMetrics.updateTime.WithLabelValues(sp.org, sp.repo, sp.branch).Set(float64(time.Now().Unix()))
	return Pool{
			Org:    sp.org,
			Repo:   sp.repo,
			Branch: sp.branch,

			SuccessPRs: successes,
			PendingPRs: pendings,
			MissingPRs: missings,

			BatchPending: batchPending,

			Action:   act,
			Target:   targets,
			Blockers: blocks,
			Error:    errorString,

			TenantIDs: tenantIDs,
		},
		err
}

func prMeta(prs ...CodeReviewCommon) []prowapi.Pull {
	var res []prowapi.Pull
	for _, pr := range prs {
		res = append(res, prowapi.Pull{
			Number:  pr.Number,
			Author:  pr.AuthorLogin,
			Title:   pr.Title,
			SHA:     pr.HeadRefOID,
			HeadRef: pr.HeadRefName,
		})
	}
	return res
}
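// examplePRMeta is an illustrative sketch (never called) of the conversion
// prMeta performs when recording history: only the identifying fields of each
// target PR are persisted, not the full CodeReviewCommon. All values below
// are made up.
func examplePRMeta() {
	targets := []CodeReviewCommon{{
		Number:      42,
		AuthorLogin: "octocat",
		Title:       "Fix flaky unit test",
		HeadRefOID:  "deadbeef",
		HeadRefName: "fix-flake",
	}}
	// Prints a single prowapi.Pull carrying Number, Author, Title, SHA, and HeadRef.
	fmt.Printf("%+v\n", prMeta(targets...))
}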
func sortPools(pools []Pool) {
	sort.Slice(pools, func(i, j int) bool {
		if pools[i].Org != pools[j].Org {
			return pools[i].Org < pools[j].Org
		}
		if pools[i].Repo != pools[j].Repo {
			return pools[i].Repo < pools[j].Repo
		}
		return pools[i].Branch < pools[j].Branch
	})

	sortPRs := func(prs []CodeReviewCommon) {
		sort.Slice(prs, func(i, j int) bool { return prs[i].Number < prs[j].Number })
	}
	for i := range pools {
		sortPRs(pools[i].SuccessPRs)
		sortPRs(pools[i].PendingPRs)
		sortPRs(pools[i].MissingPRs)
		sortPRs(pools[i].BatchPending)
	}
}

type subpool struct {
	log      *logrus.Entry
	org      string
	repo     string
	cloneURI string
	branch   string
	// sha is the baseSHA for this subpool.
	sha string

	// pjs contains all ProwJobs of type Presubmit or Batch
	// that have the same baseSHA as the subpool.
	pjs []prowapi.ProwJob
	prs []CodeReviewCommon

	cc map[int]contextChecker
	// presubmits contains all required presubmits for each PR
	// in this subpool.
	presubmits map[int][]config.Presubmit
}

func (sp subpool) TenantIDs() []string {
	ids := sets.Set[string]{}
	for _, pj := range sp.pjs {
		if pj.Spec.ProwJobDefault == nil || pj.Spec.ProwJobDefault.TenantID == "" {
			ids.Insert("")
		} else {
			ids.Insert(pj.Spec.ProwJobDefault.TenantID)
		}
	}
	return sets.List(ids)
}

func poolKey(org, repo, branch string) string {
	return fmt.Sprintf("%s/%s:%s", org, repo, branch)
}

// dividePool splits up the list of pull requests and prow jobs into a group
// per repo and branch. It only keeps ProwJobs whose baseSHA matches the
// latest base SHA of their branch.
func (c *syncController) dividePool(pool map[string]CodeReviewCommon) (map[string]*subpool, error) {
	sps := make(map[string]*subpool)
	for _, pr := range pool {
		org := pr.Org
		repo := pr.Repo
		branch := pr.BaseRefName
		branchRef := pr.BaseRefPrefix + pr.BaseRefName
		fn := poolKey(org, repo, branch)
		if sps[fn] == nil {
			sha, err := c.provider.GetRef(org, repo, strings.TrimPrefix(branchRef, "refs/"))
			if err != nil {
				return nil, err
			}
			sps[fn] = &subpool{
				log: c.logger.WithFields(logrus.Fields{
					"org":      org,
					"repo":     repo,
					"branch":   branch,
					"base-sha": sha,
				}),
				org:    org,
				repo:   repo,
				branch: branch,
				sha:    sha,
			}
		}
		sps[fn].prs = append(sps[fn].prs, pr)
	}

	for subpoolkey, sp := range sps {
		pjs := &prowapi.ProwJobList{}
		err := c.prowJobClient.List(
			c.ctx,
			pjs,
			ctrlruntimeclient.MatchingFields{cacheIndexName: cacheIndexKey(sp.org, sp.repo, sp.branch, sp.sha)},
			ctrlruntimeclient.InNamespace(c.config().ProwJobNamespace))
		if err != nil {
			return nil, fmt.Errorf("failed to list jobs for subpool %s: %w", subpoolkey, err)
		}
		sp.log.WithField("subpool", subpoolkey).WithField("pj_count", len(pjs.Items)).Debug("Found prowjobs")
		sps[subpoolkey].pjs = pjs.Items
	}
	return sps, nil
}
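// exampleTideKeys is an illustrative sketch (never called) of the two key
// formats used above: poolKey identifies a subpool, while cacheIndexKey
// (defined further below) also pins the base SHA, so dividePool only lists
// ProwJobs that ran against the current head of the branch.
func exampleTideKeys() {
	fmt.Println(poolKey("kubernetes", "test-infra", "master"))
	// kubernetes/test-infra:master
	fmt.Println(cacheIndexKey("kubernetes", "test-infra", "master", "0123abc"))
	// kubernetes/test-infra:master@0123abc
}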
// PullRequest holds graphql data about a PR, including its commits and their
// contexts. This struct is GitHub-specific.
type PullRequest struct {
	Number githubql.Int
	Author struct {
		Login githubql.String
	}
	BaseRef struct {
		Name   githubql.String
		Prefix githubql.String
	}
	HeadRefName  githubql.String `graphql:"headRefName"`
	HeadRefOID   githubql.String `graphql:"headRefOid"`
	Mergeable    githubql.MergeableState
	CanBeRebased githubql.Boolean `graphql:"canBeRebased"`
	Repository   struct {
		Name          githubql.String
		NameWithOwner githubql.String
		Owner         struct {
			Login githubql.String
		}
	}
	ReviewDecision githubql.PullRequestReviewDecision `graphql:"reviewDecision"`
	// Request the 'last' 4 commits hoping that one of them is the logically 'last'
	// commit with OID matching HeadRefOID. If we don't find it we have to use an
	// additional API token. (see the 'headContexts' func for details)
	// We can't raise this too much or we could hit the limit of 50,000 nodes
	// per query: https://developer.github.com/v4/guides/resource-limitations/#node-limit
	Commits   Commits `graphql:"commits(last: 4)"`
	Labels    Labels  `graphql:"labels(first: 100)"`
	Milestone *Milestone
	Body      githubql.String
	Title     githubql.String
	UpdatedAt githubql.DateTime
}

func (pr *PullRequest) logFields() logrus.Fields {
	return logrus.Fields{
		"org":    pr.Repository.Owner.Login,
		"repo":   pr.Repository.Name,
		"pr":     pr.Number,
		"branch": pr.BaseRef.Name,
		"sha":    pr.HeadRefOID,
	}
}

type Labels struct {
	Nodes []struct {
		Name githubql.String
	}
}

type Milestone struct {
	Title githubql.String
}

type Commits struct {
	Nodes []struct {
		Commit Commit
	}
}

type CommitNode struct {
	Commit Commit
}

// Commit holds graphql data about commits and which contexts they have.
type Commit struct {
	Status            CommitStatus
	OID               githubql.String `graphql:"oid"`
	StatusCheckRollup StatusCheckRollup
}

type CommitStatus struct {
	Contexts []Context
}

type StatusCheckRollup struct {
	Contexts StatusCheckRollupContext `graphql:"contexts(last: 100)"`
}

type StatusCheckRollupContext struct {
	Nodes []CheckRunNode
}

type CheckRunNode struct {
	CheckRun CheckRun `graphql:"... on CheckRun"`
}

type CheckRun struct {
	Name       githubql.String
	Conclusion githubql.String
	Status     githubql.String
}

// Context holds graphql response data for github contexts.
type Context struct {
	// Context is the name of the context; it is identical to the full name of a
	// prowjob if the context is for a prowjob.
	Context githubql.String
	// Description is the description for a context; it is formed by
	// config.ContextDescriptionWithBaseSha for a prowjob.
	Description githubql.String
	// State is the state for a prowjob: EXPECTED, ERROR, FAILURE, PENDING, SUCCESS.
	State githubql.StatusState
}
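// exampleHeadCommit is an illustrative sketch (never called) of why the query
// above requests the last 4 commits: consumers scan Commits.Nodes for the
// commit whose OID matches HeadRefOID, and have to fall back to additional
// API calls when it is not among them (see the 'headContexts' func).
func exampleHeadCommit(pr *PullRequest) *Commit {
	for i := range pr.Commits.Nodes {
		if commit := &pr.Commits.Nodes[i].Commit; commit.OID == pr.HeadRefOID {
			return commit
		}
	}
	// The head commit was not among the last 4 commits returned by the query.
	return nil
}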
type PRNode struct {
	PullRequest PullRequest `graphql:"... on PullRequest"`
}

type searchQuery struct {
	RateLimit struct {
		Cost      githubql.Int
		Remaining githubql.Int
	}
	Search struct {
		PageInfo struct {
			HasNextPage githubql.Boolean
			EndCursor   githubql.String
		}
		Nodes []PRNode
	} `graphql:"search(type: ISSUE, first: 37, after: $searchCursor, query: $query)"`
}

// orgRepoQueryStrings returns the GitHub query strings for the given orgs and
// repos. Make sure that this is only used by the GitHub interactor.
func orgRepoQueryStrings(orgs, repos []string, orgExceptions map[string]sets.Set[string]) map[string]string {
	queriesByOrg := map[string]string{}

	for _, org := range orgs {
		queriesByOrg[org] = fmt.Sprintf(`org:"%s"`, org)

		for _, exception := range sets.List(orgExceptions[org]) {
			queriesByOrg[org] += fmt.Sprintf(` -repo:"%s"`, exception)
		}
	}

	for _, repo := range repos {
		if org, _, ok := splitOrgRepoString(repo); ok {
			queriesByOrg[org] += fmt.Sprintf(` repo:"%s"`, repo)
		}
	}

	return queriesByOrg
}

// splitOrgRepoString is used only by orgRepoQueryStrings, which is only used
// by GitHub-related functions.
func splitOrgRepoString(orgRepo string) (string, string, bool) {
	split := strings.Split(orgRepo, "/")
	if len(split) != 2 {
		// Behave like the GitHub search itself and ignore invalid orgRepo identifiers.
		return "", "", false
	}
	return split[0], split[1], true
}

// cacheIndexName is the name of the index that indexes presubmit+batch ProwJobs by
// org+repo+branch+baseSHA. Use the cacheIndexKey func to get the correct key.
const cacheIndexName = "tide-global-index"

// cacheIndexKey returns the index key for the tideCacheIndex.
func cacheIndexKey(org, repo, branch, baseSHA string) string {
	return fmt.Sprintf("%s/%s:%s@%s", org, repo, branch, baseSHA)
}

// cacheIndexFunc indexes the passed-in ProwJob only if it is a presubmit or
// batch job that has Refs set.
//
// Used only by manager.FieldIndexer, so that only presubmit and batch jobs
// are indexed.
func cacheIndexFunc(obj ctrlruntimeclient.Object) []string {
	pj := obj.(*prowapi.ProwJob)
	// We do not care about jobs other than presubmit and batch.
	if pj.Spec.Type != prowapi.PresubmitJob && pj.Spec.Type != prowapi.BatchJob {
		return nil
	}
	if pj.Spec.Refs == nil {
		return nil
	}
	return []string{cacheIndexKey(pj.Spec.Refs.Org, pj.Spec.Refs.Repo, pj.Spec.Refs.BaseRef, pj.Spec.Refs.BaseSHA)}
}
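// exampleOrgRepoQueryStrings is an illustrative sketch (never called) of the
// query strings built above: org-wide queries subtract the excepted repos,
// and individually listed repos are appended to their org's query.
func exampleOrgRepoQueryStrings() {
	queries := orgRepoQueryStrings(
		[]string{"kubernetes"},
		[]string{"openshift/release"},
		map[string]sets.Set[string]{
			"kubernetes": sets.New[string]("kubernetes/kubernetes"),
		},
	)
	fmt.Println(queries["kubernetes"])
	// org:"kubernetes" -repo:"kubernetes/kubernetes"
	fmt.Println(queries["openshift"])
	// repo:"openshift/release" (with a leading space; no org: term was added)
}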
// nonFailedBatchByNameBaseAndPullsIndexName is the name of the index for
// non-failed batch jobs. Use the nonFailedBatchByNameBaseAndPullsIndexKey
// function to get the correct value.
const nonFailedBatchByNameBaseAndPullsIndexName = "tide-non-failed-jobs-by-name-base-and-pulls"

// nonFailedBatchByNameBaseAndPullsIndexKey collects the PR numbers and SHAs
// from the batch job and returns a string containing all of them. This is
// used only by nonFailedBatchByNameBaseAndPullsIndexFunc.
func nonFailedBatchByNameBaseAndPullsIndexKey(jobName string, refs *prowapi.Refs) string {
	// Sort the pulls to make sure this is deterministic.
	sort.Slice(refs.Pulls, func(i, j int) bool {
		return refs.Pulls[i].Number < refs.Pulls[j].Number
	})

	keys := []string{jobName, refs.Org, refs.Repo, refs.BaseRef, refs.BaseSHA}
	for _, pull := range refs.Pulls {
		keys = append(keys, strconv.Itoa(pull.Number), pull.SHA)
	}

	return strings.Join(keys, "|")
}

// nonFailedBatchByNameBaseAndPullsIndexFunc indexes the passed-in ProwJob
// only if it is a batch job that has not failed (it is either still running
// or it succeeded), and returns the key for the job.
//
// Used only by manager.FieldIndexer, so that only non-failed batch jobs are
// indexed.
func nonFailedBatchByNameBaseAndPullsIndexFunc(obj ctrlruntimeclient.Object) []string {
	pj := obj.(*prowapi.ProwJob)
	if pj.Spec.Type != prowapi.BatchJob || pj.Spec.Refs == nil {
		return nil
	}

	if pj.Complete() && pj.Status.State != prowapi.SuccessState {
		return nil
	}

	return []string{nonFailedBatchByNameBaseAndPullsIndexKey(pj.Spec.Job, pj.Spec.Refs)}
}

func checkRunNodesToContexts(log *logrus.Entry, nodes []CheckRunNode) []Context {
	var result []Context
	for _, node := range nodes {
		// GitHub gives us an empty checkrun per status context. In theory they could
		// at some point decide to create a virtual check run per status context.
		// If that were to happen, we would retrieve redundant data as we get the
		// status context both directly as a status context and as a checkrun; however,
		// the actual data in there should be identical, hence this isn't a problem.
		if string(node.CheckRun.Name) == "" {
			continue
		}
		result = append(result, checkRunToContext(node.CheckRun))
	}
	result = deduplicateContexts(result)
	if len(result) > 0 {
		log.WithField("checkruns", len(result)).Debug("Transformed checkruns to contexts")
	}
	return result
}

type descriptionAndState struct {
	description githubql.String
	state       githubql.StatusState
}

// deduplicateContexts deduplicates contexts, returning the best result for
// contexts that have multiple entries.
//
// deduplicateContexts is used only by checkRunNodesToContexts.
func deduplicateContexts(contexts []Context) []Context {
	result := map[githubql.String]descriptionAndState{}
	for _, context := range contexts {
		previousResult, found := result[context.Context]
		if !found {
			result[context.Context] = descriptionAndState{description: context.Description, state: context.State}
			continue
		}
		if isStateBetter(previousResult.state, context.State) {
			result[context.Context] = descriptionAndState{description: context.Description, state: context.State}
		}
	}

	var resultSlice []Context
	for name, descriptionAndState := range result {
		resultSlice = append(resultSlice, Context{Context: name, Description: descriptionAndState.description, State: descriptionAndState.state})
	}

	return resultSlice
}
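// exampleDeduplicateContexts is an illustrative sketch (never called): when
// the same context is reported more than once, deduplicateContexts keeps the
// entry with the better state as defined by isStateBetter.
func exampleDeduplicateContexts() {
	deduped := deduplicateContexts([]Context{
		{Context: "ci/unit", State: githubql.StatusStateFailure},
		{Context: "ci/unit", State: githubql.StatusStateSuccess},
	})
	fmt.Println(deduped[0].State) // SUCCESS
}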
// isStateBetter is used only by deduplicateContexts.
func isStateBetter(previous, current githubql.StatusState) bool {
	if current == githubql.StatusStateSuccess {
		return true
	}
	if current == githubql.StatusStatePending && (previous == githubql.StatusStateError || previous == githubql.StatusStateFailure || previous == githubql.StatusStateExpected) {
		return true
	}
	if previous == githubql.StatusStateExpected && (current == githubql.StatusStateError || current == githubql.StatusStateFailure) {
		return true
	}

	return false
}

// checkRunToContext translates a checkRun to a classic context
// ref: https://developer.github.com/v3/checks/runs/#parameters
func checkRunToContext(checkRun CheckRun) Context {
	context := Context{
		Context: checkRun.Name,
	}
	if checkRun.Status != githubql.String(githubql.CheckStatusStateCompleted) {
		context.State = githubql.StatusStatePending
		return context
	}

	if checkRun.Conclusion == githubql.String(githubql.CheckConclusionStateNeutral) || checkRun.Conclusion == githubql.String(githubql.CheckConclusionStateSkipped) || checkRun.Conclusion == githubql.String(githubql.StatusStateSuccess) {
		context.State = githubql.StatusStateSuccess
		return context
	}

	context.State = githubql.StatusStateFailure
	return context
}

// pickBatchWithPreexistingTests counts the successful and pending batch
// ProwJobs per set of pulls, considering only batches whose pulls are all
// still valid candidates, and returns the candidate set with the most
// successful jobs, falling back to the one with the most pending jobs.
func pickBatchWithPreexistingTests(sp subpool, candidates []CodeReviewCommon, maxSize int) []CodeReviewCommon {
	batchCandidatesBySuccessfulJobCount := map[string]int{}
	batchCandidatesByPendingJobCount := map[string]int{}

	prNumbersToMapKey := func(prs []prowapi.Pull) string {
		var numbers []string
		for _, pr := range prs {
			numbers = append(numbers, strconv.Itoa(pr.Number))
		}
		return strings.Join(numbers, "|")
	}
	prNumbersFromMapKey := func(s string) []int {
		var result []int
		for _, element := range strings.Split(s, "|") {
			intVal, err := strconv.Atoi(element)
			if err != nil {
				logrus.WithField("element", element).Error("BUG: Found element in pr numbers map that was not parseable as int")
				return nil
			}
			result = append(result, intVal)
		}
		return result
	}
	for _, pj := range sp.pjs {
		if pj.Spec.Type != prowapi.BatchJob || (maxSize != 0 && len(pj.Spec.Refs.Pulls) > maxSize) || (pj.Status.State != prowapi.SuccessState && pj.Status.State != prowapi.PendingState) {
			continue
		}
		var hasInvalidPR bool
		for _, pull := range pj.Spec.Refs.Pulls {
			if !isPullInPRList(pull, candidates) {
				hasInvalidPR = true
				break
			}
		}
		if hasInvalidPR {
			continue
		}
		if pj.Status.State == prowapi.SuccessState {
			batchCandidatesBySuccessfulJobCount[prNumbersToMapKey(pj.Spec.Refs.Pulls)]++
		} else {
			batchCandidatesByPendingJobCount[prNumbersToMapKey(pj.Spec.Refs.Pulls)]++
		}
	}

	var resultPullNumbers []int
	if len(batchCandidatesBySuccessfulJobCount) > 0 {
		resultPullNumbers = prNumbersFromMapKey(mapKeyWithHighestvalue(batchCandidatesBySuccessfulJobCount))
	} else if len(batchCandidatesByPendingJobCount) > 0 {
		resultPullNumbers = prNumbersFromMapKey(mapKeyWithHighestvalue(batchCandidatesByPendingJobCount))
	}

	var result []CodeReviewCommon
	for _, resultPRNumber := range resultPullNumbers {
		for _, pr := range sp.prs {
			if pr.Number == resultPRNumber {
				result = append(result, pr)
				break
			}
		}
	}

	return result
}
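// exampleCheckRunToContext is an illustrative sketch (never called) of the
// checkRun translation above: anything not yet completed maps to PENDING,
// and a skipped or neutral conclusion counts as SUCCESS.
func exampleCheckRunToContext() {
	running := checkRunToContext(CheckRun{
		Name:   "unit-tests",
		Status: githubql.String(githubql.CheckStatusStateInProgress),
	})
	skipped := checkRunToContext(CheckRun{
		Name:       "optional-lint",
		Status:     githubql.String(githubql.CheckStatusStateCompleted),
		Conclusion: githubql.String(githubql.CheckConclusionStateSkipped),
	})
	fmt.Println(running.State, skipped.State) // PENDING SUCCESS
}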
func isPullInPRList(pull prowapi.Pull, allPRs []CodeReviewCommon) bool {
	for _, pullRequest := range allPRs {
		if pull.Number != int(pullRequest.Number) {
			continue
		}
		return pull.SHA == string(pullRequest.HeadRefOID)
	}

	return false
}

func mapKeyWithHighestvalue(m map[string]int) string {
	var result string
	var resultVal int
	for k, v := range m {
		if v > resultVal {
			result = k
			resultVal = v
		}
	}

	return result
}

// getBetterSimpleState returns the better simple state. It supports
// no state, failure, pending, and success.
func getBetterSimpleState(a, b simpleState) simpleState {
	if a == "" || a == failureState || b == successState {
		// b can't be worse than no state or failure, and a can't be better than success.
		return b
	}

	// a must be pending and b cannot be success, so b can't be better than a.
	return a
}

func requireManuallyTriggeredJobs(c *config.Config, org, repo, branch string) bool {
	options := config.ParseTideContextPolicyOptions(org, repo, branch, c.Tide.ContextOptions)
	if options.FromBranchProtection != nil && *options.FromBranchProtection {
		if b, err := c.BranchProtection.GetOrg(org).GetRepo(repo).GetBranch(branch); err == nil {
			if policy, err := c.GetPolicy(org, repo, branch, *b, []config.Presubmit{}, nil); err == nil && policy != nil {
				return policy.RequireManuallyTriggeredJobs != nil && *policy.RequireManuallyTriggeredJobs
			}
		}
	}
	return false
}
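// exampleBetterSimpleState is an illustrative sketch (never called) of the
// ordering getBetterSimpleState induces over the simpleState constants
// defined elsewhere in this file: "" < failureState < pendingState <
// successState.
func exampleBetterSimpleState() {
	fmt.Println(getBetterSimpleState(failureState, pendingState)) // -> pendingState
	fmt.Println(getBetterSimpleState(pendingState, failureState)) // -> pendingState
	fmt.Println(getBetterSimpleState(pendingState, successState)) // -> successState
}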