sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/jenkins/controller.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package jenkins 18 19 import ( 20 "bytes" 21 "context" 22 "fmt" 23 "strconv" 24 "sync" 25 26 "github.com/bwmarrin/snowflake" 27 "github.com/sirupsen/logrus" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 ktypes "k8s.io/apimachinery/pkg/types" 30 "k8s.io/utils/clock" 31 prowv1 "sigs.k8s.io/prow/pkg/client/clientset/versioned/typed/prowjobs/v1" 32 33 prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1" 34 "sigs.k8s.io/prow/pkg/config" 35 "sigs.k8s.io/prow/pkg/github" 36 reportlib "sigs.k8s.io/prow/pkg/github/report" 37 "sigs.k8s.io/prow/pkg/kube" 38 "sigs.k8s.io/prow/pkg/pjutil" 39 ) 40 41 type prowJobClient interface { 42 Create(context.Context, *prowapi.ProwJob, metav1.CreateOptions) (*prowapi.ProwJob, error) 43 List(context.Context, metav1.ListOptions) (*prowapi.ProwJobList, error) 44 Patch(ctx context.Context, name string, pt ktypes.PatchType, data []byte, o metav1.PatchOptions, subresources ...string) (result *prowapi.ProwJob, err error) 45 } 46 47 type jenkinsClient interface { 48 Build(*prowapi.ProwJob, string) error 49 ListBuilds(jobs []BuildQueryParams) (map[string]Build, error) 50 Abort(job string, build *Build) error 51 } 52 53 type githubClient interface { 54 reportlib.GitHubClient 55 GetPullRequestChanges(org, repo string, number int) ([]github.PullRequestChange, error) 56 } 57 58 type syncFn func(prowapi.ProwJob, chan<- prowapi.ProwJob, map[string]Build) error 59 60 // Controller manages ProwJobs. 61 type Controller struct { 62 prowJobClient prowJobClient 63 jc jenkinsClient 64 ghc githubClient 65 log *logrus.Entry 66 cfg config.Getter 67 node *snowflake.Node 68 totURL string 69 // if skip report job results to github 70 skipReport bool 71 // selector that will be applied on prowjobs. 72 selector string 73 74 lock sync.RWMutex 75 // pendingJobs is a short-lived cache that helps in limiting 76 // the maximum concurrency of jobs. 77 pendingJobs map[string]int 78 79 pjLock sync.RWMutex 80 // shared across the controller and a goroutine that gathers metrics. 81 pjs []prowapi.ProwJob 82 clock clock.WithTickerAndDelayedExecution 83 } 84 85 // NewController creates a new Controller from the provided clients. 86 func NewController(prowJobClient prowv1.ProwJobInterface, jc *Client, ghc github.Client, logger *logrus.Entry, cfg config.Getter, totURL, selector string, skipReport bool) (*Controller, error) { 87 n, err := snowflake.NewNode(1) 88 if err != nil { 89 return nil, err 90 } 91 if logger == nil { 92 logger = logrus.NewEntry(logrus.StandardLogger()) 93 } 94 return &Controller{ 95 prowJobClient: prowJobClient, 96 jc: jc, 97 ghc: ghc, 98 log: logger, 99 cfg: cfg, 100 selector: selector, 101 node: n, 102 totURL: totURL, 103 skipReport: skipReport, 104 pendingJobs: make(map[string]int), 105 clock: clock.RealClock{}, 106 }, nil 107 } 108 109 func (c *Controller) config() config.Controller { 110 operators := c.cfg().JenkinsOperators 111 if len(operators) == 1 { 112 return operators[0].Controller 113 } 114 configured := make([]string, 0, len(operators)) 115 for _, cfg := range operators { 116 if cfg.LabelSelectorString == c.selector { 117 return cfg.Controller 118 } 119 configured = append(configured, cfg.LabelSelectorString) 120 } 121 if len(c.selector) == 0 { 122 c.log.Panicf("You need to specify a non-empty --label-selector (existing selectors: %v).", configured) 123 } else { 124 c.log.Panicf("No config exists for --label-selector=%s.", c.selector) 125 } 126 return config.Controller{} 127 } 128 129 // canExecuteConcurrently checks whether the provided ProwJob can 130 // be executed concurrently. 131 func (c *Controller) canExecuteConcurrently(pj *prowapi.ProwJob) bool { 132 c.lock.Lock() 133 defer c.lock.Unlock() 134 135 if max := c.config().MaxConcurrency; max > 0 { 136 var running int 137 for _, num := range c.pendingJobs { 138 running += num 139 } 140 if running >= max { 141 c.log.WithFields(pjutil.ProwJobFields(pj)).Debugf("Not starting another job, already %d running.", running) 142 return false 143 } 144 } 145 146 if pj.Spec.MaxConcurrency == 0 { 147 c.pendingJobs[pj.Spec.Job]++ 148 return true 149 } 150 151 numPending := c.pendingJobs[pj.Spec.Job] 152 if numPending >= pj.Spec.MaxConcurrency { 153 c.log.WithFields(pjutil.ProwJobFields(pj)).Debugf("Not starting another instance of %s, already %d running.", pj.Spec.Job, numPending) 154 return false 155 } 156 c.pendingJobs[pj.Spec.Job]++ 157 return true 158 } 159 160 // incrementNumPendingJobs increments the amount of 161 // pending ProwJobs for the given job identifier 162 func (c *Controller) incrementNumPendingJobs(job string) { 163 c.lock.Lock() 164 defer c.lock.Unlock() 165 c.pendingJobs[job]++ 166 } 167 168 // Sync does one sync iteration. 169 func (c *Controller) Sync() error { 170 pjs, err := c.prowJobClient.List(context.TODO(), metav1.ListOptions{LabelSelector: c.selector}) 171 if err != nil { 172 return fmt.Errorf("error listing prow jobs: %w", err) 173 } 174 // Share what we have for gathering metrics. 175 c.pjLock.Lock() 176 c.pjs = pjs.Items 177 c.pjLock.Unlock() 178 179 // TODO: Replace the following filtering with a field selector once CRDs support field selectors. 180 // https://github.com/kubernetes/kubernetes/issues/53459 181 var jenkinsJobs []prowapi.ProwJob 182 for _, pj := range pjs.Items { 183 if pj.Spec.Agent == prowapi.JenkinsAgent { 184 jenkinsJobs = append(jenkinsJobs, pj) 185 } 186 } 187 jbs, err := c.jc.ListBuilds(getJenkinsJobs(jenkinsJobs)) 188 if err != nil { 189 return fmt.Errorf("error listing jenkins builds: %w", err) 190 } 191 192 var syncErrs []error 193 if err := c.terminateDupes(jenkinsJobs, jbs); err != nil { 194 syncErrs = append(syncErrs, err) 195 } 196 197 pendingCh, triggeredCh, abortedCh := pjutil.PartitionActive(jenkinsJobs) 198 errCh := make(chan error, len(jenkinsJobs)) 199 reportCh := make(chan prowapi.ProwJob, len(jenkinsJobs)) 200 201 // Reinstantiate on every resync of the controller instead of trying 202 // to keep this in sync with the state of the world. 203 c.pendingJobs = make(map[string]int) 204 // Sync pending jobs first so we can determine what is the maximum 205 // number of new jobs we can trigger when syncing the non-pendings. 206 maxSyncRoutines := c.config().MaxGoroutines 207 c.log.Debugf("Handling %d pending prowjobs", len(pendingCh)) 208 syncProwJobs(c.log, c.syncPendingJob, maxSyncRoutines, pendingCh, reportCh, errCh, jbs) 209 c.log.Debugf("Handling %d triggered prowjobs", len(triggeredCh)) 210 syncProwJobs(c.log, c.syncTriggeredJob, maxSyncRoutines, triggeredCh, reportCh, errCh, jbs) 211 c.log.Debugf("Handling %d aborted prowjobs", len(abortedCh)) 212 syncProwJobs(c.log, c.syncAbortedJob, maxSyncRoutines, abortedCh, reportCh, errCh, jbs) 213 214 close(errCh) 215 close(reportCh) 216 217 for err := range errCh { 218 syncErrs = append(syncErrs, err) 219 } 220 221 var reportErrs []error 222 if !c.skipReport { 223 reportConfig := c.cfg().GitHubReporter 224 jConfig := c.config() 225 for report := range reportCh { 226 reportTemplate := jConfig.ReportTemplateForRepo(report.Spec.Refs) 227 if err := reportlib.Report(context.Background(), c.ghc, reportTemplate, report, reportConfig); err != nil { 228 reportErrs = append(reportErrs, err) 229 c.log.WithFields(pjutil.ProwJobFields(&report)).WithError(err).Warn("Failed to report ProwJob status") 230 } 231 } 232 } 233 234 if len(syncErrs) == 0 && len(reportErrs) == 0 { 235 return nil 236 } 237 return fmt.Errorf("errors syncing: %v, errors reporting: %v", syncErrs, reportErrs) 238 } 239 240 // SyncMetrics records metrics for the cached prowjobs. 241 func (c *Controller) SyncMetrics() { 242 c.pjLock.RLock() 243 defer c.pjLock.RUnlock() 244 kube.GatherProwJobMetrics(c.log, c.pjs) 245 } 246 247 // getJenkinsJobs returns all the Jenkins jobs for all active 248 // prowjobs from the provided list. It handles deduplication. 249 func getJenkinsJobs(pjs []prowapi.ProwJob) []BuildQueryParams { 250 jenkinsJobs := []BuildQueryParams{} 251 252 for _, pj := range pjs { 253 if pj.Complete() { 254 continue 255 } 256 257 jenkinsJobs = append(jenkinsJobs, BuildQueryParams{ 258 JobName: getJobName(&pj.Spec), 259 ProwJobID: pj.Name, 260 }) 261 } 262 263 return jenkinsJobs 264 } 265 266 // terminateDupes aborts presubmits that have a newer version. It modifies pjs 267 // in-place when it aborts. 268 func (c *Controller) terminateDupes(pjs []prowapi.ProwJob, jbs map[string]Build) error { 269 // "job org/repo#number" -> newest job 270 dupes := make(map[string]int) 271 for i, pj := range pjs { 272 if pj.Complete() || pj.Spec.Type != prowapi.PresubmitJob { 273 continue 274 } 275 n := fmt.Sprintf("%s %s/%s#%d", pj.Spec.Job, pj.Spec.Refs.Org, pj.Spec.Refs.Repo, pj.Spec.Refs.Pulls[0].Number) 276 prev, ok := dupes[n] 277 if !ok { 278 dupes[n] = i 279 continue 280 } 281 cancelIndex := i 282 if (&pjs[prev].Status.StartTime).Before(&pj.Status.StartTime) { 283 cancelIndex = prev 284 dupes[n] = i 285 } 286 toCancel := pjs[cancelIndex] 287 288 // Abort presubmit jobs for commits that have been superseded by 289 // newer commits in GitHub pull requests. 290 build, buildExists := jbs[toCancel.ObjectMeta.Name] 291 // Avoid cancelling enqueued builds. 292 if buildExists && build.IsEnqueued() { 293 continue 294 } 295 // Otherwise, abort it. 296 if buildExists { 297 if err := c.jc.Abort(getJobName(&toCancel.Spec), &build); err != nil { 298 c.log.WithError(err).WithFields(pjutil.ProwJobFields(&toCancel)).Warn("Cannot cancel Jenkins build") 299 } 300 } 301 302 srcPJ := toCancel.DeepCopy() 303 toCancel.SetComplete() 304 prevState := toCancel.Status.State 305 toCancel.Status.State = prowapi.AbortedState 306 toCancel.Status.Description = "Aborted as the newer version of this job is running." 307 c.log.WithFields(pjutil.ProwJobFields(&toCancel)). 308 WithField("from", prevState). 309 WithField("to", toCancel.Status.State).Info("Transitioning states.") 310 npj, err := pjutil.PatchProwjob(context.TODO(), c.prowJobClient, c.log, *srcPJ, toCancel) 311 if err != nil { 312 return err 313 } 314 pjs[cancelIndex] = *npj 315 } 316 return nil 317 } 318 319 func syncProwJobs( 320 l *logrus.Entry, 321 syncFn syncFn, 322 maxSyncRoutines int, 323 jobs <-chan prowapi.ProwJob, 324 reports chan<- prowapi.ProwJob, 325 syncErrors chan<- error, 326 jbs map[string]Build, 327 ) { 328 goroutines := maxSyncRoutines 329 if goroutines > len(jobs) { 330 goroutines = len(jobs) 331 } 332 wg := &sync.WaitGroup{} 333 wg.Add(goroutines) 334 l.Debugf("Firing up %d goroutines", goroutines) 335 for i := 0; i < goroutines; i++ { 336 go func() { 337 defer wg.Done() 338 for pj := range jobs { 339 if err := syncFn(pj, reports, jbs); err != nil { 340 syncErrors <- err 341 } 342 } 343 }() 344 } 345 wg.Wait() 346 } 347 348 func (c *Controller) syncPendingJob(pj prowapi.ProwJob, reports chan<- prowapi.ProwJob, jbs map[string]Build) error { 349 // Record last known state so we can patch 350 prevPJ := pj.DeepCopy() 351 352 jb, jbExists := jbs[pj.ObjectMeta.Name] 353 if !jbExists { 354 pj.SetComplete() 355 pj.Status.State = prowapi.ErrorState 356 pj.Status.URL = c.cfg().StatusErrorLink 357 pj.Status.Description = "Error finding Jenkins job." 358 } else { 359 switch { 360 case jb.IsEnqueued(): 361 // Still in queue. 362 c.incrementNumPendingJobs(pj.Spec.Job) 363 return nil 364 365 case jb.IsRunning(): 366 // Build still going. 367 c.incrementNumPendingJobs(pj.Spec.Job) 368 if pj.Status.Description == "Jenkins job running." { 369 return nil 370 } 371 pj.Status.Description = "Jenkins job running." 372 373 case jb.IsSuccess(): 374 // Build is complete. 375 pj.SetComplete() 376 pj.Status.State = prowapi.SuccessState 377 pj.Status.Description = "Jenkins job succeeded." 378 379 case jb.IsFailure(): 380 pj.SetComplete() 381 pj.Status.State = prowapi.FailureState 382 pj.Status.Description = "Jenkins job failed." 383 384 case jb.IsAborted(): 385 pj.SetComplete() 386 pj.Status.State = prowapi.AbortedState 387 pj.Status.Description = "Jenkins job aborted." 388 } 389 // Construct the status URL that will be used in reports. 390 pj.Status.PodName = pj.ObjectMeta.Name 391 pj.Status.BuildID = jb.BuildID() 392 pj.Status.JenkinsBuildID = strconv.Itoa(jb.Number) 393 var b bytes.Buffer 394 if err := c.config().JobURLTemplate.Execute(&b, &pj); err != nil { 395 c.log.WithFields(pjutil.ProwJobFields(&pj)).Errorf("error executing URL template: %v", err) 396 } else { 397 pj.Status.URL = b.String() 398 } 399 } 400 // Report to GitHub. 401 reports <- pj 402 if prevPJ.Status.State != pj.Status.State { 403 c.log.WithFields(pjutil.ProwJobFields(&pj)). 404 WithField("from", prevPJ.Status.State). 405 WithField("to", pj.Status.State).Info("Transitioning states.") 406 } 407 _, err := pjutil.PatchProwjob(context.TODO(), c.prowJobClient, c.log, *prevPJ, pj) 408 return err 409 } 410 411 func (c *Controller) syncAbortedJob(pj prowapi.ProwJob, _ chan<- prowapi.ProwJob, jbs map[string]Build) error { 412 if pj.Status.State != prowapi.AbortedState || pj.Complete() { 413 return nil 414 } 415 416 if build, exists := jbs[pj.Name]; exists { 417 if err := c.jc.Abort(getJobName(&pj.Spec), &build); err != nil { 418 return fmt.Errorf("failed to abort Jenkins build: %w", err) 419 } 420 } 421 422 originalPJ := pj.DeepCopy() 423 pj.SetComplete() 424 _, err := pjutil.PatchProwjob(context.TODO(), c.prowJobClient, c.log, *originalPJ, pj) 425 return err 426 } 427 428 func (c *Controller) syncTriggeredJob(pj prowapi.ProwJob, reports chan<- prowapi.ProwJob, jbs map[string]Build) error { 429 // Record last known state so we can patch 430 prevPJ := pj.DeepCopy() 431 432 if _, jbExists := jbs[pj.ObjectMeta.Name]; !jbExists { 433 // Do not start more jobs than specified. 434 if !c.canExecuteConcurrently(&pj) { 435 return nil 436 } 437 buildID, err := c.getBuildID(pj.Spec.Job) 438 if err != nil { 439 return fmt.Errorf("error getting build ID: %w", err) 440 } 441 // Start the Jenkins job. 442 if err := c.jc.Build(&pj, buildID); err != nil { 443 c.log.WithError(err).WithFields(pjutil.ProwJobFields(&pj)).Warn("Cannot start Jenkins build") 444 pj.SetComplete() 445 pj.Status.State = prowapi.ErrorState 446 pj.Status.URL = c.cfg().StatusErrorLink 447 pj.Status.Description = "Error starting Jenkins job." 448 } else { 449 now := metav1.NewTime(c.clock.Now()) 450 pj.Status.PendingTime = &now 451 pj.Status.State = prowapi.PendingState 452 pj.Status.Description = "Jenkins job enqueued." 453 } 454 } else { 455 // If a Jenkins build already exists for this job, advance the ProwJob to Pending and 456 // it should be handled by syncPendingJob in the next sync. 457 if pj.Status.PendingTime == nil { 458 now := metav1.NewTime(c.clock.Now()) 459 pj.Status.PendingTime = &now 460 } 461 pj.Status.State = prowapi.PendingState 462 pj.Status.Description = "Jenkins job enqueued." 463 } 464 // Report to GitHub. 465 reports <- pj 466 467 if prevPJ.Status.State != pj.Status.State { 468 c.log.WithFields(pjutil.ProwJobFields(&pj)). 469 WithField("from", prevPJ.Status.State). 470 WithField("to", pj.Status.State).Info("Transitioning states.") 471 } 472 _, err := pjutil.PatchProwjob(context.TODO(), c.prowJobClient, c.log, *prevPJ, pj) 473 return err 474 } 475 476 func (c *Controller) getBuildID(name string) (string, error) { 477 if c.totURL == "" { 478 return c.node.Generate().String(), nil 479 } 480 return pjutil.GetBuildID(name, c.totURL) 481 }