github.com/yrj2011/jx-test-infra@v0.0.0-20190529031832-7a2065ee98eb/prow/jenkins/controller.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package jenkins 18 19 import ( 20 "bytes" 21 "fmt" 22 "strconv" 23 "sync" 24 25 "github.com/bwmarrin/snowflake" 26 "github.com/sirupsen/logrus" 27 28 "k8s.io/test-infra/prow/config" 29 "k8s.io/test-infra/prow/github" 30 "k8s.io/test-infra/prow/kube" 31 "k8s.io/test-infra/prow/pjutil" 32 reportlib "k8s.io/test-infra/prow/report" 33 ) 34 35 const ( 36 testInfra = "https://github.com/kubernetes/test-infra/issues" 37 ) 38 39 type kubeClient interface { 40 CreateProwJob(kube.ProwJob) (kube.ProwJob, error) 41 ListProwJobs(string) ([]kube.ProwJob, error) 42 ReplaceProwJob(string, kube.ProwJob) (kube.ProwJob, error) 43 } 44 45 type jenkinsClient interface { 46 Build(*kube.ProwJob, string) error 47 ListBuilds(jobs []string) (map[string]Build, error) 48 Abort(job string, build *Build) error 49 } 50 51 type githubClient interface { 52 BotName() (string, error) 53 CreateStatus(org, repo, ref string, s github.Status) error 54 ListIssueComments(org, repo string, number int) ([]github.IssueComment, error) 55 CreateComment(org, repo string, number int, comment string) error 56 DeleteComment(org, repo string, ID int) error 57 EditComment(org, repo string, ID int, comment string) error 58 GetPullRequestChanges(org, repo string, number int) ([]github.PullRequestChange, error) 59 } 60 61 type configAgent interface { 62 Config() *config.Config 63 } 64 65 type syncFn func(kube.ProwJob, chan<- kube.ProwJob, map[string]Build) error 66 67 // Controller manages ProwJobs. 68 type Controller struct { 69 kc kubeClient 70 jc jenkinsClient 71 ghc githubClient 72 log *logrus.Entry 73 ca configAgent 74 node *snowflake.Node 75 totURL string 76 // selector that will be applied on prowjobs. 77 selector string 78 79 lock sync.RWMutex 80 // pendingJobs is a short-lived cache that helps in limiting 81 // the maximum concurrency of jobs. 82 pendingJobs map[string]int 83 84 pjLock sync.RWMutex 85 // shared across the controller and a goroutine that gathers metrics. 86 pjs []kube.ProwJob 87 } 88 89 // NewController creates a new Controller from the provided clients. 90 func NewController(kc *kube.Client, jc *Client, ghc *github.Client, logger *logrus.Entry, ca *config.Agent, totURL, selector string) (*Controller, error) { 91 n, err := snowflake.NewNode(1) 92 if err != nil { 93 return nil, err 94 } 95 if logger == nil { 96 logger = logrus.NewEntry(logrus.StandardLogger()) 97 } 98 return &Controller{ 99 kc: kc, 100 jc: jc, 101 ghc: ghc, 102 log: logger, 103 ca: ca, 104 selector: selector, 105 node: n, 106 totURL: totURL, 107 pendingJobs: make(map[string]int), 108 }, nil 109 } 110 111 func (c *Controller) config() config.Controller { 112 operators := c.ca.Config().JenkinsOperators 113 if len(operators) == 1 { 114 return operators[0].Controller 115 } 116 configured := make([]string, 0, len(operators)) 117 for _, cfg := range operators { 118 if cfg.LabelSelectorString == c.selector { 119 return cfg.Controller 120 } 121 configured = append(configured, cfg.LabelSelectorString) 122 } 123 if len(c.selector) == 0 { 124 c.log.Panicf("You need to specify a non-empty --label-selector (existing selectors: %v).", configured) 125 } else { 126 c.log.Panicf("No config exists for --label-selector=%s.", c.selector) 127 } 128 return config.Controller{} 129 } 130 131 // canExecuteConcurrently checks whether the provided ProwJob can 132 // be executed concurrently. 133 func (c *Controller) canExecuteConcurrently(pj *kube.ProwJob) bool { 134 c.lock.Lock() 135 defer c.lock.Unlock() 136 137 if max := c.config().MaxConcurrency; max > 0 { 138 var running int 139 for _, num := range c.pendingJobs { 140 running += num 141 } 142 if running >= max { 143 c.log.WithFields(pjutil.ProwJobFields(pj)).Debugf("Not starting another job, already %d running.", running) 144 return false 145 } 146 } 147 148 if pj.Spec.MaxConcurrency == 0 { 149 c.pendingJobs[pj.Spec.Job]++ 150 return true 151 } 152 153 numPending := c.pendingJobs[pj.Spec.Job] 154 if numPending >= pj.Spec.MaxConcurrency { 155 c.log.WithFields(pjutil.ProwJobFields(pj)).Debugf("Not starting another instance of %s, already %d running.", pj.Spec.Job, numPending) 156 return false 157 } 158 c.pendingJobs[pj.Spec.Job]++ 159 return true 160 } 161 162 // incrementNumPendingJobs increments the amount of 163 // pending ProwJobs for the given job identifier 164 func (c *Controller) incrementNumPendingJobs(job string) { 165 c.lock.Lock() 166 defer c.lock.Unlock() 167 c.pendingJobs[job]++ 168 } 169 170 // Sync does one sync iteration. 171 func (c *Controller) Sync() error { 172 pjs, err := c.kc.ListProwJobs(c.selector) 173 if err != nil { 174 return fmt.Errorf("error listing prow jobs: %v", err) 175 } 176 // Share what we have for gathering metrics. 177 c.pjLock.Lock() 178 c.pjs = pjs 179 c.pjLock.Unlock() 180 181 // TODO: Replace the following filtering with a field selector once CRDs support field selectors. 182 // https://github.com/kubernetes/kubernetes/issues/53459 183 var jenkinsJobs []kube.ProwJob 184 for _, pj := range pjs { 185 if pj.Spec.Agent == kube.JenkinsAgent { 186 jenkinsJobs = append(jenkinsJobs, pj) 187 } 188 } 189 pjs = jenkinsJobs 190 jbs, err := c.jc.ListBuilds(getJenkinsJobs(pjs)) 191 if err != nil { 192 return fmt.Errorf("error listing jenkins builds: %v", err) 193 } 194 195 var syncErrs []error 196 if err := c.terminateDupes(pjs, jbs); err != nil { 197 syncErrs = append(syncErrs, err) 198 } 199 200 pendingCh, triggeredCh := pjutil.PartitionActive(pjs) 201 errCh := make(chan error, len(pjs)) 202 reportCh := make(chan kube.ProwJob, len(pjs)) 203 204 // Reinstantiate on every resync of the controller instead of trying 205 // to keep this in sync with the state of the world. 206 c.pendingJobs = make(map[string]int) 207 // Sync pending jobs first so we can determine what is the maximum 208 // number of new jobs we can trigger when syncing the non-pendings. 209 maxSyncRoutines := c.config().MaxGoroutines 210 c.log.Debugf("Handling %d pending prowjobs", len(pendingCh)) 211 syncProwJobs(c.log, c.syncPendingJob, maxSyncRoutines, pendingCh, reportCh, errCh, jbs) 212 c.log.Debugf("Handling %d triggered prowjobs", len(triggeredCh)) 213 syncProwJobs(c.log, c.syncTriggeredJob, maxSyncRoutines, triggeredCh, reportCh, errCh, jbs) 214 215 close(errCh) 216 close(reportCh) 217 218 for err := range errCh { 219 syncErrs = append(syncErrs, err) 220 } 221 222 var reportErrs []error 223 reportTemplate := c.config().ReportTemplate 224 for report := range reportCh { 225 if err := reportlib.Report(c.ghc, reportTemplate, report); err != nil { 226 reportErrs = append(reportErrs, err) 227 c.log.WithFields(pjutil.ProwJobFields(&report)).WithError(err).Warn("Failed to report ProwJob status") 228 } 229 } 230 231 if len(syncErrs) == 0 && len(reportErrs) == 0 { 232 return nil 233 } 234 return fmt.Errorf("errors syncing: %v, errors reporting: %v", syncErrs, reportErrs) 235 } 236 237 // SyncMetrics records metrics for the cached prowjobs. 238 func (c *Controller) SyncMetrics() { 239 c.pjLock.RLock() 240 defer c.pjLock.RUnlock() 241 kube.GatherProwJobMetrics(c.pjs) 242 } 243 244 // getJenkinsJobs returns all the Jenkins jobs for all active 245 // prowjobs from the provided list. It handles deduplication. 246 func getJenkinsJobs(pjs []kube.ProwJob) []string { 247 jenkinsJobs := make(map[string]struct{}) 248 for _, pj := range pjs { 249 if pj.Complete() { 250 continue 251 } 252 jenkinsJobs[pj.Spec.Job] = struct{}{} 253 } 254 var jobs []string 255 for job := range jenkinsJobs { 256 jobs = append(jobs, job) 257 } 258 return jobs 259 } 260 261 // terminateDupes aborts presubmits that have a newer version. It modifies pjs 262 // in-place when it aborts. 263 func (c *Controller) terminateDupes(pjs []kube.ProwJob, jbs map[string]Build) error { 264 // "job org/repo#number" -> newest job 265 dupes := make(map[string]int) 266 for i, pj := range pjs { 267 if pj.Complete() || pj.Spec.Type != kube.PresubmitJob { 268 continue 269 } 270 n := fmt.Sprintf("%s %s/%s#%d", pj.Spec.Job, pj.Spec.Refs.Org, pj.Spec.Refs.Repo, pj.Spec.Refs.Pulls[0].Number) 271 prev, ok := dupes[n] 272 if !ok { 273 dupes[n] = i 274 continue 275 } 276 cancelIndex := i 277 if (&pjs[prev].Status.StartTime).Before(&pj.Status.StartTime) { 278 cancelIndex = prev 279 dupes[n] = i 280 } 281 toCancel := pjs[cancelIndex] 282 // Allow aborting presubmit jobs for commits that have been superseded by 283 // newer commits in Github pull requests. 284 if c.config().AllowCancellations { 285 build, buildExists := jbs[toCancel.ObjectMeta.Name] 286 // Avoid cancelling enqueued builds. 287 if buildExists && build.IsEnqueued() { 288 continue 289 } 290 // Otherwise, abort it. 291 if buildExists { 292 if err := c.jc.Abort(toCancel.Spec.Job, &build); err != nil { 293 c.log.WithError(err).WithFields(pjutil.ProwJobFields(&toCancel)).Warn("Cannot cancel Jenkins build") 294 } 295 } 296 } 297 toCancel.SetComplete() 298 prevState := toCancel.Status.State 299 toCancel.Status.State = kube.AbortedState 300 c.log.WithFields(pjutil.ProwJobFields(&toCancel)). 301 WithField("from", prevState). 302 WithField("to", toCancel.Status.State).Info("Transitioning states.") 303 npj, err := c.kc.ReplaceProwJob(toCancel.ObjectMeta.Name, toCancel) 304 if err != nil { 305 return err 306 } 307 pjs[cancelIndex] = npj 308 } 309 return nil 310 } 311 312 func syncProwJobs( 313 l *logrus.Entry, 314 syncFn syncFn, 315 maxSyncRoutines int, 316 jobs <-chan kube.ProwJob, 317 reports chan<- kube.ProwJob, 318 syncErrors chan<- error, 319 jbs map[string]Build, 320 ) { 321 goroutines := maxSyncRoutines 322 if goroutines > len(jobs) { 323 goroutines = len(jobs) 324 } 325 wg := &sync.WaitGroup{} 326 wg.Add(goroutines) 327 l.Debugf("Firing up %d goroutines", goroutines) 328 for i := 0; i < goroutines; i++ { 329 go func() { 330 defer wg.Done() 331 for pj := range jobs { 332 if err := syncFn(pj, reports, jbs); err != nil { 333 syncErrors <- err 334 } 335 } 336 }() 337 } 338 wg.Wait() 339 } 340 341 func (c *Controller) syncPendingJob(pj kube.ProwJob, reports chan<- kube.ProwJob, jbs map[string]Build) error { 342 // Record last known state so we can log state transitions. 343 prevState := pj.Status.State 344 345 jb, jbExists := jbs[pj.ObjectMeta.Name] 346 if !jbExists { 347 pj.SetComplete() 348 pj.Status.State = kube.ErrorState 349 pj.Status.URL = testInfra 350 pj.Status.Description = "Error finding Jenkins job." 351 } else { 352 switch { 353 case jb.IsEnqueued(): 354 // Still in queue. 355 c.incrementNumPendingJobs(pj.Spec.Job) 356 return nil 357 358 case jb.IsRunning(): 359 // Build still going. 360 c.incrementNumPendingJobs(pj.Spec.Job) 361 if pj.Status.Description == "Jenkins job running." { 362 return nil 363 } 364 pj.Status.Description = "Jenkins job running." 365 366 case jb.IsSuccess(): 367 // Build is complete. 368 pj.SetComplete() 369 pj.Status.State = kube.SuccessState 370 pj.Status.Description = "Jenkins job succeeded." 371 for _, nj := range pj.Spec.RunAfterSuccess { 372 child := pjutil.NewProwJob(nj, pj.ObjectMeta.Labels) 373 if !c.RunAfterSuccessCanRun(&pj, &child, c.ca, c.ghc) { 374 continue 375 } 376 if _, err := c.kc.CreateProwJob(pjutil.NewProwJob(nj, pj.ObjectMeta.Labels)); err != nil { 377 return fmt.Errorf("error starting next prowjob: %v", err) 378 } 379 } 380 381 case jb.IsFailure(): 382 pj.SetComplete() 383 pj.Status.State = kube.FailureState 384 pj.Status.Description = "Jenkins job failed." 385 386 case jb.IsAborted(): 387 pj.SetComplete() 388 pj.Status.State = kube.AbortedState 389 pj.Status.Description = "Jenkins job aborted." 390 } 391 // Construct the status URL that will be used in reports. 392 pj.Status.PodName = pj.ObjectMeta.Name 393 pj.Status.BuildID = jb.BuildID() 394 pj.Status.JenkinsBuildID = strconv.Itoa(jb.Number) 395 var b bytes.Buffer 396 if err := c.config().JobURLTemplate.Execute(&b, &pj); err != nil { 397 c.log.WithFields(pjutil.ProwJobFields(&pj)).Errorf("error executing URL template: %v", err) 398 } else { 399 pj.Status.URL = b.String() 400 } 401 } 402 // Report to Github. 403 reports <- pj 404 if prevState != pj.Status.State { 405 c.log.WithFields(pjutil.ProwJobFields(&pj)). 406 WithField("from", prevState). 407 WithField("to", pj.Status.State).Info("Transitioning states.") 408 } 409 _, err := c.kc.ReplaceProwJob(pj.ObjectMeta.Name, pj) 410 return err 411 } 412 413 func (c *Controller) syncTriggeredJob(pj kube.ProwJob, reports chan<- kube.ProwJob, jbs map[string]Build) error { 414 // Record last known state so we can log state transitions. 415 prevState := pj.Status.State 416 417 if _, jbExists := jbs[pj.ObjectMeta.Name]; !jbExists { 418 // Do not start more jobs than specified. 419 if !c.canExecuteConcurrently(&pj) { 420 return nil 421 } 422 buildID, err := c.getBuildID(pj.Spec.Job) 423 if err != nil { 424 return fmt.Errorf("error getting build ID: %v", err) 425 } 426 // Start the Jenkins job. 427 if err := c.jc.Build(&pj, buildID); err != nil { 428 c.log.WithError(err).WithFields(pjutil.ProwJobFields(&pj)).Warn("Cannot start Jenkins build") 429 pj.SetComplete() 430 pj.Status.State = kube.ErrorState 431 pj.Status.URL = testInfra 432 pj.Status.Description = "Error starting Jenkins job." 433 } else { 434 pj.Status.State = kube.PendingState 435 pj.Status.Description = "Jenkins job enqueued." 436 } 437 } else { 438 // If a Jenkins build already exists for this job, advance the ProwJob to Pending and 439 // it should be handled by syncPendingJob in the next sync. 440 pj.Status.State = kube.PendingState 441 pj.Status.Description = "Jenkins job enqueued." 442 } 443 // Report to Github. 444 reports <- pj 445 446 if prevState != pj.Status.State { 447 c.log.WithFields(pjutil.ProwJobFields(&pj)). 448 WithField("from", prevState). 449 WithField("to", pj.Status.State).Info("Transitioning states.") 450 } 451 _, err := c.kc.ReplaceProwJob(pj.ObjectMeta.Name, pj) 452 return err 453 } 454 455 func (c *Controller) getBuildID(name string) (string, error) { 456 if c.totURL == "" { 457 return c.node.Generate().String(), nil 458 } 459 return pjutil.GetBuildID(name, c.totURL) 460 } 461 462 // RunAfterSuccessCanRun returns whether a child job (specified as run_after_success in the 463 // prow config) can run once its parent job succeeds. The only case we will not run a child job 464 // is when it is a presubmit job and has a run_if_changed regular expression specified which does 465 // not match the changed filenames in the pull request the job was meant to run for. 466 // TODO: Collapse with plank, impossible to reuse as is due to the interfaces. 467 func (c *Controller) RunAfterSuccessCanRun(parent, child *kube.ProwJob, ca configAgent, ghc githubClient) bool { 468 if parent.Spec.Type != kube.PresubmitJob { 469 return true 470 } 471 472 // TODO: Make sure that parent and child have always the same org/repo. 473 org := parent.Spec.Refs.Org 474 repo := parent.Spec.Refs.Repo 475 prNum := parent.Spec.Refs.Pulls[0].Number 476 477 ps := ca.Config().GetPresubmit(org+"/"+repo, child.Spec.Job) 478 if ps == nil { 479 // The config has changed ever since we started the parent. 480 // Not sure what is more correct here. Run the child for now. 481 return true 482 } 483 if ps.RunIfChanged == "" { 484 return true 485 } 486 changesFull, err := ghc.GetPullRequestChanges(org, repo, prNum) 487 if err != nil { 488 c.log.WithError(err).WithFields(pjutil.ProwJobFields(parent)).Warnf("Cannot get PR changes for #%d", prNum) 489 return true 490 } 491 // We only care about the filenames here 492 var changes []string 493 for _, change := range changesFull { 494 changes = append(changes, change.Filename) 495 } 496 return ps.RunsAgainstChanges(changes) 497 }