github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/prow/jenkins/controller.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package jenkins 18 19 import ( 20 "bytes" 21 "fmt" 22 "strconv" 23 "sync" 24 "time" 25 26 "github.com/sirupsen/logrus" 27 28 "k8s.io/test-infra/prow/config" 29 "k8s.io/test-infra/prow/github" 30 "k8s.io/test-infra/prow/kube" 31 "k8s.io/test-infra/prow/pjutil" 32 reportlib "k8s.io/test-infra/prow/report" 33 ) 34 35 const ( 36 testInfra = "https://github.com/kubernetes/test-infra/issues" 37 38 // maxSyncRoutines is the maximum number of goroutines 39 // that will be active at any one time for the sync 40 maxSyncRoutines = 20 41 ) 42 43 type kubeClient interface { 44 CreateProwJob(kube.ProwJob) (kube.ProwJob, error) 45 ListProwJobs(map[string]string) ([]kube.ProwJob, error) 46 ReplaceProwJob(string, kube.ProwJob) (kube.ProwJob, error) 47 } 48 49 type jenkinsClient interface { 50 Build(*kube.ProwJob) error 51 ListJenkinsBuilds(jobs map[string]struct{}) (map[string]JenkinsBuild, error) 52 Abort(job string, build *JenkinsBuild) error 53 } 54 55 type githubClient interface { 56 BotName() (string, error) 57 CreateStatus(org, repo, ref string, s github.Status) error 58 ListIssueComments(org, repo string, number int) ([]github.IssueComment, error) 59 CreateComment(org, repo string, number int, comment string) error 60 DeleteComment(org, repo string, ID int) error 61 EditComment(org, repo string, ID int, comment string) error 62 GetPullRequestChanges(org, repo string, number int) ([]github.PullRequestChange, error) 63 } 64 65 type configAgent interface { 66 Config() *config.Config 67 } 68 69 type syncFn func(kube.ProwJob, chan<- kube.ProwJob, map[string]JenkinsBuild) error 70 71 // Controller manages ProwJobs. 72 type Controller struct { 73 kc kubeClient 74 jc jenkinsClient 75 ghc githubClient 76 ca configAgent 77 78 lock sync.RWMutex 79 // pendingJobs is a short-lived cache that helps in limiting 80 // the maximum concurrency of jobs. 81 pendingJobs map[string]int 82 } 83 84 // NewController creates a new Controller from the provided clients. 85 func NewController(kc *kube.Client, jc *Client, ghc *github.Client, ca *config.Agent) *Controller { 86 return &Controller{ 87 kc: kc, 88 jc: jc, 89 ghc: ghc, 90 ca: ca, 91 lock: sync.RWMutex{}, 92 pendingJobs: make(map[string]int), 93 } 94 } 95 96 // canExecuteConcurrently checks whether the provided ProwJob can 97 // be executed concurrently. 98 func (c *Controller) canExecuteConcurrently(pj *kube.ProwJob) bool { 99 c.lock.Lock() 100 defer c.lock.Unlock() 101 102 if max := c.ca.Config().JenkinsOperator.MaxConcurrency; max > 0 { 103 var running int 104 for _, num := range c.pendingJobs { 105 running += num 106 } 107 if running >= max { 108 logrus.Infof("Not starting another job, already %d running.", running) 109 return false 110 } 111 } 112 113 if pj.Spec.MaxConcurrency == 0 { 114 c.pendingJobs[pj.Spec.Job]++ 115 return true 116 } 117 118 numPending := c.pendingJobs[pj.Spec.Job] 119 if numPending >= pj.Spec.MaxConcurrency { 120 logrus.WithField("job", pj.Spec.Job).Infof("Not starting another instance of %s, already %d running.", pj.Spec.Job, numPending) 121 return false 122 } 123 c.pendingJobs[pj.Spec.Job]++ 124 return true 125 } 126 127 // incrementNumPendingJobs increments the amount of 128 // pending ProwJobs for the given job identifier 129 func (c *Controller) incrementNumPendingJobs(job string) { 130 c.lock.Lock() 131 defer c.lock.Unlock() 132 c.pendingJobs[job]++ 133 } 134 135 // Sync does one sync iteration. 136 func (c *Controller) Sync() error { 137 pjs, err := c.kc.ListProwJobs(nil) 138 if err != nil { 139 return fmt.Errorf("error listing prow jobs: %v", err) 140 } 141 var jenkinsJobs []kube.ProwJob 142 for _, pj := range pjs { 143 if pj.Spec.Agent == kube.JenkinsAgent { 144 jenkinsJobs = append(jenkinsJobs, pj) 145 } 146 } 147 pjs = jenkinsJobs 148 jbs, err := c.jc.ListJenkinsBuilds(getJenkinsJobs(pjs)) 149 if err != nil { 150 return fmt.Errorf("error listing jenkins builds: %v", err) 151 } 152 153 var syncErrs []error 154 if err := c.terminateDupes(pjs, jbs); err != nil { 155 syncErrs = append(syncErrs, err) 156 } 157 158 pendingCh, nonPendingCh := pjutil.PartitionPending(pjs) 159 errCh := make(chan error, len(pjs)) 160 reportCh := make(chan kube.ProwJob, len(pjs)) 161 162 // Reinstantiate on every resync of the controller instead of trying 163 // to keep this in sync with the state of the world. 164 c.pendingJobs = make(map[string]int) 165 // Sync pending jobs first so we can determine what is the maximum 166 // number of new jobs we can trigger when syncing the non-pendings. 167 syncProwJobs(c.syncPendingJob, pendingCh, reportCh, errCh, jbs) 168 syncProwJobs(c.syncNonPendingJob, nonPendingCh, reportCh, errCh, jbs) 169 170 close(errCh) 171 close(reportCh) 172 173 for err := range errCh { 174 syncErrs = append(syncErrs, err) 175 } 176 177 var reportErrs []error 178 reportTemplate := c.ca.Config().JenkinsOperator.ReportTemplate 179 for report := range reportCh { 180 if err := reportlib.Report(c.ghc, reportTemplate, report); err != nil { 181 reportErrs = append(reportErrs, err) 182 } 183 } 184 185 if len(syncErrs) == 0 && len(reportErrs) == 0 { 186 return nil 187 } 188 return fmt.Errorf("errors syncing: %v, errors reporting: %v", syncErrs, reportErrs) 189 } 190 191 // getJenkinsJobs returns all the active Jenkins jobs for the provided 192 // list of prowjobs. 193 func getJenkinsJobs(pjs []kube.ProwJob) map[string]struct{} { 194 jenkinsJobs := make(map[string]struct{}) 195 for _, pj := range pjs { 196 if pj.Complete() { 197 continue 198 } 199 jenkinsJobs[pj.Spec.Job] = struct{}{} 200 } 201 return jenkinsJobs 202 } 203 204 // terminateDupes aborts presubmits that have a newer version. It modifies pjs 205 // in-place when it aborts. 206 func (c *Controller) terminateDupes(pjs []kube.ProwJob, jbs map[string]JenkinsBuild) error { 207 // "job org/repo#number" -> newest job 208 dupes := make(map[string]int) 209 for i, pj := range pjs { 210 if pj.Complete() || pj.Spec.Type != kube.PresubmitJob { 211 continue 212 } 213 n := fmt.Sprintf("%s %s/%s#%d", pj.Spec.Job, pj.Spec.Refs.Org, pj.Spec.Refs.Repo, pj.Spec.Refs.Pulls[0].Number) 214 prev, ok := dupes[n] 215 if !ok { 216 dupes[n] = i 217 continue 218 } 219 cancelIndex := i 220 if pjs[prev].Status.StartTime.Before(pj.Status.StartTime) { 221 cancelIndex = prev 222 dupes[n] = i 223 } 224 toCancel := pjs[cancelIndex] 225 // Allow aborting presubmit jobs for commits that have been superseded by 226 // newer commits in Github pull requests. 227 if c.ca.Config().JenkinsOperator.AllowCancellations { 228 build, buildExists := jbs[toCancel.Metadata.Name] 229 // Avoid cancelling enqueued builds. 230 if buildExists && build.IsEnqueued() { 231 continue 232 } 233 // Otherwise, abort it. 234 if buildExists { 235 if err := c.jc.Abort(toCancel.Spec.Job, &build); err != nil { 236 logrus.Warningf("Cannot cancel Jenkins build for prowjob %q: %v", toCancel.Metadata.Name, err) 237 } 238 } 239 } 240 toCancel.Status.CompletionTime = time.Now() 241 toCancel.Status.State = kube.AbortedState 242 npj, err := c.kc.ReplaceProwJob(toCancel.Metadata.Name, toCancel) 243 if err != nil { 244 return err 245 } 246 pjs[cancelIndex] = npj 247 } 248 return nil 249 } 250 251 func syncProwJobs( 252 syncFn syncFn, 253 jobs <-chan kube.ProwJob, 254 reports chan<- kube.ProwJob, 255 syncErrors chan<- error, 256 jbs map[string]JenkinsBuild, 257 ) { 258 wg := &sync.WaitGroup{} 259 wg.Add(maxSyncRoutines) 260 for i := 0; i < maxSyncRoutines; i++ { 261 go func(jobs <-chan kube.ProwJob) { 262 defer wg.Done() 263 for pj := range jobs { 264 if err := syncFn(pj, reports, jbs); err != nil { 265 syncErrors <- err 266 } 267 } 268 }(jobs) 269 } 270 wg.Wait() 271 } 272 273 func (c *Controller) syncPendingJob(pj kube.ProwJob, reports chan<- kube.ProwJob, jbs map[string]JenkinsBuild) error { 274 jb, jbExists := jbs[pj.Metadata.Name] 275 if !jbExists { 276 pj.Status.CompletionTime = time.Now() 277 pj.Status.State = kube.ErrorState 278 pj.Status.URL = testInfra 279 pj.Status.Description = "Error finding Jenkins job." 280 } else { 281 switch { 282 case jb.IsEnqueued(): 283 // Still in queue. 284 c.incrementNumPendingJobs(pj.Spec.Job) 285 return nil 286 287 case jb.IsRunning(): 288 // Build still going. 289 c.incrementNumPendingJobs(pj.Spec.Job) 290 if pj.Status.Description == "Jenkins job running." { 291 return nil 292 } 293 pj.Status.Description = "Jenkins job running." 294 295 case jb.IsSuccess(): 296 // Build is complete. 297 pj.Status.CompletionTime = time.Now() 298 pj.Status.State = kube.SuccessState 299 pj.Status.Description = "Jenkins job succeeded." 300 for _, nj := range pj.Spec.RunAfterSuccess { 301 child := pjutil.NewProwJob(nj) 302 if !RunAfterSuccessCanRun(&pj, &child, c.ca, c.ghc) { 303 continue 304 } 305 if _, err := c.kc.CreateProwJob(pjutil.NewProwJob(nj)); err != nil { 306 return fmt.Errorf("error starting next prowjob: %v", err) 307 } 308 } 309 310 case jb.IsFailure(): 311 // Build either failed or aborted. 312 pj.Status.CompletionTime = time.Now() 313 pj.Status.State = kube.FailureState 314 pj.Status.Description = "Jenkins job failed." 315 } 316 // Construct the status URL that will be used in reports. 317 pj.Status.PodName = fmt.Sprintf("%s-%d", pj.Spec.Job, jb.Number) 318 pj.Status.BuildID = strconv.Itoa(jb.Number) 319 var b bytes.Buffer 320 if err := c.ca.Config().JenkinsOperator.JobURLTemplate.Execute(&b, &pj); err != nil { 321 return fmt.Errorf("error executing URL template: %v", err) 322 } 323 pj.Status.URL = b.String() 324 } 325 // Report to Github. 326 reports <- pj 327 328 _, err := c.kc.ReplaceProwJob(pj.Metadata.Name, pj) 329 return err 330 } 331 332 func (c *Controller) syncNonPendingJob(pj kube.ProwJob, reports chan<- kube.ProwJob, jbs map[string]JenkinsBuild) error { 333 if pj.Complete() { 334 return nil 335 } 336 337 // The rest are new prowjobs. 338 339 if _, jbExists := jbs[pj.Metadata.Name]; !jbExists { 340 // Do not start more jobs than specified. 341 if !c.canExecuteConcurrently(&pj) { 342 return nil 343 } 344 // Start the Jenkins job. 345 if err := c.jc.Build(&pj); err != nil { 346 logrus.WithField("job", pj.Spec.Job).Warningf("error starting Jenkins build: %v", err) 347 pj.Status.CompletionTime = time.Now() 348 pj.Status.State = kube.ErrorState 349 pj.Status.URL = testInfra 350 pj.Status.Description = "Error starting Jenkins job." 351 } else { 352 pj.Status.State = kube.PendingState 353 pj.Status.Description = "Jenkins job enqueued." 354 } 355 } else { 356 // If a Jenkins build already exists for this job, advance the ProwJob to Pending and 357 // it should be handled by syncPendingJob in the next sync. 358 pj.Status.State = kube.PendingState 359 pj.Status.Description = "Jenkins job enqueued." 360 } 361 // Report to Github. 362 reports <- pj 363 364 _, err := c.kc.ReplaceProwJob(pj.Metadata.Name, pj) 365 if err != nil { 366 return fmt.Errorf("error replacing prow job: %v", err) 367 } 368 return nil 369 } 370 371 // RunAfterSuccessCanRun returns whether a child job (specified as run_after_success in the 372 // prow config) can run once its parent job succeeds. The only case we will not run a child job 373 // is when it is a presubmit job and has a run_if_changed regural expression specified which does 374 // not match the changed filenames in the pull request the job was meant to run for. 375 // TODO: Collapse with plank, impossible to reuse as is due to the interfaces. 376 func RunAfterSuccessCanRun(parent, child *kube.ProwJob, c configAgent, ghc githubClient) bool { 377 if parent.Spec.Type != kube.PresubmitJob { 378 return true 379 } 380 381 // TODO: Make sure that parent and child have always the same org/repo. 382 org := parent.Spec.Refs.Org 383 repo := parent.Spec.Refs.Repo 384 prNum := parent.Spec.Refs.Pulls[0].Number 385 386 ps := c.Config().GetPresubmit(org+"/"+repo, child.Spec.Job) 387 if ps == nil { 388 // The config has changed ever since we started the parent. 389 // Not sure what is more correct here. Run the child for now. 390 return true 391 } 392 if ps.RunIfChanged == "" { 393 return true 394 } 395 changesFull, err := ghc.GetPullRequestChanges(org, repo, prNum) 396 if err != nil { 397 logrus.Warningf("Cannot get PR changes for %d: %v", prNum, err) 398 return true 399 } 400 // We only care about the filenames here 401 var changes []string 402 for _, change := range changesFull { 403 changes = append(changes, change.Filename) 404 } 405 return ps.RunsAgainstChanges(changes) 406 }