github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/prow/jenkins/controller.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package jenkins
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"strconv"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/sirupsen/logrus"
    27  
    28  	"k8s.io/test-infra/prow/config"
    29  	"k8s.io/test-infra/prow/github"
    30  	"k8s.io/test-infra/prow/kube"
    31  	"k8s.io/test-infra/prow/pjutil"
    32  	reportlib "k8s.io/test-infra/prow/report"
    33  )
    34  
    35  const (
    36  	testInfra = "https://github.com/kubernetes/test-infra/issues"
    37  
    38  	// maxSyncRoutines is the maximum number of goroutines
    39  	// that will be active at any one time for the sync
    40  	maxSyncRoutines = 20
    41  )
    42  
    43  type kubeClient interface {
    44  	CreateProwJob(kube.ProwJob) (kube.ProwJob, error)
    45  	ListProwJobs(map[string]string) ([]kube.ProwJob, error)
    46  	ReplaceProwJob(string, kube.ProwJob) (kube.ProwJob, error)
    47  }
    48  
    49  type jenkinsClient interface {
    50  	Build(*kube.ProwJob) error
    51  	ListJenkinsBuilds(jobs map[string]struct{}) (map[string]JenkinsBuild, error)
    52  	Abort(job string, build *JenkinsBuild) error
    53  }
    54  
    55  type githubClient interface {
    56  	BotName() (string, error)
    57  	CreateStatus(org, repo, ref string, s github.Status) error
    58  	ListIssueComments(org, repo string, number int) ([]github.IssueComment, error)
    59  	CreateComment(org, repo string, number int, comment string) error
    60  	DeleteComment(org, repo string, ID int) error
    61  	EditComment(org, repo string, ID int, comment string) error
    62  	GetPullRequestChanges(org, repo string, number int) ([]github.PullRequestChange, error)
    63  }
    64  
    65  type configAgent interface {
    66  	Config() *config.Config
    67  }
    68  
    69  type syncFn func(kube.ProwJob, chan<- kube.ProwJob, map[string]JenkinsBuild) error
    70  
    71  // Controller manages ProwJobs.
    72  type Controller struct {
    73  	kc  kubeClient
    74  	jc  jenkinsClient
    75  	ghc githubClient
    76  	ca  configAgent
    77  
    78  	lock sync.RWMutex
    79  	// pendingJobs is a short-lived cache that helps in limiting
    80  	// the maximum concurrency of jobs.
    81  	pendingJobs map[string]int
    82  }
    83  
    84  // NewController creates a new Controller from the provided clients.
    85  func NewController(kc *kube.Client, jc *Client, ghc *github.Client, ca *config.Agent) *Controller {
    86  	return &Controller{
    87  		kc:          kc,
    88  		jc:          jc,
    89  		ghc:         ghc,
    90  		ca:          ca,
    91  		lock:        sync.RWMutex{},
    92  		pendingJobs: make(map[string]int),
    93  	}
    94  }
    95  
    96  // canExecuteConcurrently checks whether the provided ProwJob can
    97  // be executed concurrently.
    98  func (c *Controller) canExecuteConcurrently(pj *kube.ProwJob) bool {
    99  	c.lock.Lock()
   100  	defer c.lock.Unlock()
   101  
   102  	if max := c.ca.Config().JenkinsOperator.MaxConcurrency; max > 0 {
   103  		var running int
   104  		for _, num := range c.pendingJobs {
   105  			running += num
   106  		}
   107  		if running >= max {
   108  			logrus.Infof("Not starting another job, already %d running.", running)
   109  			return false
   110  		}
   111  	}
   112  
   113  	if pj.Spec.MaxConcurrency == 0 {
   114  		c.pendingJobs[pj.Spec.Job]++
   115  		return true
   116  	}
   117  
   118  	numPending := c.pendingJobs[pj.Spec.Job]
   119  	if numPending >= pj.Spec.MaxConcurrency {
   120  		logrus.WithField("job", pj.Spec.Job).Infof("Not starting another instance of %s, already %d running.", pj.Spec.Job, numPending)
   121  		return false
   122  	}
   123  	c.pendingJobs[pj.Spec.Job]++
   124  	return true
   125  }
   126  
   127  // incrementNumPendingJobs increments the amount of
   128  // pending ProwJobs for the given job identifier
   129  func (c *Controller) incrementNumPendingJobs(job string) {
   130  	c.lock.Lock()
   131  	defer c.lock.Unlock()
   132  	c.pendingJobs[job]++
   133  }
   134  
   135  // Sync does one sync iteration.
   136  func (c *Controller) Sync() error {
   137  	pjs, err := c.kc.ListProwJobs(nil)
   138  	if err != nil {
   139  		return fmt.Errorf("error listing prow jobs: %v", err)
   140  	}
   141  	var jenkinsJobs []kube.ProwJob
   142  	for _, pj := range pjs {
   143  		if pj.Spec.Agent == kube.JenkinsAgent {
   144  			jenkinsJobs = append(jenkinsJobs, pj)
   145  		}
   146  	}
   147  	pjs = jenkinsJobs
   148  	jbs, err := c.jc.ListJenkinsBuilds(getJenkinsJobs(pjs))
   149  	if err != nil {
   150  		return fmt.Errorf("error listing jenkins builds: %v", err)
   151  	}
   152  
   153  	var syncErrs []error
   154  	if err := c.terminateDupes(pjs, jbs); err != nil {
   155  		syncErrs = append(syncErrs, err)
   156  	}
   157  
   158  	pendingCh, nonPendingCh := pjutil.PartitionPending(pjs)
   159  	errCh := make(chan error, len(pjs))
   160  	reportCh := make(chan kube.ProwJob, len(pjs))
   161  
   162  	// Reinstantiate on every resync of the controller instead of trying
   163  	// to keep this in sync with the state of the world.
   164  	c.pendingJobs = make(map[string]int)
   165  	// Sync pending jobs first so we can determine what is the maximum
   166  	// number of new jobs we can trigger when syncing the non-pendings.
   167  	syncProwJobs(c.syncPendingJob, pendingCh, reportCh, errCh, jbs)
   168  	syncProwJobs(c.syncNonPendingJob, nonPendingCh, reportCh, errCh, jbs)
   169  
   170  	close(errCh)
   171  	close(reportCh)
   172  
   173  	for err := range errCh {
   174  		syncErrs = append(syncErrs, err)
   175  	}
   176  
   177  	var reportErrs []error
   178  	reportTemplate := c.ca.Config().JenkinsOperator.ReportTemplate
   179  	for report := range reportCh {
   180  		if err := reportlib.Report(c.ghc, reportTemplate, report); err != nil {
   181  			reportErrs = append(reportErrs, err)
   182  		}
   183  	}
   184  
   185  	if len(syncErrs) == 0 && len(reportErrs) == 0 {
   186  		return nil
   187  	}
   188  	return fmt.Errorf("errors syncing: %v, errors reporting: %v", syncErrs, reportErrs)
   189  }
   190  
   191  // getJenkinsJobs returns all the active Jenkins jobs for the provided
   192  // list of prowjobs.
   193  func getJenkinsJobs(pjs []kube.ProwJob) map[string]struct{} {
   194  	jenkinsJobs := make(map[string]struct{})
   195  	for _, pj := range pjs {
   196  		if pj.Complete() {
   197  			continue
   198  		}
   199  		jenkinsJobs[pj.Spec.Job] = struct{}{}
   200  	}
   201  	return jenkinsJobs
   202  }
   203  
   204  // terminateDupes aborts presubmits that have a newer version. It modifies pjs
   205  // in-place when it aborts.
   206  func (c *Controller) terminateDupes(pjs []kube.ProwJob, jbs map[string]JenkinsBuild) error {
   207  	// "job org/repo#number" -> newest job
   208  	dupes := make(map[string]int)
   209  	for i, pj := range pjs {
   210  		if pj.Complete() || pj.Spec.Type != kube.PresubmitJob {
   211  			continue
   212  		}
   213  		n := fmt.Sprintf("%s %s/%s#%d", pj.Spec.Job, pj.Spec.Refs.Org, pj.Spec.Refs.Repo, pj.Spec.Refs.Pulls[0].Number)
   214  		prev, ok := dupes[n]
   215  		if !ok {
   216  			dupes[n] = i
   217  			continue
   218  		}
   219  		cancelIndex := i
   220  		if pjs[prev].Status.StartTime.Before(pj.Status.StartTime) {
   221  			cancelIndex = prev
   222  			dupes[n] = i
   223  		}
   224  		toCancel := pjs[cancelIndex]
   225  		// Allow aborting presubmit jobs for commits that have been superseded by
   226  		// newer commits in Github pull requests.
   227  		if c.ca.Config().JenkinsOperator.AllowCancellations {
   228  			build, buildExists := jbs[toCancel.Metadata.Name]
   229  			// Avoid cancelling enqueued builds.
   230  			if buildExists && build.IsEnqueued() {
   231  				continue
   232  			}
   233  			// Otherwise, abort it.
   234  			if buildExists {
   235  				if err := c.jc.Abort(toCancel.Spec.Job, &build); err != nil {
   236  					logrus.Warningf("Cannot cancel Jenkins build for prowjob %q: %v", toCancel.Metadata.Name, err)
   237  				}
   238  			}
   239  		}
   240  		toCancel.Status.CompletionTime = time.Now()
   241  		toCancel.Status.State = kube.AbortedState
   242  		npj, err := c.kc.ReplaceProwJob(toCancel.Metadata.Name, toCancel)
   243  		if err != nil {
   244  			return err
   245  		}
   246  		pjs[cancelIndex] = npj
   247  	}
   248  	return nil
   249  }
   250  
   251  func syncProwJobs(
   252  	syncFn syncFn,
   253  	jobs <-chan kube.ProwJob,
   254  	reports chan<- kube.ProwJob,
   255  	syncErrors chan<- error,
   256  	jbs map[string]JenkinsBuild,
   257  ) {
   258  	wg := &sync.WaitGroup{}
   259  	wg.Add(maxSyncRoutines)
   260  	for i := 0; i < maxSyncRoutines; i++ {
   261  		go func(jobs <-chan kube.ProwJob) {
   262  			defer wg.Done()
   263  			for pj := range jobs {
   264  				if err := syncFn(pj, reports, jbs); err != nil {
   265  					syncErrors <- err
   266  				}
   267  			}
   268  		}(jobs)
   269  	}
   270  	wg.Wait()
   271  }
   272  
   273  func (c *Controller) syncPendingJob(pj kube.ProwJob, reports chan<- kube.ProwJob, jbs map[string]JenkinsBuild) error {
   274  	jb, jbExists := jbs[pj.Metadata.Name]
   275  	if !jbExists {
   276  		pj.Status.CompletionTime = time.Now()
   277  		pj.Status.State = kube.ErrorState
   278  		pj.Status.URL = testInfra
   279  		pj.Status.Description = "Error finding Jenkins job."
   280  	} else {
   281  		switch {
   282  		case jb.IsEnqueued():
   283  			// Still in queue.
   284  			c.incrementNumPendingJobs(pj.Spec.Job)
   285  			return nil
   286  
   287  		case jb.IsRunning():
   288  			// Build still going.
   289  			c.incrementNumPendingJobs(pj.Spec.Job)
   290  			if pj.Status.Description == "Jenkins job running." {
   291  				return nil
   292  			}
   293  			pj.Status.Description = "Jenkins job running."
   294  
   295  		case jb.IsSuccess():
   296  			// Build is complete.
   297  			pj.Status.CompletionTime = time.Now()
   298  			pj.Status.State = kube.SuccessState
   299  			pj.Status.Description = "Jenkins job succeeded."
   300  			for _, nj := range pj.Spec.RunAfterSuccess {
   301  				child := pjutil.NewProwJob(nj)
   302  				if !RunAfterSuccessCanRun(&pj, &child, c.ca, c.ghc) {
   303  					continue
   304  				}
   305  				if _, err := c.kc.CreateProwJob(pjutil.NewProwJob(nj)); err != nil {
   306  					return fmt.Errorf("error starting next prowjob: %v", err)
   307  				}
   308  			}
   309  
   310  		case jb.IsFailure():
   311  			// Build either failed or aborted.
   312  			pj.Status.CompletionTime = time.Now()
   313  			pj.Status.State = kube.FailureState
   314  			pj.Status.Description = "Jenkins job failed."
   315  		}
   316  		// Construct the status URL that will be used in reports.
   317  		pj.Status.PodName = fmt.Sprintf("%s-%d", pj.Spec.Job, jb.Number)
   318  		pj.Status.BuildID = strconv.Itoa(jb.Number)
   319  		var b bytes.Buffer
   320  		if err := c.ca.Config().JenkinsOperator.JobURLTemplate.Execute(&b, &pj); err != nil {
   321  			return fmt.Errorf("error executing URL template: %v", err)
   322  		}
   323  		pj.Status.URL = b.String()
   324  	}
   325  	// Report to Github.
   326  	reports <- pj
   327  
   328  	_, err := c.kc.ReplaceProwJob(pj.Metadata.Name, pj)
   329  	return err
   330  }
   331  
   332  func (c *Controller) syncNonPendingJob(pj kube.ProwJob, reports chan<- kube.ProwJob, jbs map[string]JenkinsBuild) error {
   333  	if pj.Complete() {
   334  		return nil
   335  	}
   336  
   337  	// The rest are new prowjobs.
   338  
   339  	if _, jbExists := jbs[pj.Metadata.Name]; !jbExists {
   340  		// Do not start more jobs than specified.
   341  		if !c.canExecuteConcurrently(&pj) {
   342  			return nil
   343  		}
   344  		// Start the Jenkins job.
   345  		if err := c.jc.Build(&pj); err != nil {
   346  			logrus.WithField("job", pj.Spec.Job).Warningf("error starting Jenkins build: %v", err)
   347  			pj.Status.CompletionTime = time.Now()
   348  			pj.Status.State = kube.ErrorState
   349  			pj.Status.URL = testInfra
   350  			pj.Status.Description = "Error starting Jenkins job."
   351  		} else {
   352  			pj.Status.State = kube.PendingState
   353  			pj.Status.Description = "Jenkins job enqueued."
   354  		}
   355  	} else {
   356  		// If a Jenkins build already exists for this job, advance the ProwJob to Pending and
   357  		// it should be handled by syncPendingJob in the next sync.
   358  		pj.Status.State = kube.PendingState
   359  		pj.Status.Description = "Jenkins job enqueued."
   360  	}
   361  	// Report to Github.
   362  	reports <- pj
   363  
   364  	_, err := c.kc.ReplaceProwJob(pj.Metadata.Name, pj)
   365  	if err != nil {
   366  		return fmt.Errorf("error replacing prow job: %v", err)
   367  	}
   368  	return nil
   369  }
   370  
   371  // RunAfterSuccessCanRun returns whether a child job (specified as run_after_success in the
   372  // prow config) can run once its parent job succeeds. The only case we will not run a child job
   373  // is when it is a presubmit job and has a run_if_changed regural expression specified which does
   374  // not match the changed filenames in the pull request the job was meant to run for.
   375  // TODO: Collapse with plank, impossible to reuse as is due to the interfaces.
   376  func RunAfterSuccessCanRun(parent, child *kube.ProwJob, c configAgent, ghc githubClient) bool {
   377  	if parent.Spec.Type != kube.PresubmitJob {
   378  		return true
   379  	}
   380  
   381  	// TODO: Make sure that parent and child have always the same org/repo.
   382  	org := parent.Spec.Refs.Org
   383  	repo := parent.Spec.Refs.Repo
   384  	prNum := parent.Spec.Refs.Pulls[0].Number
   385  
   386  	ps := c.Config().GetPresubmit(org+"/"+repo, child.Spec.Job)
   387  	if ps == nil {
   388  		// The config has changed ever since we started the parent.
   389  		// Not sure what is more correct here. Run the child for now.
   390  		return true
   391  	}
   392  	if ps.RunIfChanged == "" {
   393  		return true
   394  	}
   395  	changesFull, err := ghc.GetPullRequestChanges(org, repo, prNum)
   396  	if err != nil {
   397  		logrus.Warningf("Cannot get PR changes for %d: %v", prNum, err)
   398  		return true
   399  	}
   400  	// We only care about the filenames here
   401  	var changes []string
   402  	for _, change := range changesFull {
   403  		changes = append(changes, change.Filename)
   404  	}
   405  	return ps.RunsAgainstChanges(changes)
   406  }