github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/mungegithub/mungers/submit-queue.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package mungers
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"errors"
    23  	"fmt"
    24  	"math"
    25  	"net/http"
    26  	"sort"
    27  	"strconv"
    28  	"strings"
    29  	"sync"
    30  	"sync/atomic"
    31  	"time"
    32  
    33  	utilclock "k8s.io/kubernetes/pkg/util/clock"
    34  	"k8s.io/kubernetes/pkg/util/sets"
    35  
    36  	"k8s.io/contrib/test-utils/utils"
    37  	"k8s.io/test-infra/mungegithub/features"
    38  	"k8s.io/test-infra/mungegithub/github"
    39  	"k8s.io/test-infra/mungegithub/mungeopts"
    40  	"k8s.io/test-infra/mungegithub/mungers/e2e"
    41  	fake_e2e "k8s.io/test-infra/mungegithub/mungers/e2e/fake"
    42  	"k8s.io/test-infra/mungegithub/mungers/mungerutil"
    43  	"k8s.io/test-infra/mungegithub/mungers/shield"
    44  	"k8s.io/test-infra/mungegithub/options"
    45  	"k8s.io/test-infra/mungegithub/sharedmux"
    46  
    47  	"github.com/NYTimes/gziphandler"
    48  	"github.com/golang/glog"
    49  	githubapi "github.com/google/go-github/github"
    50  	"github.com/prometheus/client_golang/prometheus"
    51  )
    52  
// Label names the submit queue looks for on pull requests, followed by a
// couple of queue-wide settings.
const (
	approvedLabel                    = "approved"
	lgtmLabel                        = "lgtm"
	retestNotRequiredLabel           = "retest-not-required"
	retestNotRequiredDocsOnlyLabel   = "retest-not-required-docs-only"
	doNotMergeLabel                  = "do-not-merge"
	wipLabel                         = "do-not-merge/work-in-progress"
	holdLabel                        = "do-not-merge/hold"
	deprecatedReleaseNoteLabelNeeded = "release-note-label-needed"
	releaseNoteLabelNeeded           = "do-not-merge/release-note-label-needed"
	cncfClaYesLabel                  = "cncf-cla: yes"
	cncfClaNoLabel                   = "cncf-cla: no"
	claHumanLabel                    = "cla: human-approved"
	// The queue/* labels (together with the retest-not-required labels) are
	// the ones listed in labelPriorities.
	criticalFixLabel  = "queue/critical-fix"
	blocksOthersLabel = "queue/blocks-others"
	fixLabel          = "queue/fix"
	multirebaseLabel  = "queue/multiple-rebases"

	// sqContext is presumably the GitHub status context name the queue
	// reports under — confirm against the status-setting code.
	sqContext = "Submit Queue"

	// githubE2EPollTime is the default for SubmitQueue.githubE2EPollTime; it
	// is applied during initialization when that field is still zero.
	githubE2EPollTime = 30 * time.Second
)
    75  
var (
	// newRetestBody is the comment text used to request a retest.
	// This MUST cause a RETEST of everything in the mungeopts.RequiredContexts.Retest
	newRetestBody = "/test all [submit-queue is verifying that this PR is safe to merge]"

	// labelPriorities is the order in which labels will be compared for queue
	// priority; earlier entries come first.
	labelPriorities = []string{criticalFixLabel, retestNotRequiredLabel, retestNotRequiredDocsOnlyLabel, multirebaseLabel, fixLabel, blocksOthersLabel}
	// lastHighPriorityLabel is an index into labelPriorities: labels up to and
	// including this index are the high priority labels, which are checked
	// before the release.
	lastHighPriorityLabel = 2 // retestNotRequiredDocsOnlyLabel
)
    85  
// submitStatus is a single timestamped status entry: a pull request summary
// plus the reason for its current state. Entries are kept per PR in
// SubmitQueue.prStatus and appended to SubmitQueue.statusHistory.
type submitStatus struct {
	// Time is when the entry was created.
	Time time.Time
	statusPullRequest
	// Reason is the human-readable explanation for the entry.
	Reason string
}
    91  
// statusPullRequest is the flattened summary of a pull request that the
// queue stores in status entries. objToStatusPullRequest fills it in from a
// github.MungeObject.
type statusPullRequest struct {
	Number    int
	URL       string
	Title     string
	Login     string
	AvatarURL string
	Additions int
	Deletions int
	ExtraInfo []string
	BaseRef   string
}
   103  
// e2eQueueStatus groups the PR currently under e2e test, the PRs waiting
// behind it, and the batch status.
// NOTE(review): presumably the payload of the /github-e2e-queue endpoint —
// confirm against serveGithubE2EStatus.
type e2eQueueStatus struct {
	E2ERunning  *statusPullRequest
	E2EQueue    []*statusPullRequest
	BatchStatus *submitQueueBatchStatus
}
   109  
// submitQueueStatus wraps a map of submitStatus entries keyed by string.
// NOTE(review): SubmitQueue.prStatus uses the decimal PR number as key;
// presumably this map mirrors it — confirm at the marshaling site.
type submitQueueStatus struct {
	PRStatus map[string]submitStatus
}
   113  
// Information about the e2e test health. Call updateHealth on the SubmitQueue
// at roughly constant intervals to keep this up to date. The mergeable fraction
// of time for the queue as a whole and the individual jobs will then be
// NumStable[PerJob] / TotalLoops.
type submitQueueHealth struct {
	// TotalLoops is the number of health records currently retained.
	TotalLoops int
	// NumStable counts retained records whose Overall flag is true.
	NumStable int
	// NumStablePerJob counts, per job name, the retained records in which
	// that job was stable. A job that was only ever unstable maps to 0.
	NumStablePerJob map[string]int
	// MergePossibleNow is false while the emergency merge stop is active.
	MergePossibleNow bool
}
   124  
// healthRecord is one sample of queue health. Health information is
// generated from a queue of these records; updateHealth appends one per call
// and drops those older than 24 hours. The bools are true for stable and
// false otherwise.
type healthRecord struct {
	// Time is when the sample was taken.
	Time time.Time
	// Overall is the queue-wide stability at that time.
	Overall bool
	// Jobs holds the stability of each job, keyed by job name.
	Jobs map[string]bool
}
   132  
// submitQueueStats holds information about the submit queue itself,
// including how fast things are merging and how long it has been since the
// last merge.
type submitQueueStats struct {
	Added              int // Number of items added to the queue since restart
	FlakesIgnored      int
	Initialized        bool // true if we've made at least one complete pass
	InstantMerges      int  // Number of merges without retests required
	BatchMerges        int  // Number of merges caused by batch
	LastMergeTime      time.Time
	MergeRate          float64
	MergesSinceRestart int
	Removed            int // Number of items dequeued since restart
	RetestsAvoided     int
	StartTime          time.Time
	Tested             int // Number of e2e tests completed
}
   149  
// submitQueueInterruptedObject records a pull request that has been tested
// as successful, but whose merge was interrupted because head flaked.
type submitQueueInterruptedObject struct {
	// obj is the interrupted pull request.
	obj *github.MungeObject
	// If these two items match when we're about to kick off a retest, it's safe to skip the retest.
	interruptedMergeHeadSHA string
	interruptedMergeBaseSHA string
}
   157  
// submitQueueMetadata contains metadata about this instance of the submit
// queue such as URLs. Consumed by the template system.
type submitQueueMetadata struct {
	ProjectName string

	ChartURL   string
	HistoryURL string
	// chartURL and historyURL are option storage locations. They are distinct from ChartURL and
	// HistoryURL since the public variables are used asynchronously by a fileserver and updates
	// to the options values should not cause a race condition. The exported
	// fields are copied from these during initialization.
	chartURL   string
	historyURL string

	RepoPullURL string
	ProwURL     string
}
   174  
// submitQueueBatchStatus describes the state of batch merging.
// NOTE(review): Error presumably maps a batch identifier to an error
// message and Running is the prow job currently testing a batch — confirm
// against the batch-merge code, which is outside this section.
type submitQueueBatchStatus struct {
	Error   map[string]string
	Running *prowJob
}
   179  
// prometheusMetrics bundles the Prometheus collectors exported by the
// submit queue.
type prometheusMetrics struct {
	Blocked       prometheus.Gauge   // 1 while merging is blocked, 0 otherwise
	OpenPRs       prometheus.Gauge   // number of PRs with a tracked status
	QueuedPRs     prometheus.Gauge   // number of PRs in the e2e queue
	MergeCount    prometheus.Counter // incremented once per merge
	LastMergeTime prometheus.Gauge   // Unix time (seconds) of the last merge
}
   187  
var (
	// sqPromMetrics holds the collectors for the submit queue. They are
	// registered with the default Prometheus registry in init.
	sqPromMetrics = prometheusMetrics{
		Blocked: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "submitqueue_blocked",
			Help: "The submit-queue is currently blocked",
		}),
		OpenPRs: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "submitqueue_open_pullrequests_total",
			Help: "Number of open pull-requests",
		}),
		QueuedPRs: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "submitqueue_queued_pullrequests_total",
			Help: "Number of pull-requests queued",
		}),
		MergeCount: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "submitqueue_merge_total",
			Help: "Number of merges done",
		}),
		LastMergeTime: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "submitqueue_time_of_last_merge",
			Help: "Time of last merge",
		}),
	}
)
   212  
// jobStatus is the latest known result of one CI job as collected by
// monitorProw. It is marshaled in serveCIStatus, so the JSON tags are part
// of the served format.
type jobStatus struct {
	State   string `json:"state"`
	BuildID string `json:"build_id"`
	URL     string `json:"url"`
}
   219  
// SubmitQueue will merge PR which meet a set of requirements.
//  PR must have LGTM after the last commit
//  PR must have passed all github CI checks
//  The google internal jenkins instance must be passing the BlockingJobNames e2e tests
//
// The exported configuration fields are bound to command-line options in
// RegisterOptions. The embedded sync.Mutex guards the fields marked as
// protected below.
type SubmitQueue struct {
	githubConfig        *github.Config
	opts                *options.Options
	NonBlockingJobNames []string

	GateApproved                 bool
	GateCLA                      bool
	GateGHReviewApproved         bool
	GateGHReviewChangesRequested bool

	// AdditionalRequiredLabels is a set of additional labels required for merging
	// on top of the existing required ("lgtm", "approved", "cncf-cla: yes").
	AdditionalRequiredLabels []string

	// If FakeE2E is true, don't try to connect to JenkinsHost, all jobs are passing.
	FakeE2E bool

	DoNotMergeMilestones []string

	Metadata  submitQueueMetadata
	AdminPort int

	sync.Mutex
	prStatus       map[string]submitStatus // protected by sync.Mutex
	statusHistory  []submitStatus          // protected by sync.Mutex
	lastClosedTime time.Time

	clock         utilclock.Clock
	startTime     time.Time // when the queue started (duh)
	lastMergeTime time.Time
	totalMerges   int32
	mergeRate     float64 // per 24 hours
	loopStarts    int32   // if > 1, then we must have made a complete pass.

	githubE2ERunning   *github.MungeObject         // protect by sync.Mutex!
	githubE2EQueue     map[int]*github.MungeObject // protected by sync.Mutex!
	githubE2EPollTime  time.Duration
	lgtmTimeCache      *mungerutil.LabelTimeCache
	githubE2ELastPRNum int

	lastE2EStable bool // was e2e stable last time they were checked, protect by sync.Mutex
	e2e           e2e.E2ETester

	interruptedObj *submitQueueInterruptedObject
	flakesIgnored  int32 // Increments for each merge while 1+ job is flaky
	instantMerges  int32 // Increments whenever we merge without retesting
	batchMerges    int32 // Increments whenever we merge because of a batch
	prsAdded       int32 // Increments whenever an items queues
	prsRemoved     int32 // Increments whenever an item dequeues
	prsTested      int32 // Number of prs that completed second testing
	retestsAvoided int32 // Increments whenever we skip due to head not changing.

	health        submitQueueHealth
	healthHistory []healthRecord

	// emergencyMergeStopFlag is non-zero while merges are stopped; it is
	// read and written with sync/atomic.
	emergencyMergeStopFlag int32

	features *features.Features

	mergeLock    sync.Mutex // acquired when attempting to merge a specific PR
	ProwURL      string     // prow base page
	BatchEnabled bool
	ContextURL   string
	batchStatus  submitQueueBatchStatus
	ciStatus     map[string]map[string]jobStatus // type (eg batch) : job : status

	// MergeToMasterMessage is an extra message when PR is merged to master branch,
	// it must not end in a period.
	MergeToMasterMessage string
}
   294  
   295  func init() {
   296  	clock := utilclock.RealClock{}
   297  	prometheus.MustRegister(sqPromMetrics.Blocked)
   298  	prometheus.MustRegister(sqPromMetrics.OpenPRs)
   299  	prometheus.MustRegister(sqPromMetrics.QueuedPRs)
   300  	prometheus.MustRegister(sqPromMetrics.MergeCount)
   301  	prometheus.MustRegister(sqPromMetrics.LastMergeTime)
   302  	sq := &SubmitQueue{
   303  		clock:          clock,
   304  		startTime:      clock.Now(),
   305  		lastMergeTime:  clock.Now(),
   306  		lastE2EStable:  true,
   307  		prStatus:       map[string]submitStatus{},
   308  		githubE2EQueue: map[int]*github.MungeObject{},
   309  	}
   310  	RegisterMungerOrDie(sq)
   311  	RegisterStaleIssueComments(sq)
   312  }
   313  
   314  // Name is the name usable in --pr-mungers
   315  func (sq *SubmitQueue) Name() string { return "submit-queue" }
   316  
   317  // RequiredFeatures is a slice of 'features' that must be provided
   318  func (sq *SubmitQueue) RequiredFeatures() []string {
   319  	return []string{features.BranchProtectionFeature, features.ServerFeatureName}
   320  }
   321  
   322  func (sq *SubmitQueue) emergencyMergeStop() bool {
   323  	return atomic.LoadInt32(&sq.emergencyMergeStopFlag) != 0
   324  }
   325  
   326  func (sq *SubmitQueue) setEmergencyMergeStop(stopMerges bool) {
   327  	if stopMerges {
   328  		atomic.StoreInt32(&sq.emergencyMergeStopFlag, 1)
   329  	} else {
   330  		atomic.StoreInt32(&sq.emergencyMergeStopFlag, 0)
   331  	}
   332  }
   333  
   334  // EmergencyStopHTTP sets the emergency stop flag. It expects the path of
   335  // req.URL to contain either "emergency/stop", "emergency/resume", or "emergency/status".
   336  func (sq *SubmitQueue) EmergencyStopHTTP(res http.ResponseWriter, req *http.Request) {
   337  	switch {
   338  	case strings.Contains(req.URL.Path, "emergency/stop"):
   339  		sq.setEmergencyMergeStop(true)
   340  	case strings.Contains(req.URL.Path, "emergency/resume"):
   341  		sq.setEmergencyMergeStop(false)
   342  	case strings.Contains(req.URL.Path, "emergency/status"):
   343  	default:
   344  		http.NotFound(res, req)
   345  		return
   346  	}
   347  	sq.serve(sq.marshal(struct{ EmergencyInProgress bool }{sq.emergencyMergeStop()}), res, req)
   348  }
   349  
   350  func round(num float64) int {
   351  	return int(num + math.Copysign(0.5, num))
   352  }
   353  
   354  func toFixed(num float64) float64 {
   355  	output := math.Pow(10, float64(3))
   356  	return float64(round(num*output)) / output
   357  }
   358  
   359  // This is the calculation of the exponential smoothing factor. It tries to
   360  // make sure that if we get lots of fast merges we don't race the 'daily'
   361  // avg really high really fast. But more importantly it means that if merges
   362  // start going slowly the 'daily' average will get pulled down a lot by one
   363  // slow merge instead of requiring numerous merges to get pulled down
   364  func getSmoothFactor(dur time.Duration) float64 {
   365  	hours := dur.Hours()
   366  	smooth := .155*math.Log(hours) + .422
   367  	if smooth < .1 {
   368  		return .1
   369  	}
   370  	if smooth > .999 {
   371  		return .999
   372  	}
   373  	return smooth
   374  }
   375  
   376  // This calculates an exponentially smoothed merge Rate based on the formula
   377  //   newRate = (1-smooth)oldRate + smooth*newRate
   378  // Which is really great and simple for constant time series data. But of course
   379  // ours isn't time series data so I vary the smoothing factor based on how long
   380  // its been since the last entry. See the comments on the `getSmoothFactor` for
   381  // a discussion of why.
   382  //    This whole thing was dreamed up by eparis one weekend via a combination
   383  //    of guess-and-test and intuition. Someone who knows about this stuff
   384  //    is likely to laugh at the naivete. Point him to where someone intelligent
   385  //    has thought about this stuff and he will gladly do something smart.
   386  // Merges that took less than 5 minutes are ignored completely for the rate
   387  // calculation.
   388  func calcMergeRate(oldRate float64, last, now time.Time) float64 {
   389  	since := now.Sub(last)
   390  	if since <= 5*time.Minute {
   391  		// retest-not-required PR merges shouldn't affect our best
   392  		// guess about the rate.
   393  		return oldRate
   394  	}
   395  	var rate float64
   396  	if since == 0 {
   397  		rate = 96
   398  	} else {
   399  		rate = 24.0 * time.Hour.Hours() / since.Hours()
   400  	}
   401  	smoothingFactor := getSmoothFactor(since)
   402  	mergeRate := ((1.0 - smoothingFactor) * oldRate) + (smoothingFactor * rate)
   403  	return toFixed(mergeRate)
   404  }
   405  
   406  // Updates a smoothed rate at which PRs are merging per day.
   407  // Updates merge stats. Should be called once for every merge.
   408  func (sq *SubmitQueue) updateMergeRate() {
   409  	now := sq.clock.Now()
   410  	sq.mergeRate = calcMergeRate(sq.mergeRate, sq.lastMergeTime, now)
   411  
   412  	// Update stats
   413  	sqPromMetrics.MergeCount.Inc()
   414  	atomic.AddInt32(&sq.totalMerges, 1)
   415  	sq.lastMergeTime = now
   416  	sqPromMetrics.LastMergeTime.Set(float64(sq.lastMergeTime.Unix()))
   417  }
   418  
   419  // This calculated the smoothed merge rate BUT it looks at the time since
   420  // the last merge vs 'Now'. If we have not passed the next 'expected' time
   421  // for a merge this just returns previous calculations. If 'Now' is later
   422  // than we would expect given the existing mergeRate then pretend a merge
   423  // happened right now and return the new merge rate. This way the merge rate
   424  // is lower even if no merge has happened in a long time.
   425  func (sq *SubmitQueue) calcMergeRateWithTail() float64 {
   426  	now := sq.clock.Now()
   427  
   428  	if sq.mergeRate == 0 {
   429  		return 0
   430  	}
   431  	// Figure out when we think the next merge would happen given the history
   432  	next := time.Duration(24/sq.mergeRate*time.Hour.Hours()) * time.Hour
   433  	expectedMergeTime := sq.lastMergeTime.Add(next)
   434  
   435  	// If we aren't there yet, just return the history
   436  	if !now.After(expectedMergeTime) {
   437  		return sq.mergeRate
   438  	}
   439  
   440  	// Pretend as though a merge happened right now to pull down the rate
   441  	return calcMergeRate(sq.mergeRate, sq.lastMergeTime, now)
   442  }
   443  
   444  // Initialize will initialize the munger
   445  func (sq *SubmitQueue) Initialize(config *github.Config, features *features.Features) error {
   446  	sq.features = features
   447  	return sq.internalInitialize(config, features, "")
   448  }
   449  
   450  // internalInitialize will initialize the munger.
   451  // if overrideURL is specified, will create testUtils
   452  func (sq *SubmitQueue) internalInitialize(config *github.Config, features *features.Features, overrideURL string) error {
   453  	sq.Lock()
   454  	defer sq.Unlock()
   455  
   456  	// initialize to invalid pr number
   457  	sq.githubE2ELastPRNum = -1
   458  
   459  	sq.Metadata.ChartURL = sq.Metadata.chartURL
   460  	sq.Metadata.HistoryURL = sq.Metadata.historyURL
   461  	sq.Metadata.ProwURL = sq.ProwURL
   462  	sq.Metadata.RepoPullURL = fmt.Sprintf("https://github.com/%s/%s/pulls/", config.Org, config.Project)
   463  	sq.Metadata.ProjectName = strings.Title(config.Project)
   464  	sq.githubConfig = config
   465  
   466  	if sq.BatchEnabled && sq.ProwURL == "" {
   467  		return errors.New("batch merges require prow-url to be set")
   468  	}
   469  
   470  	// TODO: This is not how injection for tests should work.
   471  	if sq.FakeE2E {
   472  		sq.e2e = &fake_e2e.FakeE2ETester{}
   473  	} else {
   474  		var gcs *utils.Utils
   475  		if overrideURL != "" {
   476  			gcs = utils.NewTestUtils("bucket", "logs", overrideURL)
   477  		} else {
   478  			gcs = utils.NewWithPresubmitDetection(
   479  				mungeopts.GCS.BucketName, mungeopts.GCS.LogDir,
   480  				mungeopts.GCS.PullKey, mungeopts.GCS.PullLogDir,
   481  			)
   482  		}
   483  
   484  		sq.e2e = (&e2e.RealE2ETester{
   485  			Opts:                 sq.opts,
   486  			NonBlockingJobNames:  &sq.NonBlockingJobNames,
   487  			BuildStatus:          map[string]e2e.BuildInfo{},
   488  			GoogleGCSBucketUtils: gcs,
   489  		}).Init(sharedmux.Admin)
   490  	}
   491  
   492  	sq.lgtmTimeCache = mungerutil.NewLabelTimeCache(lgtmLabel)
   493  
   494  	if features.Server.Enabled {
   495  		features.Server.Handle("/prs", gziphandler.GzipHandler(http.HandlerFunc(sq.servePRs)))
   496  		features.Server.Handle("/history", gziphandler.GzipHandler(http.HandlerFunc(sq.serveHistory)))
   497  		features.Server.Handle("/github-e2e-queue", gziphandler.GzipHandler(http.HandlerFunc(sq.serveGithubE2EStatus)))
   498  		features.Server.Handle("/merge-info", gziphandler.GzipHandler(http.HandlerFunc(sq.serveMergeInfo)))
   499  		features.Server.Handle("/priority-info", gziphandler.GzipHandler(http.HandlerFunc(sq.servePriorityInfo)))
   500  		features.Server.Handle("/health", gziphandler.GzipHandler(http.HandlerFunc(sq.serveHealth)))
   501  		features.Server.Handle("/health.svg", gziphandler.GzipHandler(http.HandlerFunc(sq.serveHealthSVG)))
   502  		features.Server.Handle("/sq-stats", gziphandler.GzipHandler(http.HandlerFunc(sq.serveSQStats)))
   503  		features.Server.Handle("/flakes", gziphandler.GzipHandler(http.HandlerFunc(sq.serveFlakes)))
   504  		features.Server.Handle("/metadata", gziphandler.GzipHandler(http.HandlerFunc(sq.serveMetadata)))
   505  		if sq.BatchEnabled {
   506  			features.Server.Handle("/batch", gziphandler.GzipHandler(http.HandlerFunc(sq.serveBatch)))
   507  		}
   508  		// this endpoint is useless without access to prow
   509  		if sq.ProwURL != "" {
   510  			features.Server.Handle("/ci-status", gziphandler.GzipHandler(http.HandlerFunc(sq.serveCIStatus)))
   511  		}
   512  	}
   513  
   514  	sharedmux.Admin.HandleFunc("/api/emergency/stop", sq.EmergencyStopHTTP)
   515  	sharedmux.Admin.HandleFunc("/api/emergency/resume", sq.EmergencyStopHTTP)
   516  	sharedmux.Admin.HandleFunc("/api/emergency/status", sq.EmergencyStopHTTP)
   517  
   518  	if sq.githubE2EPollTime == 0 {
   519  		sq.githubE2EPollTime = githubE2EPollTime
   520  	}
   521  
   522  	sq.healthHistory = make([]healthRecord, 0)
   523  
   524  	go sq.handleGithubE2EAndMerge()
   525  	go sq.updateGoogleE2ELoop()
   526  	if sq.BatchEnabled {
   527  		go sq.handleGithubE2EBatchMerge()
   528  	}
   529  	if sq.ProwURL != "" {
   530  		go sq.monitorProw()
   531  	}
   532  
   533  	if sq.AdminPort != 0 {
   534  		go http.ListenAndServe(fmt.Sprintf("0.0.0.0:%v", sq.AdminPort), sharedmux.Admin)
   535  	}
   536  	return nil
   537  }
   538  
   539  // EachLoop is called at the start of every munge loop
   540  func (sq *SubmitQueue) EachLoop() error {
   541  	issues := []*githubapi.Issue{}
   542  	if !sq.lastClosedTime.IsZero() {
   543  		listOpts := &githubapi.IssueListByRepoOptions{
   544  			State: "closed",
   545  			Since: sq.lastClosedTime,
   546  		}
   547  		var err error
   548  		issues, err = sq.githubConfig.ListAllIssues(listOpts)
   549  		if err != nil {
   550  			return err
   551  		}
   552  	} else {
   553  		sq.lastClosedTime = time.Now()
   554  	}
   555  
   556  	sq.Lock()
   557  	for _, issue := range issues {
   558  		if issue.ClosedAt != nil && issue.ClosedAt.After(sq.lastClosedTime) {
   559  			sq.lastClosedTime = *issue.ClosedAt
   560  		}
   561  		delete(sq.prStatus, strconv.Itoa(*issue.Number))
   562  	}
   563  
   564  	sq.updateHealth()
   565  	sqPromMetrics.OpenPRs.Set(float64(len(sq.prStatus)))
   566  	sqPromMetrics.QueuedPRs.Set(float64(len(sq.githubE2EQueue)))
   567  
   568  	objs := []*github.MungeObject{}
   569  	for _, obj := range sq.githubE2EQueue {
   570  		objs = append(objs, obj)
   571  	}
   572  	sq.Unlock()
   573  
   574  	for _, obj := range objs {
   575  		obj.Refresh()
   576  		// This should recheck it and clean up the queue, we don't care about the result
   577  		_ = sq.validForMerge(obj)
   578  	}
   579  	atomic.AddInt32(&sq.loopStarts, 1)
   580  	return nil
   581  }
   582  
// RegisterOptions registers options for this munger; returns any that require a restart when changed.
// It also stores opts on the queue and installs an update callback that
// rejects enabling batch merges without a prow URL.
func (sq *SubmitQueue) RegisterOptions(opts *options.Options) sets.String {
	sq.opts = opts
	opts.RegisterStringSlice(&sq.NonBlockingJobNames, "nonblocking-jobs", []string{}, "Comma separated list of jobs that don't block merges, but will have status reported and issues filed.")
	opts.RegisterStringSlice(&sq.AdditionalRequiredLabels, "additional-required-labels", []string{}, "Comma separated list of labels required for merging PRs on top of the existing required.")
	opts.RegisterBool(&sq.FakeE2E, "fake-e2e", false, "Whether to use a fake for testing E2E stability.")
	opts.RegisterStringSlice(&sq.DoNotMergeMilestones, "do-not-merge-milestones", []string{}, "List of milestones which, when applied, will cause the PR to not be merged.")
	opts.RegisterInt(&sq.AdminPort, "admin-port", 9999, "If non-zero, will serve administrative actions on this port.")
	// The URL options are stored in the unexported Metadata fields and
	// copied to the exported ones during initialization.
	opts.RegisterString(&sq.Metadata.historyURL, "history-url", "", "URL to access the submit-queue instance's health history.")
	opts.RegisterString(&sq.Metadata.chartURL, "chart-url", "", "URL to access the submit-queue instance's health charts.")
	opts.RegisterString(&sq.ProwURL, "prow-url", "", "Prow deployment base URL to read batch results and direct users to.")
	opts.RegisterBool(&sq.BatchEnabled, "batch-enabled", false, "Do batch merges (requires prow/splice coordination).")
	opts.RegisterString(&sq.ContextURL, "context-url", "", "URL where the submit queue is serving - used in Github status contexts.")
	opts.RegisterBool(&sq.GateApproved, "gate-approved", false, "Gate on approved label.")
	opts.RegisterBool(&sq.GateCLA, "gate-cla", false, "Gate on cla labels.")
	opts.RegisterString(&sq.MergeToMasterMessage, "merge-to-master-message", "", "Extra message when PR is merged to master branch.")
	opts.RegisterBool(&sq.GateGHReviewApproved, "gh-review-approved", false, "Gate github review, approve")
	opts.RegisterBool(&sq.GateGHReviewChangesRequested, "gh-review-changes-requested", false, "Gate github review, changes request")

	// Re-check the batch/prow invariant whenever either option changes.
	opts.RegisterUpdateCallback(func(changed sets.String) error {
		if changed.HasAny("prow-url", "batch-enabled") {
			if sq.BatchEnabled && sq.ProwURL == "" {
				return fmt.Errorf("batch merges require prow-url to be set")
			}
		}
		return nil
	})

	return sets.NewString(
		"batch-enabled", // Need to start or kill batch processing.
		"context-url",   // Need to remunge all PRs to update statuses with new url.
		"admin-port",    // Need to restart server on new port.
		// For the following: need to restart fileserver.
		"chart-url",
		"history-url",
		// For the following: need to re-initialize e2e which is used by other goroutines.
		"fake-e2e",
		"gcs-bucket",
		"gcs-logs-dir",
		"pull-logs-dir",
		"pull-key",
		// For the following: need to remunge all PRs if changed from true to false.
		"gate-cla",
		"gate-approved",
		// Need to remunge all PRs if anything changes in the following set of labels.
		"additional-required-labels",
	)
}
   631  
   632  // Hold the lock
   633  func (sq *SubmitQueue) updateHealth() {
   634  	// Remove old entries from the front.
   635  	for len(sq.healthHistory) > 0 && time.Since(sq.healthHistory[0].Time).Hours() > 24.0 {
   636  		sq.healthHistory = sq.healthHistory[1:]
   637  	}
   638  	// Make the current record
   639  	emergencyStop := sq.emergencyMergeStop()
   640  	newEntry := healthRecord{
   641  		Time:    time.Now(),
   642  		Overall: !emergencyStop,
   643  		Jobs:    map[string]bool{},
   644  	}
   645  	for job, status := range sq.e2e.GetBuildStatus() {
   646  		// Ignore flakes.
   647  		newEntry.Jobs[job] = status.Status != "Not Stable"
   648  	}
   649  	if emergencyStop {
   650  		// invent an "emergency stop" job that's failing.
   651  		newEntry.Jobs["Emergency Stop"] = false
   652  	}
   653  	sq.healthHistory = append(sq.healthHistory, newEntry)
   654  	// Now compute the health structure so we don't have to do it on page load
   655  	sq.health.TotalLoops = len(sq.healthHistory)
   656  	sq.health.NumStable = 0
   657  	sq.health.NumStablePerJob = map[string]int{}
   658  	sq.health.MergePossibleNow = !emergencyStop
   659  	if sq.health.MergePossibleNow {
   660  		sqPromMetrics.Blocked.Set(0)
   661  	} else {
   662  		sqPromMetrics.Blocked.Set(1)
   663  	}
   664  	for _, record := range sq.healthHistory {
   665  		if record.Overall {
   666  			sq.health.NumStable++
   667  		}
   668  		for job, stable := range record.Jobs {
   669  			if _, ok := sq.health.NumStablePerJob[job]; !ok {
   670  				sq.health.NumStablePerJob[job] = 0
   671  			}
   672  			if stable {
   673  				sq.health.NumStablePerJob[job]++
   674  			}
   675  		}
   676  	}
   677  }
   678  
   679  func (sq *SubmitQueue) monitorProw() {
   680  	nonBlockingJobNames := make(map[string]bool)
   681  	requireRetestJobNames := make(map[string]bool)
   682  
   683  	for {
   684  		sq.opts.Lock()
   685  		for _, jobName := range sq.NonBlockingJobNames {
   686  			nonBlockingJobNames[jobName] = true
   687  		}
   688  		for _, jobName := range mungeopts.RequiredContexts.Retest {
   689  			requireRetestJobNames[jobName] = true
   690  		}
   691  		url := sq.ProwURL + "/data.js"
   692  
   693  		currentPR := -1
   694  		if sq.githubE2ERunning != nil {
   695  			currentPR = *sq.githubE2ERunning.Issue.Number
   696  		}
   697  		sq.opts.Unlock()
   698  
   699  		lastPR := sq.githubE2ELastPRNum
   700  		// get current job info from prow
   701  		allJobs, err := getJobs(url)
   702  		if err != nil {
   703  			glog.Errorf("Error reading batch jobs from Prow URL %v: %v", url, err)
   704  			time.Sleep(time.Minute)
   705  			continue
   706  		}
   707  		// TODO: copy these from sq first instead
   708  		ciStatus := make(map[string]map[string]jobStatus)
   709  		ciLatest := make(map[string]map[string]time.Time)
   710  
   711  		for _, job := range allJobs {
   712  			if job.Finished == "" || job.BuildID == "" {
   713  				continue
   714  			}
   715  			// type/category
   716  			key := job.Type + "/"
   717  			// the most recent submit-queue PR(s)
   718  			if job.Number == currentPR || job.Number == lastPR {
   719  				key += "single"
   720  			} else if nonBlockingJobNames[job.Job] {
   721  				key += "nonblocking"
   722  			} else if requireRetestJobNames[job.Job] {
   723  				key += "requiredretest"
   724  			}
   725  
   726  			ft, err := time.Parse(time.RFC3339Nano, job.Finished)
   727  			if err != nil {
   728  				glog.Errorf("Error parsing job finish time %s: %v", job.Finished, err)
   729  				continue
   730  			}
   731  
   732  			if _, ok := ciLatest[key]; !ok {
   733  				ciLatest[key] = make(map[string]time.Time)
   734  				ciStatus[key] = make(map[string]jobStatus)
   735  			}
   736  			latest, ok := ciLatest[key][job.Job]
   737  
   738  			// TODO: flake cache?
   739  			if !ok || latest.Before(ft) {
   740  				ciLatest[key][job.Job] = ft
   741  				ciStatus[key][job.Job] = jobStatus{
   742  					State:   job.State,
   743  					BuildID: job.BuildID,
   744  					URL:     job.URL,
   745  				}
   746  			}
   747  		}
   748  
   749  		sq.Lock()
   750  		sq.ciStatus = ciStatus
   751  		sq.Unlock()
   752  
   753  		time.Sleep(time.Minute)
   754  	}
   755  }
   756  
   757  func (sq *SubmitQueue) e2eStable(aboutToMerge bool) bool {
   758  	wentStable := false
   759  	wentUnstable := false
   760  
   761  	sq.e2e.LoadNonBlockingStatus()
   762  	stable := !sq.emergencyMergeStop()
   763  
   764  	sq.Lock()
   765  	last := sq.lastE2EStable
   766  	if last && !stable {
   767  		wentUnstable = true
   768  	} else if !last && stable {
   769  		wentStable = true
   770  	}
   771  	sq.lastE2EStable = stable
   772  	sq.Unlock()
   773  
   774  	reason := ""
   775  	avatar := ""
   776  	if wentStable {
   777  		reason = e2eRecover
   778  		avatar = "success.png"
   779  	} else if wentUnstable {
   780  		reason = e2eFailure
   781  		avatar = "error.png"
   782  	}
   783  	if reason != "" {
   784  		submitStatus := submitStatus{
   785  			Time: sq.clock.Now(),
   786  			statusPullRequest: statusPullRequest{
   787  				Title:     reason,
   788  				AvatarURL: avatar,
   789  			},
   790  			Reason: reason,
   791  		}
   792  		sq.Lock()
   793  		sq.statusHistory = append(sq.statusHistory, submitStatus)
   794  		sq.Unlock()
   795  	}
   796  	return stable
   797  }
   798  
   799  // This serves little purpose other than to show updates every minute in the
   800  // web UI. Stable() will get called as needed against individual PRs as well.
   801  func (sq *SubmitQueue) updateGoogleE2ELoop() {
   802  	for {
   803  		_ = sq.e2eStable(false)
   804  		time.Sleep(1 * time.Minute)
   805  	}
   806  }
   807  
   808  func objToStatusPullRequest(obj *github.MungeObject) *statusPullRequest {
   809  	if obj == nil {
   810  		return &statusPullRequest{}
   811  	}
   812  	res := statusPullRequest{
   813  		Number:    *obj.Issue.Number,
   814  		URL:       *obj.Issue.HTMLURL,
   815  		Title:     *obj.Issue.Title,
   816  		Login:     *obj.Issue.User.Login,
   817  		AvatarURL: *obj.Issue.User.AvatarURL,
   818  	}
   819  	pr, ok := obj.GetPR()
   820  	if !ok {
   821  		return &res
   822  	}
   823  	if pr.Additions != nil {
   824  		res.Additions = *pr.Additions
   825  	}
   826  	if pr.Deletions != nil {
   827  		res.Deletions = *pr.Deletions
   828  	}
   829  	if pr.Base != nil && pr.Base.Ref != nil {
   830  		res.BaseRef = *pr.Base.Ref
   831  	}
   832  
   833  	labelPriority := labelPriority(obj)
   834  	if labelPriority <= lastHighPriorityLabel {
   835  		res.ExtraInfo = append(res.ExtraInfo, labelPriorities[labelPriority])
   836  	}
   837  
   838  	milestone, ok := obj.Annotations["milestone"]
   839  	if !ok {
   840  		milestone, _ = obj.ReleaseMilestone()
   841  		obj.Annotations["milestone"] = milestone
   842  	}
   843  	if milestone != "" {
   844  		res.ExtraInfo = append(res.ExtraInfo, milestone)
   845  	}
   846  
   847  	if labelPriority > lastHighPriorityLabel && labelPriority < len(labelPriorities) {
   848  		res.ExtraInfo = append(res.ExtraInfo, labelPriorities[labelPriority])
   849  	}
   850  
   851  	return &res
   852  }
   853  
   854  func reasonToState(reason string) string {
   855  	switch reason {
   856  	case merged, mergedByHand, mergedSkippedRetest, mergedBatch:
   857  		return "success"
   858  	case e2eFailure, ghE2EQueued, ghE2EWaitingStart, ghE2ERunning:
   859  		return "success"
   860  	case unknown:
   861  		return "failure"
   862  	default:
   863  		return "pending"
   864  	}
   865  }
   866  
   867  // SetMergeStatus will set the status given a particular PR. This function should
   868  // be used instead of manipulating the prStatus directly as sq.Lock() must be
   869  // called when manipulating that structure
   870  // `obj` is the active github object
   871  // `reason` is the new 'status' for this object
   872  func (sq *SubmitQueue) SetMergeStatus(obj *github.MungeObject, reason string) {
   873  	glog.V(4).Infof("SubmitQueue not merging %d because %q", *obj.Issue.Number, reason)
   874  	submitStatus := submitStatus{
   875  		Time:              sq.clock.Now(),
   876  		statusPullRequest: *objToStatusPullRequest(obj),
   877  		Reason:            reason,
   878  	}
   879  
   880  	status, ok := obj.GetStatus(sqContext)
   881  	if !ok || status == nil || *status.Description != reason {
   882  		state := reasonToState(reason)
   883  		sq.opts.Lock()
   884  		contextURL := sq.ContextURL
   885  		sq.opts.Unlock()
   886  		url := fmt.Sprintf("%s/#/prs?prDisplay=%d&historyDisplay=%d", contextURL, *obj.Issue.Number, *obj.Issue.Number)
   887  		_ = obj.SetStatus(state, url, reason, sqContext)
   888  	}
   889  
   890  	sq.Lock()
   891  	defer sq.Unlock()
   892  
   893  	// If we are currently retesting E2E the normal munge loop might find
   894  	// that the ci tests are not green. That's normal and expected and we
   895  	// should just ignore that status update entirely.
   896  	if sq.githubE2ERunning != nil && *sq.githubE2ERunning.Issue.Number == *obj.Issue.Number && strings.HasPrefix(reason, ciFailure) {
   897  		return
   898  	}
   899  
   900  	if sq.onQueue(obj) {
   901  		sq.statusHistory = append(sq.statusHistory, submitStatus)
   902  		if len(sq.statusHistory) > 128 {
   903  			sq.statusHistory = sq.statusHistory[1:]
   904  		}
   905  	}
   906  	sq.prStatus[strconv.Itoa(*obj.Issue.Number)] = submitStatus
   907  	sq.cleanupOldE2E(obj, reason)
   908  }
   909  
   910  // setContextFailedStatus calls SetMergeStatus after determining a particular github status
   911  // which is failed.
   912  func (sq *SubmitQueue) setContextFailedStatus(obj *github.MungeObject, contexts []string) {
   913  	for i, context := range contexts {
   914  		contextSlice := contexts[i : i+1]
   915  		success, ok := obj.IsStatusSuccess(contextSlice)
   916  		if ok && success {
   917  			continue
   918  		}
   919  		failMsg := fmt.Sprintf(ciFailureFmt, context)
   920  		sq.SetMergeStatus(obj, failMsg)
   921  		return
   922  	}
   923  	glog.Errorf("Inside setContextFailedStatus() but none of the status's failed! %d: %v", obj.Number(), contexts)
   924  	sq.SetMergeStatus(obj, ciFailure)
   925  }
   926  
   927  // sq.Lock() MUST be held!
   928  func (sq *SubmitQueue) getE2EQueueStatus() []*statusPullRequest {
   929  	queue := []*statusPullRequest{}
   930  	keys := sq.orderedE2EQueue()
   931  	for _, k := range keys {
   932  		obj := sq.githubE2EQueue[k]
   933  		request := objToStatusPullRequest(obj)
   934  		queue = append(queue, request)
   935  	}
   936  	return queue
   937  }
   938  
   939  func (sq *SubmitQueue) marshal(data interface{}) []byte {
   940  	b, err := json.Marshal(data)
   941  	if err != nil {
   942  		glog.Errorf("Unable to Marshal data: %#v: %v", data, err)
   943  		return nil
   944  	}
   945  	return b
   946  }
   947  
   948  func (sq *SubmitQueue) getQueueHistory() []byte {
   949  	sq.Lock()
   950  	defer sq.Unlock()
   951  	return sq.marshal(sq.statusHistory)
   952  }
   953  
   954  // GetQueueStatus returns a json representation of the state of the submit
   955  // queue. This can be used to generate web pages about the submit queue.
   956  func (sq *SubmitQueue) getQueueStatus() []byte {
   957  	status := submitQueueStatus{PRStatus: map[string]submitStatus{}}
   958  	sq.Lock()
   959  	defer sq.Unlock()
   960  
   961  	for key, value := range sq.prStatus {
   962  		status.PRStatus[key] = value
   963  	}
   964  	return sq.marshal(status)
   965  }
   966  
   967  func (sq *SubmitQueue) getGithubE2EStatus() []byte {
   968  	sq.Lock()
   969  	defer sq.Unlock()
   970  	status := e2eQueueStatus{
   971  		E2EQueue:    sq.getE2EQueueStatus(),
   972  		E2ERunning:  objToStatusPullRequest(sq.githubE2ERunning),
   973  		BatchStatus: &sq.batchStatus,
   974  	}
   975  	return sq.marshal(status)
   976  }
   977  
   978  func noMergeMessage(label string) string {
   979  	return "Will not auto merge because " + label + " is present"
   980  }
   981  
   982  func noAdditionalLabelMessage(label string) string {
   983  	return "Will not auto merge because " + label + " is missing"
   984  }
   985  
// Reason strings handed to SetMergeStatus. They are recorded in the queue
// status and history and are also compared against the description of the
// PR's existing sqContext status, so changing a string changes what the queue
// considers "already reported".
//
// ciFailure doubles as a prefix: code matches it with strings.HasPrefix, and
// ciFailureFmt appends the name of the failing context to it.
// reasonToState decides which of these map to "success", "failure" or
// "pending".
const (
	unknown                  = "unknown failure"
	noCLA                    = "PR is missing CLA label; needs one of " + cncfClaYesLabel + " or " + claHumanLabel
	noLGTM                   = "PR does not have " + lgtmLabel + " label."
	noApproved               = "PR does not have " + approvedLabel + " label."
	lgtmEarly                = "The PR was changed after the " + lgtmLabel + " label was added."
	unmergeable              = "PR is unable to be automatically merged. Needs rebase."
	undeterminedMergability  = "Unable to determine is PR is mergeable. Will try again later."
	ciFailure                = "Required Github CI test is not green"
	ciFailureFmt             = ciFailure + ": %s"
	e2eFailure               = "The e2e tests are failing. The entire submit queue is blocked."
	e2eRecover               = "The e2e tests started passing. The submit queue is unblocked."
	merged                   = "MERGED!"
	mergedSkippedRetest      = "MERGED! (skipped retest because of label)"
	mergedBatch              = "MERGED! (batch)"
	mergedByHand             = "MERGED! (by hand outside of submit queue)"
	ghE2EQueued              = "Queued to run github e2e tests a second time."
	ghE2EWaitingStart        = "Requested and waiting for github e2e test to start running a second time."
	ghE2ERunning             = "Running github e2e tests a second time."
	ghE2EFailed              = "Second github e2e run failed."
	unmergeableMilestone     = "Milestone is for a future release and cannot be merged"
	headCommitChanged        = "This PR has changed since we ran the tests"
	ghReviewStateUnclear     = "Cannot get gh reviews status"
	ghReviewApproved         = "This pr has no Github review \"approved\"."
	ghReviewChangesRequested = "Reviewer(s) requested changes through github review process."
)
  1012  
  1013  // validForMergeExt is the base logic about what PR can be automatically merged.
  1014  // PRs must pass this logic to be placed on the queue and they must pass this
  1015  // logic a second time to be retested/merged after they get to the top of
  1016  // the queue.
  1017  //
  1018  // checkStatus is true if the PR should only merge if the appropriate Github status
  1019  // checks are passing.
  1020  //
  1021  // If you update the logic PLEASE PLEASE PLEASE update serveMergeInfo() as well.
  1022  func (sq *SubmitQueue) validForMergeExt(obj *github.MungeObject, checkStatus bool) bool {
  1023  	// Can't merge an issue!
  1024  	if !obj.IsPR() {
  1025  		return false
  1026  	}
  1027  
  1028  	// Can't merge something already merged.
  1029  	if m, ok := obj.IsMerged(); !ok {
  1030  		glog.Errorf("%d: unknown err", *obj.Issue.Number)
  1031  		sq.SetMergeStatus(obj, unknown)
  1032  		return false
  1033  	} else if m {
  1034  		sq.SetMergeStatus(obj, mergedByHand)
  1035  		return false
  1036  	}
  1037  
  1038  	// Lock to get options since we may be running on a goroutine besides the main one.
  1039  	sq.opts.Lock()
  1040  	gateCLA := sq.GateCLA
  1041  	gateApproved := sq.GateApproved
  1042  	doNotMergeMilestones := sq.DoNotMergeMilestones
  1043  	mergeContexts := mungeopts.RequiredContexts.Merge
  1044  	retestContexts := mungeopts.RequiredContexts.Retest
  1045  	additionalLabels := sq.AdditionalRequiredLabels
  1046  	sq.opts.Unlock()
  1047  
  1048  	milestone := obj.Issue.Milestone
  1049  	title := ""
  1050  	// Net set means the empty milestone, ""
  1051  	if milestone != nil && milestone.Title != nil {
  1052  		title = *milestone.Title
  1053  	}
  1054  	for _, blocked := range doNotMergeMilestones {
  1055  		if title == blocked || (title == "" && blocked == "NO-MILESTONE") {
  1056  			sq.SetMergeStatus(obj, unmergeableMilestone)
  1057  			return false
  1058  		}
  1059  	}
  1060  
  1061  	// Must pass CLA checks
  1062  	if gateCLA {
  1063  		if !obj.HasLabel(claHumanLabel) && !obj.HasLabel(cncfClaYesLabel) {
  1064  			sq.SetMergeStatus(obj, noCLA)
  1065  			return false
  1066  		}
  1067  	}
  1068  
  1069  	// Obviously must be mergeable
  1070  	if mergeable, ok := obj.IsMergeable(); !ok {
  1071  		sq.SetMergeStatus(obj, undeterminedMergability)
  1072  		return false
  1073  	} else if !mergeable {
  1074  		sq.SetMergeStatus(obj, unmergeable)
  1075  		return false
  1076  	}
  1077  
  1078  	// Validate the status information for this PR
  1079  	if checkStatus {
  1080  		if len(mergeContexts) > 0 {
  1081  			if success, ok := obj.IsStatusSuccess(mergeContexts); !ok || !success {
  1082  				sq.setContextFailedStatus(obj, mergeContexts)
  1083  				return false
  1084  			}
  1085  		}
  1086  		if len(retestContexts) > 0 {
  1087  			if success, ok := obj.IsStatusSuccess(retestContexts); !ok || !success {
  1088  				sq.setContextFailedStatus(obj, retestContexts)
  1089  				return false
  1090  			}
  1091  		}
  1092  	}
  1093  
  1094  	if sq.GateGHReviewApproved || sq.GateGHReviewChangesRequested {
  1095  		if approvedReview, changesRequestedReview, ok := obj.CollectGHReviewStatus(); !ok {
  1096  			sq.SetMergeStatus(obj, ghReviewStateUnclear)
  1097  			return false
  1098  		} else if len(approvedReview) == 0 && sq.GateGHReviewApproved {
  1099  			sq.SetMergeStatus(obj, ghReviewApproved)
  1100  			return false
  1101  		} else if len(changesRequestedReview) > 0 && sq.GateGHReviewChangesRequested {
  1102  			sq.SetMergeStatus(obj, ghReviewChangesRequested)
  1103  			return false
  1104  		}
  1105  	}
  1106  
  1107  	if !obj.HasLabel(lgtmLabel) {
  1108  		sq.SetMergeStatus(obj, noLGTM)
  1109  		return false
  1110  	}
  1111  
  1112  	// PR cannot change since LGTM was added
  1113  	if after, ok := obj.ModifiedAfterLabeled(lgtmLabel); !ok {
  1114  		sq.SetMergeStatus(obj, unknown)
  1115  		return false
  1116  	} else if after {
  1117  		sq.SetMergeStatus(obj, lgtmEarly)
  1118  		return false
  1119  	}
  1120  
  1121  	if gateApproved {
  1122  		if !obj.HasLabel(approvedLabel) {
  1123  			sq.SetMergeStatus(obj, noApproved)
  1124  			return false
  1125  		}
  1126  	}
  1127  
  1128  	// PR cannot have any labels which prevent merging.
  1129  	for _, label := range []string{
  1130  		cherrypickUnapprovedLabel,
  1131  		blockedPathsLabel,
  1132  		deprecatedReleaseNoteLabelNeeded,
  1133  		releaseNoteLabelNeeded,
  1134  		doNotMergeLabel,
  1135  		wipLabel,
  1136  		holdLabel,
  1137  	} {
  1138  		if obj.HasLabel(label) {
  1139  			sq.SetMergeStatus(obj, noMergeMessage(label))
  1140  			return false
  1141  		}
  1142  	}
  1143  
  1144  	for _, label := range additionalLabels {
  1145  		if !obj.HasLabel(label) {
  1146  			sq.SetMergeStatus(obj, noAdditionalLabelMessage(label))
  1147  			return false
  1148  		}
  1149  	}
  1150  
  1151  	return true
  1152  }
  1153  
  1154  func (sq *SubmitQueue) validForMerge(obj *github.MungeObject) bool {
  1155  	return sq.validForMergeExt(obj, true)
  1156  }
  1157  
  1158  // Munge is the workhorse the will actually make updates to the PR
  1159  func (sq *SubmitQueue) Munge(obj *github.MungeObject) {
  1160  	if !sq.validForMerge(obj) {
  1161  		return
  1162  	}
  1163  
  1164  	added := false
  1165  	sq.Lock()
  1166  	if _, ok := sq.githubE2EQueue[*obj.Issue.Number]; !ok {
  1167  		atomic.AddInt32(&sq.prsAdded, 1)
  1168  		added = true
  1169  	}
  1170  	// Add this most-recent object in place of the existing object. It will
  1171  	// have more up2date information. Even though we explicitly refresh the
  1172  	// PR information before do anything with it, this allow things like the
  1173  	// queue order to change dynamically as labels are added/removed.
  1174  	sq.githubE2EQueue[*obj.Issue.Number] = obj
  1175  	sq.Unlock()
  1176  	if added {
  1177  		sq.SetMergeStatus(obj, ghE2EQueued)
  1178  	}
  1179  
  1180  	return
  1181  }
  1182  
  1183  func (sq *SubmitQueue) deleteQueueItem(obj *github.MungeObject) {
  1184  	if sq.onQueue(obj) {
  1185  		atomic.AddInt32(&sq.prsRemoved, 1)
  1186  	}
  1187  	delete(sq.githubE2EQueue, *obj.Issue.Number)
  1188  }
  1189  
  1190  // If the PR was put in the github e2e queue previously, but now we don't
  1191  // think it should be in the e2e queue, remove it. MUST be called with sq.Lock()
  1192  // held.
  1193  func (sq *SubmitQueue) cleanupOldE2E(obj *github.MungeObject, reason string) {
  1194  	switch {
  1195  	case reason == e2eFailure:
  1196  	case reason == ghE2EQueued:
  1197  	case reason == ghE2EWaitingStart:
  1198  	case reason == ghE2ERunning:
  1199  		// Do nothing
  1200  	case strings.HasPrefix(reason, ciFailure):
  1201  		// ciFailure is intersting. If the PR is being actively retested and then the
  1202  		// time based loop finds the same PR it will try to set ciFailure. We should in fact
  1203  		// not ever call this function in this case, but if we do call here, log it.
  1204  		if sq.githubE2ERunning != nil && *sq.githubE2ERunning.Issue.Number == *obj.Issue.Number {
  1205  			glog.Errorf("Trying to clean up %d due to ciFailure while it is being tested", *obj.Issue.Number)
  1206  			return
  1207  		}
  1208  		fallthrough
  1209  	default:
  1210  		if sq.githubE2ERunning != nil && *sq.githubE2ERunning.Issue.Number == *obj.Issue.Number {
  1211  			sq.githubE2ERunning = nil
  1212  		}
  1213  		sq.deleteQueueItem(obj)
  1214  	}
  1215  
  1216  }
  1217  
  1218  func labelPriority(obj *github.MungeObject) int {
  1219  	for i, label := range labelPriorities {
  1220  		if obj.HasLabel(label) {
  1221  			return i
  1222  		}
  1223  	}
  1224  	return len(labelPriorities)
  1225  }
  1226  
  1227  func compareHighPriorityLabels(a *github.MungeObject, b *github.MungeObject) int {
  1228  	aPrio := labelPriority(a)
  1229  	bPrio := labelPriority(b)
  1230  
  1231  	if aPrio > lastHighPriorityLabel && bPrio > lastHighPriorityLabel {
  1232  		return 0
  1233  	}
  1234  	return aPrio - bPrio
  1235  }
  1236  
  1237  func compareLowPriorityLabels(a *github.MungeObject, b *github.MungeObject) int {
  1238  	aPrio := labelPriority(a)
  1239  	bPrio := labelPriority(b)
  1240  
  1241  	return aPrio - bPrio
  1242  }
  1243  
  1244  type queueSorter struct {
  1245  	queue          []*github.MungeObject
  1246  	labelTimeCache *mungerutil.LabelTimeCache
  1247  }
  1248  
  1249  func (s queueSorter) Len() int      { return len(s.queue) }
  1250  func (s queueSorter) Swap(i, j int) { s.queue[i], s.queue[j] = s.queue[j], s.queue[i] }
  1251  
  1252  // If you update the function PLEASE PLEASE PLEASE also update servePriorityInfo()
  1253  func (s queueSorter) Less(i, j int) bool {
  1254  	a := s.queue[i]
  1255  	b := s.queue[j]
  1256  
  1257  	if c := compareHighPriorityLabels(a, b); c < 0 {
  1258  		return true
  1259  	} else if c > 0 {
  1260  		return false
  1261  	}
  1262  
  1263  	aDue, _ := a.ReleaseMilestoneDue()
  1264  	bDue, _ := b.ReleaseMilestoneDue()
  1265  
  1266  	if aDue.Before(bDue) {
  1267  		return true
  1268  	} else if aDue.After(bDue) {
  1269  		return false
  1270  	}
  1271  
  1272  	if c := compareLowPriorityLabels(a, b); c < 0 {
  1273  		return true
  1274  	} else if c > 0 {
  1275  		return false
  1276  	}
  1277  
  1278  	aTime, aOK := s.labelTimeCache.FirstLabelTime(a)
  1279  	bTime, bOK := s.labelTimeCache.FirstLabelTime(b)
  1280  
  1281  	// Shouldn't really happen since these have been LGTMed to be
  1282  	// in the queue at all. But just in case, .
  1283  	if !aOK && bOK {
  1284  		return false
  1285  	} else if aOK && !bOK {
  1286  		return true
  1287  	} else if !aOK && !bOK {
  1288  		return false
  1289  	}
  1290  
  1291  	return aTime.Before(bTime)
  1292  }
  1293  
  1294  // onQueue just tells if a PR is already on the queue.
  1295  // sq.Lock() must be held
  1296  func (sq *SubmitQueue) onQueue(obj *github.MungeObject) bool {
  1297  	for _, queueObj := range sq.githubE2EQueue {
  1298  		if *queueObj.Issue.Number == *obj.Issue.Number {
  1299  			return true
  1300  		}
  1301  
  1302  	}
  1303  	return false
  1304  }
  1305  
  1306  // sq.Lock() better held!!!
  1307  func (sq *SubmitQueue) orderedE2EQueue() []int {
  1308  	prs := []*github.MungeObject{}
  1309  	for _, obj := range sq.githubE2EQueue {
  1310  		prs = append(prs, obj)
  1311  	}
  1312  	sort.Sort(queueSorter{prs, sq.lgtmTimeCache})
  1313  
  1314  	var ordered []int
  1315  	for _, obj := range prs {
  1316  		ordered = append(ordered, *obj.Issue.Number)
  1317  	}
  1318  	return ordered
  1319  }
  1320  
  1321  // handleGithubE2EAndMerge waits for PRs that are ready to re-run the github
  1322  // e2e tests, runs the test, and then merges if everything was successful.
  1323  func (sq *SubmitQueue) handleGithubE2EAndMerge() {
  1324  	for {
  1325  		sq.Lock()
  1326  		l := len(sq.githubE2EQueue)
  1327  		sq.Unlock()
  1328  		// Wait until something is ready to be processed
  1329  		if l == 0 {
  1330  			time.Sleep(sq.githubE2EPollTime)
  1331  			continue
  1332  		}
  1333  
  1334  		obj := sq.selectPullRequest()
  1335  		if obj == nil {
  1336  			continue
  1337  		}
  1338  
  1339  		// only critical fixes can be merged if postsubmits are failing
  1340  		if !sq.e2eStable(false) && !obj.HasLabel(criticalFixLabel) {
  1341  			time.Sleep(sq.githubE2EPollTime)
  1342  			continue
  1343  		}
  1344  
  1345  		// re-test and maybe merge
  1346  		remove := sq.doGithubE2EAndMerge(obj)
  1347  		if remove {
  1348  			// remove it from the map after we finish testing
  1349  			sq.Lock()
  1350  			if sq.githubE2ERunning != nil {
  1351  				sq.githubE2ELastPRNum = *sq.githubE2ERunning.Issue.Number
  1352  			}
  1353  			sq.githubE2ERunning = nil
  1354  			sq.deleteQueueItem(obj)
  1355  			sq.Unlock()
  1356  		}
  1357  	}
  1358  }
  1359  
  1360  func (sq *SubmitQueue) mergePullRequest(obj *github.MungeObject, msg, extra string) bool {
  1361  	isMaster, _ := obj.IsForBranch("master")
  1362  	if isMaster {
  1363  		sq.opts.Lock()
  1364  		if sq.MergeToMasterMessage != "" {
  1365  			extra = extra + ". " + sq.MergeToMasterMessage
  1366  		}
  1367  		sq.opts.Unlock()
  1368  	}
  1369  	ok := obj.MergePR("submit-queue" + extra)
  1370  	if !ok {
  1371  		return ok
  1372  	}
  1373  	sq.SetMergeStatus(obj, msg)
  1374  	sq.updateMergeRate()
  1375  	return true
  1376  }
  1377  
  1378  func (sq *SubmitQueue) selectPullRequest() *github.MungeObject {
  1379  	if sq.interruptedObj != nil {
  1380  		return sq.interruptedObj.obj
  1381  	}
  1382  	sq.Lock()
  1383  	defer sq.Unlock()
  1384  	if len(sq.githubE2EQueue) == 0 {
  1385  		return nil
  1386  	}
  1387  	keys := sq.orderedE2EQueue()
  1388  	obj := sq.githubE2EQueue[keys[0]]
  1389  	if sq.githubE2ERunning != nil {
  1390  		sq.githubE2ELastPRNum = *sq.githubE2ERunning.Issue.Number
  1391  	}
  1392  	sq.githubE2ERunning = obj
  1393  
  1394  	return obj
  1395  }
  1396  
  1397  func (interruptedObj *submitQueueInterruptedObject) hasSHAChanged() bool {
  1398  	headSHA, baseRef, gotHeadSHA := interruptedObj.obj.GetHeadAndBase()
  1399  	if !gotHeadSHA {
  1400  		return true
  1401  	}
  1402  
  1403  	baseSHA, gotBaseSHA := interruptedObj.obj.GetSHAFromRef(baseRef)
  1404  	if !gotBaseSHA {
  1405  		return true
  1406  	}
  1407  
  1408  	return interruptedObj.interruptedMergeBaseSHA != baseSHA ||
  1409  		interruptedObj.interruptedMergeHeadSHA != headSHA
  1410  }
  1411  
  1412  func newInterruptedObject(obj *github.MungeObject) *submitQueueInterruptedObject {
  1413  	if headSHA, baseRef, gotHeadSHA := obj.GetHeadAndBase(); !gotHeadSHA {
  1414  		return nil
  1415  	} else if baseSHA, gotBaseSHA := obj.GetSHAFromRef(baseRef); !gotBaseSHA {
  1416  		return nil
  1417  	} else {
  1418  		return &submitQueueInterruptedObject{obj, headSHA, baseSHA}
  1419  	}
  1420  }
  1421  
// doGithubE2EAndMerge refreshes and re-validates obj, re-runs its retest
// contexts when needed, and merges it if it is still valid afterwards.
//
// Returns true if we can discard the PR from the queue, false if we must keep it for later.
// If you modify this, consider modifying doBatchMerge too.
func (sq *SubmitQueue) doGithubE2EAndMerge(obj *github.MungeObject) bool {
	// Consume any interrupted attempt: it is only considered by this call.
	interruptedObj := sq.interruptedObj
	sq.interruptedObj = nil

	ok := obj.Refresh()
	if !ok {
		glog.Errorf("%d: unknown err", *obj.Issue.Number)
		sq.SetMergeStatus(obj, unknown)
		return true
	}

	if !sq.validForMerge(obj) {
		return true
	}

	// PRs labelled as not needing a retest are merged right away.
	if obj.HasLabel(retestNotRequiredLabel) || obj.HasLabel(retestNotRequiredDocsOnlyLabel) {
		atomic.AddInt32(&sq.instantMerges, 1)
		sq.mergePullRequest(obj, mergedSkippedRetest, "")
		return true
	}

	// Remember the head SHA so a change during the retest can be detected.
	sha, _, ok := obj.GetHeadAndBase()
	if !ok {
		glog.Errorf("%d: Unable to get SHA", *obj.Issue.Number)
		sq.SetMergeStatus(obj, unknown)
		return true
	}
	if interruptedObj != nil {
		if interruptedObj.hasSHAChanged() {
			// This PR will have to be retested.
			// Make sure we don't have higher priority first.
			return false
		}
		glog.Infof("Skipping retest since head and base sha match previous attempt!")
		atomic.AddInt32(&sq.retestsAvoided, 1)
	} else {
		// retestPR returns true when it has already recorded a failure or
		// timeout for this PR; nothing more to do then.
		if sq.retestPR(obj) {
			return true
		}

		ok := obj.Refresh()
		if !ok {
			sq.SetMergeStatus(obj, unknown)
			return true
		}
	}

	// Held for the remainder of the function, i.e. across the final
	// validation and the merge itself.
	sq.mergeLock.Lock()
	defer sq.mergeLock.Unlock()

	// We shouldn't merge if it's not valid anymore
	if !sq.validForMerge(obj) {
		glog.Errorf("%d: Not mergeable anymore. Do not merge.", *obj.Issue.Number)
		return true
	}

	if newSha, _, ok := obj.GetHeadAndBase(); !ok {
		glog.Errorf("%d: Unable to get SHA", *obj.Issue.Number)
		sq.SetMergeStatus(obj, unknown)
		return true
	} else if newSha != sha {
		glog.Errorf("%d: Changed while running the test. Do not merge.", *obj.Issue.Number)
		sq.SetMergeStatus(obj, headCommitChanged)
		return false
	}

	// The e2e status went bad while we were testing: unless this is a critical
	// fix, do not merge. The PR is saved as the interrupted object, which
	// selectPullRequest returns ahead of anything on the queue.
	if !sq.e2eStable(true) && !obj.HasLabel(criticalFixLabel) {
		if sq.validForMerge(obj) {
			sq.interruptedObj = newInterruptedObject(obj)
		}
		sq.SetMergeStatus(obj, e2eFailure)
		return true
	}

	sq.mergePullRequest(obj, merged, "")
	return true
}
  1501  
  1502  // Returns true if merge status changes, and false otherwise.
  1503  func (sq *SubmitQueue) retestPR(obj *github.MungeObject) bool {
  1504  	sq.opts.Lock()
  1505  	retestContexts := mungeopts.RequiredContexts.Retest
  1506  	sq.opts.Unlock()
  1507  
  1508  	if len(retestContexts) == 0 {
  1509  		return false
  1510  	}
  1511  
  1512  	if err := obj.WriteComment(newRetestBody); err != nil {
  1513  		glog.Errorf("%d: unknown err: %v", *obj.Issue.Number, err)
  1514  		sq.SetMergeStatus(obj, unknown)
  1515  		return true
  1516  	}
  1517  
  1518  	// Wait for the retest to start
  1519  	sq.SetMergeStatus(obj, ghE2EWaitingStart)
  1520  	atomic.AddInt32(&sq.prsTested, 1)
  1521  	sq.opts.Lock()
  1522  	prMaxWaitTime := mungeopts.PRMaxWaitTime
  1523  	sq.opts.Unlock()
  1524  	done := obj.WaitForPending(retestContexts, prMaxWaitTime)
  1525  	if !done {
  1526  		sq.SetMergeStatus(obj, fmt.Sprintf("Timed out waiting for PR %d to start testing", obj.Number()))
  1527  		return true
  1528  	}
  1529  
  1530  	// Wait for the status to go back to something other than pending
  1531  	sq.SetMergeStatus(obj, ghE2ERunning)
  1532  	done = obj.WaitForNotPending(retestContexts, prMaxWaitTime)
  1533  	if !done {
  1534  		sq.SetMergeStatus(obj, fmt.Sprintf("Timed out waiting for PR %d to finish testing", obj.Number()))
  1535  		return true
  1536  	}
  1537  
  1538  	// Check if the thing we care about is success
  1539  	if success, ok := obj.IsStatusSuccess(retestContexts); !success || !ok {
  1540  		sq.SetMergeStatus(obj, ghE2EFailed)
  1541  		return true
  1542  	}
  1543  
  1544  	// no action taken.
  1545  	return false
  1546  }
  1547  
  1548  func (sq *SubmitQueue) serve(data []byte, res http.ResponseWriter, req *http.Request) {
  1549  	if data == nil {
  1550  		res.Header().Set("Content-type", "text/plain")
  1551  		res.WriteHeader(http.StatusInternalServerError)
  1552  	} else {
  1553  		res.Header().Set("Content-type", "application/json")
  1554  		res.WriteHeader(http.StatusOK)
  1555  		res.Write(data)
  1556  	}
  1557  }
  1558  
  1559  func (sq *SubmitQueue) serveHistory(res http.ResponseWriter, req *http.Request) {
  1560  	data := sq.getQueueHistory()
  1561  	sq.serve(data, res, req)
  1562  }
  1563  
  1564  func (sq *SubmitQueue) servePRs(res http.ResponseWriter, req *http.Request) {
  1565  	data := sq.getQueueStatus()
  1566  	sq.serve(data, res, req)
  1567  }
  1568  
  1569  func (sq *SubmitQueue) serveGithubE2EStatus(res http.ResponseWriter, req *http.Request) {
  1570  	data := sq.getGithubE2EStatus()
  1571  	sq.serve(data, res, req)
  1572  }
  1573  
  1574  func (sq *SubmitQueue) serveCIStatus(res http.ResponseWriter, req *http.Request) {
  1575  	sq.Lock()
  1576  	data := sq.marshal(sq.ciStatus)
  1577  	sq.Unlock()
  1578  	sq.serve(data, res, req)
  1579  }
  1580  
  1581  func (sq *SubmitQueue) serveHealth(res http.ResponseWriter, req *http.Request) {
  1582  	sq.Lock()
  1583  	data := sq.marshal(sq.health)
  1584  	sq.Unlock()
  1585  	sq.serve(data, res, req)
  1586  }
  1587  
  1588  func (sq *SubmitQueue) serveSQStats(res http.ResponseWriter, req *http.Request) {
  1589  	data := submitQueueStats{
  1590  		Added:              int(atomic.LoadInt32(&sq.prsAdded)),
  1591  		FlakesIgnored:      int(atomic.LoadInt32(&sq.flakesIgnored)),
  1592  		Initialized:        atomic.LoadInt32(&sq.loopStarts) > 1,
  1593  		InstantMerges:      int(atomic.LoadInt32(&sq.instantMerges)),
  1594  		BatchMerges:        int(atomic.LoadInt32(&sq.batchMerges)),
  1595  		LastMergeTime:      sq.lastMergeTime,
  1596  		MergeRate:          sq.calcMergeRateWithTail(),
  1597  		MergesSinceRestart: int(atomic.LoadInt32(&sq.totalMerges)),
  1598  		Removed:            int(atomic.LoadInt32(&sq.prsRemoved)),
  1599  		RetestsAvoided:     int(atomic.LoadInt32(&sq.retestsAvoided)),
  1600  		StartTime:          sq.startTime,
  1601  		Tested:             int(atomic.LoadInt32(&sq.prsTested)),
  1602  	}
  1603  	sq.serve(sq.marshal(data), res, req)
  1604  }
  1605  
  1606  func (sq *SubmitQueue) serveFlakes(res http.ResponseWriter, req *http.Request) {
  1607  	data := sq.e2e.Flakes()
  1608  	sq.serve(mungerutil.PrettyMarshal(data), res, req)
  1609  }
  1610  
  1611  func (sq *SubmitQueue) serveMetadata(res http.ResponseWriter, req *http.Request) {
  1612  	sq.Lock()
  1613  	data := sq.marshal(sq.Metadata)
  1614  	sq.Unlock()
  1615  	sq.serve(data, res, req)
  1616  }
  1617  
  1618  func (sq *SubmitQueue) serveBatch(res http.ResponseWriter, req *http.Request) {
  1619  	sq.serve(sq.marshal(sq.batchStatus), res, req)
  1620  }
  1621  
  1622  func (sq *SubmitQueue) serveMergeInfo(res http.ResponseWriter, req *http.Request) {
  1623  	// Lock to get options since we are not running in the main goroutine.
  1624  	sq.opts.Lock()
  1625  	doNotMergeMilestones := sq.DoNotMergeMilestones
  1626  	additionalLabels := sq.AdditionalRequiredLabels
  1627  	gateApproved := sq.GateApproved
  1628  	gateCLA := sq.GateCLA
  1629  	mergeContexts := mungeopts.RequiredContexts.Merge
  1630  	retestContexts := mungeopts.RequiredContexts.Retest
  1631  	sq.opts.Unlock()
  1632  
  1633  	res.Header().Set("Content-type", "text/plain")
  1634  	res.WriteHeader(http.StatusOK)
  1635  	var out bytes.Buffer
  1636  	out.WriteString("PRs must meet the following set of conditions to be considered for automatic merging by the submit queue.")
  1637  	out.WriteString("<ol>")
  1638  	if gateCLA {
  1639  		out.WriteString(fmt.Sprintf("<li>The PR must have the label %q or %q </li>", cncfClaYesLabel, claHumanLabel))
  1640  	}
  1641  	out.WriteString("<li>The PR must be mergeable. aka cannot need a rebase</li>")
  1642  	if len(mergeContexts) > 0 || len(retestContexts) > 0 {
  1643  		out.WriteString("<li>All of the following github statuses must be green")
  1644  		out.WriteString("<ul>")
  1645  		for _, context := range mergeContexts {
  1646  			out.WriteString(fmt.Sprintf("<li>%s</li>", context))
  1647  		}
  1648  		for _, context := range retestContexts {
  1649  			out.WriteString(fmt.Sprintf("<li>%s</li>", context))
  1650  		}
  1651  		out.WriteString("</ul>")
  1652  	}
  1653  	out.WriteString(fmt.Sprintf("<li>The PR cannot have any of the following milestones: %q</li>", doNotMergeMilestones))
  1654  	out.WriteString(fmt.Sprintf(`<li>The PR must have the %q label</li>`, lgtmLabel))
  1655  	out.WriteString(fmt.Sprintf("<li>The PR must not have been updated since the %q label was applied</li>", lgtmLabel))
  1656  	if gateApproved {
  1657  		out.WriteString(fmt.Sprintf(`<li>The PR must have the %q label</li>`, approvedLabel))
  1658  	}
  1659  	if len(additionalLabels) > 0 {
  1660  		out.WriteString(fmt.Sprintf(`<li>The PR must have the following labels: %q</li>`, additionalLabels))
  1661  	}
  1662  	out.WriteString(`<li>The PR must not have the any labels starting with "do-not-merge"</li>`)
  1663  	out.WriteString(`</ol><br>`)
  1664  	out.WriteString("The PR can then be queued to re-test before merge. Once it reaches the top of the queue all of the above conditions must be true but so must the following:")
  1665  	out.WriteString("<ol>")
  1666  	if len(retestContexts) > 0 {
  1667  		out.WriteString("<li>All of the following tests must pass a second time")
  1668  		out.WriteString("<ul>")
  1669  		for _, context := range retestContexts {
  1670  			out.WriteString(fmt.Sprintf("<li>%s</li>", context))
  1671  		}
  1672  		out.WriteString("</ul>")
  1673  		out.WriteString(fmt.Sprintf("Unless the %q or %q label is present</li>", retestNotRequiredLabel, retestNotRequiredDocsOnlyLabel))
  1674  	}
  1675  	out.WriteString("</ol>")
  1676  	out.WriteString("And then the PR will be merged!!")
  1677  	res.Write(out.Bytes())
  1678  }
  1679  
  1680  func writeLabel(label string, res http.ResponseWriter) {
  1681  	out := fmt.Sprintf(`  <li>%q label
  1682      <ul>
  1683        <li>A PR with %q will come next</li>
  1684      </ul>
  1685    </li>
  1686  `, label, label)
  1687  	res.Write([]byte(out))
  1688  }
  1689  
  1690  func (sq *SubmitQueue) servePriorityInfo(res http.ResponseWriter, req *http.Request) {
  1691  	res.Header().Set("Content-type", "text/plain")
  1692  	res.WriteHeader(http.StatusOK)
  1693  	res.Write([]byte(`The merge queue is sorted by the following. If there is a tie in any test the next test will be used.
  1694  <ol>
  1695    <li>'` + criticalFixLabel + `' label
  1696      <ul>
  1697        <li>A PR with '` + criticalFixLabel + `' will come first</li>
  1698        <li>A PR with '` + criticalFixLabel + `' will merge even if the e2e tests are blocked</li>
  1699      </ul>
  1700    </li>
  1701  `))
  1702  	for i := 1; i <= lastHighPriorityLabel; i++ {
  1703  		writeLabel(labelPriorities[i], res)
  1704  	}
  1705  	res.Write([]byte(`  <li>Release milestone due date
  1706      <ul>
  1707        <li>Release milestones are of the form vX.Y where X and Y are integers</li>
  1708        <li>The release milestore must have a due date set to affect queue order</li>
  1709        <li>Other milestones are ignored</li>
  1710      </ul>
  1711    </li>
  1712  `))
  1713  	for i := lastHighPriorityLabel + 1; i < len(labelPriorities); i++ {
  1714  		writeLabel(labelPriorities[i], res)
  1715  	}
  1716  	res.Write([]byte(`  <li>First time at which the LGTM label was applied.
  1717      <ul>
  1718        <li>This means all PRs start at the bottom of the queue (within their priority and milestone bands, of course) and progress towards the top.</li>
  1719      </ul>
  1720    </li>
  1721  </ol> `))
  1722  }
  1723  
  1724  func (sq *SubmitQueue) getHealthSVG() []byte {
  1725  	sq.Lock()
  1726  	defer sq.Unlock()
  1727  	blocked := false
  1728  	blockingJobs := make([]string, 0)
  1729  	blocked = !sq.health.MergePossibleNow
  1730  	status := "running"
  1731  	color := "brightgreen"
  1732  	if blocked {
  1733  		status = "blocked"
  1734  		color = "red"
  1735  		for job, status := range sq.e2e.GetBuildStatus() {
  1736  			if status.Status == "Not Stable" {
  1737  				job = strings.Replace(job, "kubernetes-", "", -1)
  1738  				blockingJobs = append(blockingJobs, job)
  1739  			}
  1740  		}
  1741  		sort.Strings(blockingJobs)
  1742  		if len(blockingJobs) > 3 {
  1743  			blockingJobs = append(blockingJobs[:3], "...")
  1744  		}
  1745  		if len(blockingJobs) > 0 {
  1746  			status += " by " + strings.Join(blockingJobs, ", ")
  1747  		}
  1748  	}
  1749  	return shield.Make("queue", status, color)
  1750  }
  1751  
  1752  func (sq *SubmitQueue) serveHealthSVG(res http.ResponseWriter, req *http.Request) {
  1753  	res.Header().Set("Content-type", "image/svg+xml")
  1754  	res.Header().Set("Cache-Control", "max-age=60")
  1755  	res.WriteHeader(http.StatusOK)
  1756  	res.Write(sq.getHealthSVG())
  1757  }
  1758  
  1759  func (sq *SubmitQueue) isStaleIssueComment(obj *github.MungeObject, comment *githubapi.IssueComment) bool {
  1760  	if !obj.IsRobot(comment.User) {
  1761  		return false
  1762  	}
  1763  	if *comment.Body != newRetestBody {
  1764  		return false
  1765  	}
  1766  	stale := commentBeforeLastCI(obj, comment, mungeopts.RequiredContexts.Retest)
  1767  	if stale {
  1768  		glog.V(6).Infof("Found stale SubmitQueue safe to merge comment")
  1769  	}
  1770  	return stale
  1771  }
  1772  
  1773  // StaleIssueComments returns a slice of stale issue comments.
  1774  func (sq *SubmitQueue) StaleIssueComments(obj *github.MungeObject, comments []*githubapi.IssueComment) []*githubapi.IssueComment {
  1775  	return forEachCommentTest(obj, comments, sq.isStaleIssueComment)
  1776  }