github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/robots/issue-creator/sources/flakyjob-reporter.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package sources 18 19 import ( 20 "bytes" 21 "encoding/json" 22 "flag" 23 "fmt" 24 "io/ioutil" 25 "net/http" 26 "sort" 27 "time" 28 29 "github.com/golang/glog" 30 31 githubapi "github.com/google/go-github/github" 32 "k8s.io/test-infra/robots/issue-creator/creator" 33 ) 34 35 // FlakyJob is a struct that represents a single job and the flake data associated with it. 36 // FlakyJob implements the Issue interface so that it can be synced with github issues via the IssueCreator. 37 type FlakyJob struct { 38 // Name is the job's name. 39 Name string 40 // Consistency is the percentage of builds that passed. 41 Consistency *float64 `json:"consistency"` 42 // FlakeCount is the number of flakes. 43 FlakeCount *int `json:"flakes"` 44 // FlakyTests is a map of test names to the number of times that test failed. 45 // Any test that failed at least once a day for the past week on this job is included. 46 FlakyTests map[string]int `json:"flakiest"` 47 // testsSorted is a list of the FlakyTests test names sorted by desc. number of flakes. 48 // This field is lazily populated and should be accessed via TestsSorted(). 49 testsSorted []string 50 51 // reporter is a pointer to the FlakyJobReporter that created this FlakyJob. 52 reporter *FlakyJobReporter 53 } 54 55 // FlakyJobReporter is a munger that creates github issues for the flakiest kubernetes jobs. 56 // The flakiest jobs are parsed from JSON generated by /test-infra/experiment/bigquery/flakes.sh 57 type FlakyJobReporter struct { 58 flakyJobDataURL string 59 syncCount int 60 61 creator *creator.IssueCreator 62 } 63 64 func init() { 65 creator.RegisterSourceOrDie("flakyjob-reporter", &FlakyJobReporter{}) 66 } 67 68 // RegisterFlags registers options for this munger; returns any that require a restart when changed. 69 func (fjr *FlakyJobReporter) RegisterFlags() { 70 flag.StringVar(&fjr.flakyJobDataURL, "flakyjob-url", "https://storage.googleapis.com/k8s-metrics/flakes-latest.json", "The url where flaky job JSON data can be found.") 71 flag.IntVar(&fjr.syncCount, "flakyjob-count", 3, "The number of flaky jobs to try to sync to github.") 72 } 73 74 // Issues is the main work method of FlakyJobReporter. It fetches and parses flaky job data, 75 // then syncs the top issues to github with the IssueCreator. 76 func (fjr *FlakyJobReporter) Issues(c *creator.IssueCreator) ([]creator.Issue, error) { 77 fjr.creator = c 78 json, err := ReadHTTP(fjr.flakyJobDataURL) 79 if err != nil { 80 return nil, err 81 } 82 83 flakyJobs, err := fjr.parseFlakyJobs(json) 84 if err != nil { 85 return nil, err 86 } 87 88 count := fjr.syncCount 89 if len(flakyJobs) < count { 90 count = len(flakyJobs) 91 } 92 issues := make([]creator.Issue, 0, count) 93 for _, fj := range flakyJobs[0:count] { 94 issues = append(issues, fj) 95 } 96 97 return issues, nil 98 } 99 100 // parseFlakyJobs parses JSON generated by the 'flakes' bigquery metric into a sorted slice of 101 // *FlakyJob. 102 func (fjr *FlakyJobReporter) parseFlakyJobs(jsonIn []byte) ([]*FlakyJob, error) { 103 var flakeMap map[string]*FlakyJob 104 err := json.Unmarshal(jsonIn, &flakeMap) 105 if err != nil || flakeMap == nil { 106 return nil, fmt.Errorf("error unmarshaling flaky jobs json: %v", err) 107 } 108 flakyJobs := make([]*FlakyJob, 0, len(flakeMap)) 109 110 for job, fj := range flakeMap { 111 if job == "" { 112 glog.Errorf("Flaky jobs json contained a job with an empty jobname.\n") 113 continue 114 } 115 if fj == nil { 116 glog.Errorf("Flaky jobs json has invalid data for job '%s'.\n", job) 117 continue 118 } 119 if fj.Consistency == nil { 120 glog.Errorf("Flaky jobs json has no 'consistency' field for job '%s'.\n", job) 121 continue 122 } 123 if fj.FlakeCount == nil { 124 glog.Errorf("Flaky jobs json has no 'flakes' field for job '%s'.\n", job) 125 continue 126 } 127 if fj.FlakyTests == nil { 128 glog.Errorf("Flaky jobs json has no 'flakiest' field for job '%s'.\n", job) 129 continue 130 } 131 fj.Name = job 132 fj.reporter = fjr 133 flakyJobs = append(flakyJobs, fj) 134 } 135 136 sort.SliceStable(flakyJobs, func(i, j int) bool { 137 if *flakyJobs[i].FlakeCount == *flakyJobs[j].FlakeCount { 138 return *flakyJobs[i].Consistency < *flakyJobs[j].Consistency 139 } 140 return *flakyJobs[i].FlakeCount > *flakyJobs[j].FlakeCount 141 }) 142 143 return flakyJobs, nil 144 } 145 146 // TestsSorted returns a slice of the testnames from a FlakyJob's FlakyTests map. The slice is 147 // sorted by descending number of failures for the tests. 148 func (fj *FlakyJob) TestsSorted() []string { 149 if fj.testsSorted != nil { 150 return fj.testsSorted 151 } 152 fj.testsSorted = make([]string, len(fj.FlakyTests)) 153 i := 0 154 for test := range fj.FlakyTests { 155 fj.testsSorted[i] = test 156 i++ 157 } 158 sort.SliceStable(fj.testsSorted, func(i, j int) bool { 159 return fj.FlakyTests[fj.testsSorted[i]] > fj.FlakyTests[fj.testsSorted[j]] 160 }) 161 return fj.testsSorted 162 } 163 164 // Title yields the initial title text of the github issue. 165 func (fj *FlakyJob) Title() string { 166 return fmt.Sprintf("%s flaked %d times in the past week", fj.Name, *fj.FlakeCount) 167 } 168 169 // ID yields the string identifier that uniquely identifies this issue. 170 // This ID must appear in the body of the issue. 171 // DO NOT CHANGE how this ID is formatted or duplicate issues may be created on github. 172 func (fj *FlakyJob) ID() string { 173 return fmt.Sprintf("Flaky Job: %s", fj.Name) 174 } 175 176 // Body returns the body text of the github issue and *must* contain the output of ID(). 177 // closedIssues is a (potentially empty) slice containing all closed issues authored by this bot 178 // that contain ID() in their body. 179 // If Body returns an empty string no issue is created. 180 func (fj *FlakyJob) Body(closedIssues []*githubapi.Issue) string { 181 // First check that the most recently closed issue (if any exist) was closed 182 // at least a week ago (since that is the sliding window size used by the flake metric). 183 cutoffTime := time.Now().AddDate(0, 0, -7) 184 for _, closed := range closedIssues { 185 if closed.ClosedAt.After(cutoffTime) { 186 return "" 187 } 188 } 189 190 // Print stats about the flaky job. 191 var buf bytes.Buffer 192 fmt.Fprintf(&buf, "### %s\n Flakes in the past week: **%d**\n Consistency: **%.2f%%**\n", 193 fj.ID(), *fj.FlakeCount, *fj.Consistency*100) 194 if len(fj.FlakyTests) > 0 { 195 fmt.Fprint(&buf, "\n#### Flakiest tests by flake count:\n| Test | Flake Count |\n| --- | --- |\n") 196 for _, testName := range fj.TestsSorted() { 197 fmt.Fprintf(&buf, "| %s | %d |\n", testName, fj.FlakyTests[testName]) 198 } 199 } 200 // List previously closed issues if there are any. 201 if len(closedIssues) > 0 { 202 fmt.Fprint(&buf, "\n#### Previously closed issues for this job flaking:\n") 203 for _, closed := range closedIssues { 204 fmt.Fprintf(&buf, "#%d ", *closed.Number) 205 } 206 fmt.Fprint(&buf, "\n") 207 } 208 209 // Create /assign command. 210 testsSorted := fj.TestsSorted() 211 ownersMap := fj.reporter.creator.TestsOwners(testsSorted) 212 if len(ownersMap) > 0 { 213 fmt.Fprint(&buf, "\n/assign") 214 for user := range ownersMap { 215 fmt.Fprintf(&buf, " @%s", user) 216 } 217 fmt.Fprint(&buf, "\n") 218 } 219 220 // Explain why assignees were assigned and why sig labels were applied. 221 fmt.Fprintf(&buf, "\n%s", fj.reporter.creator.ExplainTestAssignments(testsSorted)) 222 223 fmt.Fprintf(&buf, "\n[Flakiest Jobs](%s)\n", fj.reporter.flakyJobDataURL) 224 return buf.String() 225 } 226 227 // Labels returns the labels to apply to the issue created for this flaky job on github. 228 func (fj *FlakyJob) Labels() []string { 229 labels := []string{"kind/flake"} 230 // get sig labels 231 for sig := range fj.reporter.creator.TestsSIGs(fj.TestsSorted()) { 232 labels = append(labels, "sig/"+sig) 233 } 234 return labels 235 } 236 237 // Owners returns the list of usernames to assign to this issue on github. 238 func (fj *FlakyJob) Owners() []string { 239 // Assign owners by including a /assign command in the body instead of using Owners to set 240 // assignees on the issue request. This lets prow do the assignee validation and will mention 241 // the user we want to assign even if they can't be assigned. 242 return nil 243 } 244 245 // Priority calculates and returns the priority of this issue 246 // The returned bool indicates if the returned priority is valid and can be used 247 func (fj *FlakyJob) Priority() (string, bool) { 248 // TODO: implement priority calculations later 249 return "", false 250 } 251 252 // ReadHTTP fetches file contents from a URL with retries. 253 func ReadHTTP(url string) ([]byte, error) { 254 var err error 255 retryDelay := time.Duration(2) * time.Second 256 for retryCount := 0; retryCount < 5; retryCount++ { 257 if retryCount > 0 { 258 time.Sleep(retryDelay) 259 retryDelay *= time.Duration(2) 260 } 261 262 resp, err := http.Get(url) 263 if resp != nil && resp.StatusCode >= 500 { 264 // Retry on this type of error. 265 continue 266 } 267 if err != nil { 268 return nil, err 269 } 270 defer resp.Body.Close() 271 272 body, err := ioutil.ReadAll(resp.Body) 273 if err != nil { 274 continue 275 } 276 return body, nil 277 } 278 return nil, fmt.Errorf("ran out of retries reading from '%s'. Last error was %v", url, err) 279 }