k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/robots/issue-creator/sources/flakyjob-reporter.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package sources 18 19 import ( 20 "bytes" 21 "encoding/json" 22 "flag" 23 "fmt" 24 "io" 25 "net/http" 26 "sort" 27 "time" 28 29 "github.com/golang/glog" 30 31 githubapi "github.com/google/go-github/github" 32 "k8s.io/test-infra/robots/issue-creator/creator" 33 ) 34 35 // FlakyJob is a struct that represents a single job and the flake data associated with it. 36 // FlakyJob implements the Issue interface so that it can be synced with github issues via the IssueCreator. 37 type FlakyJob struct { 38 // Name is the job's name. 39 Name string 40 // Consistency is the percentage of builds that passed. 41 Consistency *float64 `json:"consistency"` 42 // FlakeCount is the number of flakes. 43 FlakeCount *int `json:"flakes"` 44 // FlakyTests is a map of test names to the number of times that test failed. 45 // Any test that failed at least once a day for the past week on this job is included. 46 FlakyTests map[string]int `json:"flakiest"` 47 // testsSorted is a list of the FlakyTests test names sorted by desc. number of flakes. 48 // This field is lazily populated and should be accessed via TestsSorted(). 49 testsSorted []string 50 51 // reporter is a pointer to the FlakyJobReporter that created this FlakyJob. 52 reporter *FlakyJobReporter 53 } 54 55 // FlakyJobReporter is a munger that creates github issues for the flakiest kubernetes jobs. 56 // The flakiest jobs are parsed from JSON generated by /test-infra/experiment/bigquery/flakes.sh 57 type FlakyJobReporter struct { 58 flakyJobDataURL string 59 syncCount int 60 61 creator *creator.IssueCreator 62 } 63 64 func init() { 65 creator.RegisterSourceOrDie("flakyjob-reporter", &FlakyJobReporter{}) 66 } 67 68 // RegisterFlags registers options for this munger; returns any that require a restart when changed. 69 func (fjr *FlakyJobReporter) RegisterFlags() { 70 flag.StringVar(&fjr.flakyJobDataURL, "flakyjob-url", "https://storage.googleapis.com/k8s-metrics/flakes-latest.json", "The url where flaky job JSON data can be found.") 71 flag.IntVar(&fjr.syncCount, "flakyjob-count", 3, "The number of flaky jobs to try to sync to github.") 72 } 73 74 // Issues is the main work method of FlakyJobReporter. It fetches and parses flaky job data, 75 // then syncs the top issues to github with the IssueCreator. 76 func (fjr *FlakyJobReporter) Issues(c *creator.IssueCreator) ([]creator.Issue, error) { 77 fjr.creator = c 78 json, err := ReadHTTP(fjr.flakyJobDataURL) 79 if err != nil { 80 return nil, err 81 } 82 83 flakyJobs, err := fjr.parseFlakyJobs(json) 84 if err != nil { 85 return nil, err 86 } 87 88 count := fjr.syncCount 89 if len(flakyJobs) < count { 90 count = len(flakyJobs) 91 } 92 issues := make([]creator.Issue, 0, count) 93 for _, fj := range flakyJobs[0:count] { 94 issues = append(issues, fj) 95 } 96 97 return issues, nil 98 } 99 100 // parseFlakyJobs parses JSON generated by the 'flakes' bigquery metric into a sorted slice of 101 // *FlakyJob. 102 func (fjr *FlakyJobReporter) parseFlakyJobs(jsonIn []byte) ([]*FlakyJob, error) { 103 var flakeMap map[string]*FlakyJob 104 err := json.Unmarshal(jsonIn, &flakeMap) 105 if err != nil || flakeMap == nil { 106 return nil, fmt.Errorf("error unmarshaling flaky jobs json: %w", err) 107 } 108 flakyJobs := make([]*FlakyJob, 0, len(flakeMap)) 109 110 for job, fj := range flakeMap { 111 if job == "" { 112 glog.Errorf("Flaky jobs json contained a job with an empty jobname.\n") 113 continue 114 } 115 if fj == nil { 116 glog.Errorf("Flaky jobs json has invalid data for job '%s'.\n", job) 117 continue 118 } 119 if fj.Consistency == nil { 120 glog.Errorf("Flaky jobs json has no 'consistency' field for job '%s'.\n", job) 121 continue 122 } 123 if fj.FlakeCount == nil { 124 glog.Errorf("Flaky jobs json has no 'flakes' field for job '%s'.\n", job) 125 continue 126 } 127 if fj.FlakyTests == nil { 128 glog.Errorf("Flaky jobs json has no 'flakiest' field for job '%s'.\n", job) 129 continue 130 } 131 fj.Name = job 132 fj.reporter = fjr 133 flakyJobs = append(flakyJobs, fj) 134 } 135 136 sort.SliceStable(flakyJobs, func(i, j int) bool { 137 if *flakyJobs[i].FlakeCount == *flakyJobs[j].FlakeCount { 138 return *flakyJobs[i].Consistency < *flakyJobs[j].Consistency 139 } 140 return *flakyJobs[i].FlakeCount > *flakyJobs[j].FlakeCount 141 }) 142 143 return flakyJobs, nil 144 } 145 146 // TestsSorted returns a slice of the testnames from a FlakyJob's FlakyTests map. The slice is 147 // sorted by descending number of failures for the tests. 148 func (fj *FlakyJob) TestsSorted() []string { 149 if fj.testsSorted != nil { 150 return fj.testsSorted 151 } 152 fj.testsSorted = make([]string, len(fj.FlakyTests)) 153 i := 0 154 for test := range fj.FlakyTests { 155 fj.testsSorted[i] = test 156 i++ 157 } 158 sort.SliceStable(fj.testsSorted, func(i, j int) bool { 159 return fj.FlakyTests[fj.testsSorted[i]] > fj.FlakyTests[fj.testsSorted[j]] 160 }) 161 return fj.testsSorted 162 } 163 164 // Title yields the initial title text of the github issue. 165 func (fj *FlakyJob) Title() string { 166 return fmt.Sprintf("%s flaked %d times in the past week", fj.Name, *fj.FlakeCount) 167 } 168 169 // ID yields the string identifier that uniquely identifies this issue. 170 // This ID must appear in the body of the issue. 171 // DO NOT CHANGE how this ID is formatted or duplicate issues may be created on github. 172 func (fj *FlakyJob) ID() string { 173 return fmt.Sprintf("Flaky Job: %s", fj.Name) 174 } 175 176 // Body returns the body text of the github issue and *must* contain the output of ID(). 177 // closedIssues is a (potentially empty) slice containing all closed issues authored by this bot 178 // that contain ID() in their body. 179 // If Body returns an empty string no issue is created. 180 func (fj *FlakyJob) Body(closedIssues []*githubapi.Issue) string { 181 // First check that the most recently closed issue (if any exist) was closed 182 // at least a week ago (since that is the sliding window size used by the flake metric). 183 cutoffTime := time.Now().AddDate(0, 0, -7) 184 for _, closed := range closedIssues { 185 if closed.ClosedAt.After(cutoffTime) { 186 return "" 187 } 188 } 189 190 // Print stats about the flaky job. 191 var buf bytes.Buffer 192 fmt.Fprintf(&buf, "### %s\n Flakes in the past week: **%d**\n Consistency: **%.2f%%**\n", 193 fj.ID(), *fj.FlakeCount, *fj.Consistency*100) 194 if len(fj.FlakyTests) > 0 { 195 fmt.Fprint(&buf, "\n#### Flakiest tests by flake count:\n| Test | Flake Count |\n| --- | --- |\n") 196 for _, testName := range fj.TestsSorted() { 197 fmt.Fprintf(&buf, "| %s | %d |\n", testName, fj.FlakyTests[testName]) 198 } 199 } 200 // List previously closed issues if there are any. 201 if len(closedIssues) > 0 { 202 fmt.Fprint(&buf, "\n#### Previously closed issues for this job flaking:\n") 203 for _, closed := range closedIssues { 204 fmt.Fprintf(&buf, "#%d ", *closed.Number) 205 } 206 fmt.Fprint(&buf, "\n") 207 } 208 209 // Create /assign command. 210 testsSorted := fj.TestsSorted() 211 ownersMap := fj.reporter.creator.TestsOwners(testsSorted) 212 if len(ownersMap) > 0 { 213 fmt.Fprint(&buf, "\n/assign") 214 for user := range ownersMap { 215 fmt.Fprintf(&buf, " @%s", user) 216 } 217 fmt.Fprint(&buf, "\n") 218 } 219 220 // Explain why assignees were assigned and why sig labels were applied. 221 fmt.Fprintf(&buf, "\n%s", fj.reporter.creator.ExplainTestAssignments(testsSorted)) 222 223 fmt.Fprintf(&buf, "\n[Flakiest Jobs](%s)\n", fj.reporter.flakyJobDataURL) 224 225 fmt.Fprintf(&buf, "\n/kind flake\n") 226 227 return buf.String() 228 } 229 230 // Labels returns the labels to apply to the issue created for this flaky job on github. 231 func (fj *FlakyJob) Labels() []string { 232 labels := []string{"kind/flake"} 233 // get sig labels 234 for sig := range fj.reporter.creator.TestsSIGs(fj.TestsSorted()) { 235 labels = append(labels, "sig/"+sig) 236 } 237 return labels 238 } 239 240 // Owners returns the list of usernames to assign to this issue on github. 241 func (fj *FlakyJob) Owners() []string { 242 // Assign owners by including a /assign command in the body instead of using Owners to set 243 // assignees on the issue request. This lets prow do the assignee validation and will mention 244 // the user we want to assign even if they can't be assigned. 245 return nil 246 } 247 248 // Priority calculates and returns the priority of this issue 249 // The returned bool indicates if the returned priority is valid and can be used 250 func (fj *FlakyJob) Priority() (string, bool) { 251 // TODO: implement priority calculations later 252 return "", false 253 } 254 255 // ReadHTTP fetches file contents from a URL with retries. 256 func ReadHTTP(url string) ([]byte, error) { 257 var err error 258 retryDelay := time.Duration(2) * time.Second 259 for retryCount := 0; retryCount < 5; retryCount++ { 260 if retryCount > 0 { 261 time.Sleep(retryDelay) 262 retryDelay *= time.Duration(2) 263 } 264 265 resp, err := http.Get(url) 266 if resp != nil && resp.StatusCode >= 500 { 267 // Retry on this type of error. 268 continue 269 } 270 if err != nil { 271 return nil, err 272 } 273 defer resp.Body.Close() 274 275 body, err := io.ReadAll(resp.Body) 276 if err != nil { 277 continue 278 } 279 return body, nil 280 } 281 return nil, fmt.Errorf("ran out of retries reading from '%s'. Last error was %w", url, err) 282 }