k8s.io/test-infra/triage@v0.0.0-20240520184403-27c6b4c223d8/summarize/output.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 Contains functions that prepare data for output. 19 */ 20 21 package summarize 22 23 import ( 24 "fmt" 25 "regexp" 26 "sort" 27 "strconv" 28 "strings" 29 30 "k8s.io/apimachinery/pkg/util/sets" 31 "k8s.io/klog/v2" 32 "k8s.io/test-infra/triage/utils" 33 ) 34 35 // jsonOutput represents the output as it will be written to the JSON. 36 type jsonOutput struct { 37 Clustered []jsonCluster `json:"clustered"` 38 Builds columns `json:"builds"` 39 } 40 41 // render accepts a map from build paths to builds, and the global clusters, and renders them in a 42 // format consumable by the web page. 43 func render(builds map[string]build, clustered nestedFailuresGroups, maxFailureTextLength int) jsonOutput { 44 clusteredSorted := clustered.sortByMostAggregatedFailures() 45 46 flattenedClusters := make([]flattenedGlobalCluster, len(clusteredSorted)) 47 48 for i, pair := range clusteredSorted { 49 k := pair.Key 50 clusters := pair.Group 51 52 flattenedClusters[i] = flattenedGlobalCluster{ 53 k, 54 makeNgramCountsDigest(k), 55 clusters.sortByMostFailures(), 56 } 57 } 58 59 return jsonOutput{ 60 clustersToDisplay(flattenedClusters, builds, maxFailureTextLength), 61 buildsToColumns(builds), 62 } 63 } 64 65 // sigLabelRE matches '[sig-x]', so long as x does not contain a closing bracket. 66 var sigLabelRE = regexp.MustCompile(`\[sig-([^]]*)\]`) 67 68 /* 69 annotateOwners assigns ownership to a cluster based on the share of hits in the last day. It modifies 70 the data parameter in place. 71 72 owners maps SIG names to collections of SIG-specific prefixes. 73 */ 74 func annotateOwners(data *jsonOutput, builds map[string]build, owners map[string][]string) error { 75 var ownerRE *regexp.Regexp = nil 76 if owners != nil { 77 // Dynamically create a regular expression based on the value of owners. 78 /* 79 namedOwnerREs is a collection of regular expressions of the form 80 (?P<signame>prefixA|prefixB|prefixC) 81 where signame is the name of a SIG (such as 'sig-testing') with '-' replaced with '_' for 82 compatibility with regex capture group name rules. There can be any number of prefixes 83 following the capture group name. 84 */ 85 namedOwnerREs := make([]string, 0, len(owners)) 86 for sig, prefixes := range owners { 87 // prefixREs is a collection of non-empty prefixes with any special regex characters quoted 88 prefixREs := make([]string, 0, len(prefixes)) 89 for _, prefix := range prefixes { 90 if prefix != "" { 91 prefixREs = append(prefixREs, regexp.QuoteMeta(prefix)) 92 } 93 } 94 95 namedOwnerREs = append(namedOwnerREs, 96 fmt.Sprintf("(?P<%s>%s)", 97 strings.Replace(sig, "-", "_", -1), // Regex group names can't have '-', we'll substitute back later 98 strings.Join(prefixREs, "|"))) 99 } 100 101 // ownerRE is the final regex created from the values of namedOwnerREs, placed into a 102 // non-capturing group 103 var err error 104 ownerRE, err = regexp.Compile(fmt.Sprintf(`(?:%s)`, strings.Join(namedOwnerREs, "|"))) 105 if err != nil { 106 return fmt.Errorf("Could not compile ownerRE from provided SIG names and prefixes: %s", err) 107 } 108 } 109 110 jobPaths := data.Builds.JobPaths 111 yesterday := 0 112 if len(data.Builds.Cols.Started) > 0 { 113 yesterday = utils.Max(data.Builds.Cols.Started...) - (60 * 60 * 24) 114 } 115 116 // Determine the owner for each cluster 117 for i := range data.Clustered { 118 cluster := &data.Clustered[i] 119 // Maps owner names to hits (I think hits yesterday and hits today, respectively) 120 ownerCounts := make(map[string][]int) 121 122 // For each test, determine the owner with the most hits 123 for _, test := range cluster.Tests { 124 var owner string 125 if submatches := sigLabelRE.FindStringSubmatch(test.Name); submatches != nil { 126 owner = submatches[1] // Get the first (and only) submatch of sigLabelRE 127 } else if ownerRE != nil { 128 normalizedTestName := normalizeName(test.Name) 129 130 // Determine whether there were any named groups with matches for normalizedTestName, 131 // and if so what the first named group with a match is 132 namedGroupMatchExists := false 133 firstMatchingGroupName := "" 134 // Names of the named capturing groups, which are really the names of the owners 135 groupNames := ownerRE.SubexpNames() 136 outer: 137 for _, submatches := range ownerRE.FindAllStringSubmatch(normalizedTestName, -1) { 138 for i, submatch := range submatches { 139 // If the group is named and there was a match 140 if groupNames[i] != "" && submatch != "" { 141 namedGroupMatchExists = true 142 firstMatchingGroupName = groupNames[i] 143 break outer 144 } 145 } 146 } 147 148 ownerIndex := ownerRE.FindStringIndex(normalizedTestName) 149 150 if ownerIndex == nil || // If no match was found for the owner, or 151 ownerIndex[0] != 0 || // the test name did not begin with the owner name, or 152 !namedGroupMatchExists { // there were no named groups that matched 153 continue 154 } 155 156 // Get the name of the first named group with a non-empty match, and assign it to owner 157 owner = firstMatchingGroupName 158 } 159 160 owner = strings.Replace(owner, "_", "-", -1) // Substitute '_' back to '-' 161 162 if _, ok := ownerCounts[owner]; !ok { 163 ownerCounts[owner] = []int{0, 0} 164 } 165 counts := ownerCounts[owner] 166 167 for _, job := range test.Jobs { 168 if strings.Contains(job.Name, ":") { // non-standard CI 169 continue 170 } 171 172 jobPath := jobPaths[job.Name] 173 for _, build := range job.BuildNumbers { 174 bucketKey := fmt.Sprintf("%s/%s", jobPath, build) 175 if _, ok := builds[bucketKey]; !ok { 176 continue 177 } else if builds[bucketKey].Started > yesterday { 178 counts[0]++ 179 } else { 180 counts[1]++ 181 } 182 } 183 } 184 } 185 186 if len(ownerCounts) != 0 { 187 // Utility function to find the owner with the most hits yesterday, then most hits today, 188 // then first name alphabetically. Returns true if current owner wins, false otherwise. 189 newOwnerHasMoreHits := func(topOwner string, topOwnerCounts []int, currentOwner string, currentCounts []int) bool { 190 if currentCounts[0] == topOwnerCounts[0] { 191 if currentCounts[1] == topOwnerCounts[1] { 192 // Which has the earlier name alphabetically 193 return currentOwner < topOwner 194 } 195 return currentCounts[1] > topOwnerCounts[1] 196 } 197 return currentCounts[0] > topOwnerCounts[0] 198 } 199 200 // Find the owner with the most hits 201 var topOwner string 202 topCounts := []int{0, 0} 203 for owner, counts := range ownerCounts { 204 if newOwnerHasMoreHits(topOwner, topCounts, owner, counts) { 205 topOwner = owner 206 topCounts = counts 207 } 208 } 209 cluster.Owner = topOwner 210 } else { 211 cluster.Owner = "testing" 212 } 213 } 214 return nil 215 } 216 217 // renderSlice returns clusters whose owner field is the owner parameter or whose id field has a 218 // prefix of the prefix parameter, and the columnar form of the jobs belonging to those clusters. 219 // If parameters prefix and owner are both the empty string, the function will return empty objects. 220 func renderSlice(data jsonOutput, builds map[string]build, prefix string, owner string) ([]jsonCluster, columns) { 221 clustered := make([]jsonCluster, 0) 222 // Maps build paths to builds 223 buildsOut := make(map[string]build) 224 jobs := make(sets.String) 225 226 // For each cluster whose owner field is the owner parameter, or whose id field has a prefix of 227 // the prefix parameter, add its tests' jobs to the jobs set. 228 for _, cluster := range data.Clustered { 229 if owner != "" && cluster.Owner == owner { 230 clustered = append(clustered, cluster) 231 } else if prefix != "" && strings.HasPrefix(cluster.ID, prefix) { 232 clustered = append(clustered, cluster) 233 } else { 234 continue 235 } 236 237 for _, tst := range cluster.Tests { 238 for _, jb := range tst.Jobs { 239 jobs.Insert(jb.Name) 240 } 241 } 242 } 243 244 // Add builds whose job is in jobs to buildsOut 245 for _, bld := range builds { 246 if jobs.Has(bld.Job) { 247 buildsOut[bld.Path] = bld 248 } 249 } 250 251 return clustered, buildsToColumns(buildsOut) 252 } 253 254 // flattenedGlobalCluster is the key and value of a specific global cluster (as clusterText and 255 // sortedTests, respectively), plus the result of calling makeNgramCountsDigest on the key. 256 type flattenedGlobalCluster struct { 257 clusterText string 258 ngramCountsDigest string 259 sortedTests []failuresGroupPair 260 } 261 262 // test represents a test name and a collection of associated jobs. 263 type test struct { 264 Name string `json:"name"` 265 Jobs []job `json:"jobs"` 266 } 267 268 /* 269 jsonCluster represents a global cluster as it will be written to the JSON. 270 271 key: the cluster text 272 id: the result of calling makeNgramCountsDigest() on key 273 text: a failure text from one of the cluster's failures 274 spans: common spans between all of the cluster's failure texts 275 tests: the build numbers that belong to the cluster's failures as per testGroupByJob() 276 owner: the SIG that owns the cluster, determined by annotateOwners() 277 */ 278 type jsonCluster struct { 279 Key string `json:"key"` 280 ID string `json:"id"` 281 Text string `json:"text"` 282 Spans []int `json:"spans"` 283 Tests []test `json:"tests"` 284 Owner string `json:"owner"` 285 } 286 287 // clustersToDisplay transposes and sorts the flattened output of clusterGlobal. 288 // builds maps a build path to a build object. 289 func clustersToDisplay(clustered []flattenedGlobalCluster, builds map[string]build, maxFailureTextLength int) []jsonCluster { 290 jsonClusters := make([]jsonCluster, 0, len(clustered)) 291 292 for _, flattened := range clustered { 293 key := flattened.clusterText 294 keyID := flattened.ngramCountsDigest 295 clusters := flattened.sortedTests 296 297 // Determine the number of failures across all clusters 298 numClusterFailures := 0 299 for _, cluster := range clusters { 300 numClusterFailures += len(cluster.Failures) 301 } 302 303 if numClusterFailures > 1 { 304 jCluster := jsonCluster{ 305 Key: key, 306 ID: keyID, 307 Text: truncate(clusters[0].Failures[0].FailureText, maxFailureTextLength), 308 Tests: make([]test, len(clusters)), 309 } 310 311 // Get all of the failure texts from all clusters 312 clusterFailureTexts := make([]string, 0, numClusterFailures) 313 for _, cluster := range clusters { 314 for _, flr := range cluster.Failures { 315 clusterFailureTexts = append(clusterFailureTexts, truncate(flr.FailureText, maxFailureTextLength)) 316 } 317 } 318 jCluster.Spans = commonSpans(clusterFailureTexts) 319 320 // Fill out jCluster.tests 321 for i, cluster := range clusters { 322 jCluster.Tests[i] = test{ 323 Name: cluster.Key, 324 Jobs: testsGroupByJob(cluster.Failures, builds), 325 } 326 } 327 328 jsonClusters = append(jsonClusters, jCluster) 329 } 330 } 331 332 return jsonClusters 333 } 334 335 // job represents a job name and a collection of associated build numbers. 336 type job struct { 337 Name string `json:"name"` 338 BuildNumbers []string `json:"builds"` 339 } 340 341 // build represents a specific instance of a build. 342 type build struct { 343 Path string `json:"path"` 344 Started int `json:"started"` 345 Elapsed int `json:"elapsed"` 346 TestsRun int `json:"tests_run"` 347 TestsFailed int `json:"tests_failed"` 348 Result string `json:"result"` 349 Executor string `json:"executor"` 350 Job string `json:"job"` 351 Number int `json:"number"` 352 PR string `json:"pr"` 353 Key string `json:"key"` // Often nonexistent 354 } 355 356 /* 357 testsGroupByJob takes a group of failures and a map of builds and returns the list of build numbers 358 that belong to each failure's job. 359 360 builds is a mapping from build paths to build objects. 361 */ 362 func testsGroupByJob(failures []failure, builds map[string]build) []job { 363 // groups maps job names to sets of failures' build numbers (as strings). 364 groups := make(map[string]sets.String) 365 366 // For each failure, grab its build's job name. Map the job name to the failure's build number. 367 for _, flr := range failures { 368 // Try to grab the build from builds if it exists 369 if bld, ok := builds[flr.Build]; ok { 370 // If the JSON build's "number" field was not null 371 if bld.Number != 0 { 372 // Create the set if one doesn't exist for the given job 373 if _, ok := groups[bld.Job]; !ok { 374 groups[bld.Job] = make(sets.String, 1) 375 } 376 groups[bld.Job].Insert(strconv.Itoa(bld.Number)) 377 } 378 } 379 } 380 381 // Sort groups in two stages. 382 // First, sort each build number set in descending order. 383 // Then, sort the jobs by the number of build numbers in each job's build number slice, descending. 384 385 // First stage 386 // sortedBuildNumbers is essentially groups, but with the build numbers sorted. 387 sortedBuildNumbers := make(map[string][]string, len(groups)) 388 // Create the slice to hold the set elements, fill it, and sort it 389 for jobName, buildNumberSet := range groups { 390 // Initialize the int slice 391 sortedBuildNumbers[jobName] = make([]string, len(buildNumberSet)) 392 393 // Fill it 394 iter := 0 395 for buildNumber := range buildNumberSet { 396 sortedBuildNumbers[jobName][iter] = buildNumber 397 iter++ 398 } 399 400 // Sort it. Use > instead of < in less function to sort descending. 401 sort.Slice(sortedBuildNumbers[jobName], func(i, j int) bool { return sortedBuildNumbers[jobName][i] > sortedBuildNumbers[jobName][j] }) 402 } 403 404 // Second stage 405 sortedGroups := make([]job, 0, len(groups)) 406 407 // Fill sortedGroups 408 for newJobName, newBuildNumbers := range sortedBuildNumbers { 409 sortedGroups = append(sortedGroups, job{newJobName, newBuildNumbers}) 410 } 411 // Sort it 412 sort.Slice(sortedGroups, func(i, j int) bool { 413 iGroupLen := len(sortedGroups[i].BuildNumbers) 414 jGroupLen := len(sortedGroups[j].BuildNumbers) 415 416 // If they're the same length, sort by job name alphabetically 417 if iGroupLen == jGroupLen { 418 return sortedGroups[i].Name < sortedGroups[j].Name 419 } 420 421 // Use > instead of < to sort descending. 422 return iGroupLen > jGroupLen 423 }) 424 425 return sortedGroups 426 } 427 428 /* 429 columnarBuilds represents a collection of build objects where the i-th build's property p can be 430 found at p[i]. 431 432 For example, the 4th (0-indexed) build's start time can be found in started[4], while its elapsed 433 time can be found in elapsed[4]. 434 */ 435 type columnarBuilds struct { 436 Started []int `json:"started"` 437 TestsFailed []int `json:"tests_failed"` 438 Elapsed []int `json:"elapsed"` 439 TestsRun []int `json:"tests_run"` 440 Result []string `json:"result"` 441 Executor []string `json:"executor"` 442 PR []string `json:"pr"` 443 } 444 445 // currentIndex returns the index of the next build to be stored (and, by extension, the number of 446 // builds currently stored). 447 func (cb *columnarBuilds) currentIndex() int { 448 return len(cb.Started) 449 } 450 451 // insert adds a build into the columnarBuilds object. 452 func (cb *columnarBuilds) insert(b build) { 453 cb.Started = append(cb.Started, b.Started) 454 cb.TestsFailed = append(cb.TestsFailed, b.TestsFailed) 455 cb.Elapsed = append(cb.Elapsed, b.Elapsed) 456 cb.TestsRun = append(cb.TestsRun, b.TestsRun) 457 cb.Result = append(cb.Result, b.Result) 458 cb.Executor = append(cb.Executor, b.Executor) 459 cb.PR = append(cb.PR, b.PR) 460 } 461 462 // newColumnarBuilds creates a columnarBuilds object with the correct number of columns. The number 463 // of columns is the same as the number of builds being converted to columnar form. 464 func newColumnarBuilds(columns int) columnarBuilds { 465 // Start the length at 0 because columnarBuilds.currentIndex() relies on the length. 466 return columnarBuilds{ 467 Started: make([]int, 0, columns), 468 TestsFailed: make([]int, 0, columns), 469 Elapsed: make([]int, 0, columns), 470 TestsRun: make([]int, 0, columns), 471 Result: make([]string, 0, columns), 472 Executor: make([]string, 0, columns), 473 PR: make([]string, 0, columns), 474 } 475 } 476 477 /* 478 jobCollection represents a collection of jobs. It can either be a map[int]int (a mapping from 479 build numbers to indexes of builds in the columnar representation) or a []int (a condensed form 480 of the mapping for dense sequential mappings from builds to indexes; see buildsToColumns() comment). 481 This is necessary because the outputted JSON is unstructured, and has some fields that can be 482 either a map or a slice. 483 */ 484 type jobCollection interface{} 485 486 /* 487 columns represents a collection of builds in columnar form, plus the necessary maps to decode it. 488 489 jobs maps job names to their location in the columnar form. 490 491 cols is the collection of builds in columnar form. 492 493 jobPaths maps a job name to a build path, minus the last path segment. 494 */ 495 type columns struct { 496 Jobs map[string]jobCollection `json:"jobs"` 497 Cols columnarBuilds `json:"cols"` 498 JobPaths map[string]string `json:"job_paths"` 499 } 500 501 // buildsToColumns converts a map (from build paths to builds) into a columnar form. This compresses 502 // much better with gzip. See columnarBuilds for more information on the columnar form. 503 func buildsToColumns(builds map[string]build) columns { 504 // The function result 505 // result.jobs maps job names to either map[int]int or []int. See jobCollection. 506 result := columns{make(map[string]jobCollection), newColumnarBuilds(len(builds)), make(map[string]string)} 507 508 // Sort the builds before making them columnar 509 sortedBuilds := make([]build, 0, len(builds)) 510 // Fill the slice 511 for _, bld := range builds { 512 sortedBuilds = append(sortedBuilds, bld) 513 } 514 // Sort the slice 515 sort.Slice(sortedBuilds, func(i, j int) bool { 516 // Sort by job name, then by build number 517 if sortedBuilds[i].Job == sortedBuilds[j].Job { 518 return sortedBuilds[i].Number < sortedBuilds[j].Number 519 } 520 return sortedBuilds[i].Job < sortedBuilds[j].Job 521 }) 522 523 // Add the builds to result.cols 524 for _, bld := range sortedBuilds { 525 // If there was no build number when the build was retrieved from the JSON 526 if bld.Number == 0 { 527 continue 528 } 529 530 // Get the index within cols's slices of the next inserted build 531 index := result.Cols.currentIndex() 532 533 // Add the build 534 result.Cols.insert(bld) 535 536 // job maps build numbers to their indexes in the columnar representation 537 var job map[int]int 538 if _, ok := result.Jobs[bld.Job]; !ok { 539 result.Jobs[bld.Job] = make(map[int]int) 540 } 541 // We can safely assert map[int]int here because replacement of maps with slices only 542 // happens later 543 job = result.Jobs[bld.Job].(map[int]int) 544 545 // Store the job path 546 if len(job) == 0 { 547 result.JobPaths[bld.Job] = bld.Path[:strings.LastIndex(bld.Path, "/")] 548 } 549 550 // Store the column number (index) so we know in which column to find which build 551 job[bld.Number] = index 552 } 553 554 // Sort build numbers and compress some data 555 for jobName, indexes := range result.Jobs { 556 // Sort the build numbers 557 sortedBuildNumbers := make([]int, 0, len(indexes.(map[int]int))) 558 for key := range indexes.(map[int]int) { 559 sortedBuildNumbers = append(sortedBuildNumbers, key) 560 } 561 sort.Ints(sortedBuildNumbers) 562 563 base := indexes.(map[int]int)[sortedBuildNumbers[0]] 564 count := len(sortedBuildNumbers) 565 566 // Optimization: if we have a dense sequential mapping of builds=>indexes, 567 // store only the first build number, the run length, and the first index number. 568 allTrue := true 569 for i, buildNumber := range sortedBuildNumbers { 570 if indexes.(map[int]int)[buildNumber] != i+base { 571 allTrue = false 572 break 573 } 574 } 575 if (sortedBuildNumbers[len(sortedBuildNumbers)-1] == sortedBuildNumbers[0]+count-1) && allTrue { 576 result.Jobs[jobName] = []int{sortedBuildNumbers[0], count, base} 577 for _, n := range sortedBuildNumbers { 578 if !(n <= sortedBuildNumbers[0]+len(sortedBuildNumbers)) { 579 klog.Fatal(jobName, n, result.Jobs[jobName], len(sortedBuildNumbers), sortedBuildNumbers) 580 } 581 } 582 } 583 } 584 return result 585 }