k8s.io/test-infra/triage@v0.0.0-20240520184403-27c6b4c223d8/summarize/output.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  /*
    18  Contains functions that prepare data for output.
    19  */
    20  
    21  package summarize
    22  
    23  import (
    24  	"fmt"
    25  	"regexp"
    26  	"sort"
    27  	"strconv"
    28  	"strings"
    29  
    30  	"k8s.io/apimachinery/pkg/util/sets"
    31  	"k8s.io/klog/v2"
    32  	"k8s.io/test-infra/triage/utils"
    33  )
    34  
// jsonOutput is the top-level structure written to the output JSON. render() fills it with the
// clusters to display and the builds in columnar form.
type jsonOutput struct {
	Clustered []jsonCluster `json:"clustered"` // Clusters, as produced by clustersToDisplay()
	Builds    columns       `json:"builds"`    // Builds in columnar form, as produced by buildsToColumns()
}
    40  
    41  // render accepts a map from build paths to builds, and the global clusters, and renders them in a
    42  // format consumable by the web page.
    43  func render(builds map[string]build, clustered nestedFailuresGroups, maxFailureTextLength int) jsonOutput {
    44  	clusteredSorted := clustered.sortByMostAggregatedFailures()
    45  
    46  	flattenedClusters := make([]flattenedGlobalCluster, len(clusteredSorted))
    47  
    48  	for i, pair := range clusteredSorted {
    49  		k := pair.Key
    50  		clusters := pair.Group
    51  
    52  		flattenedClusters[i] = flattenedGlobalCluster{
    53  			k,
    54  			makeNgramCountsDigest(k),
    55  			clusters.sortByMostFailures(),
    56  		}
    57  	}
    58  
    59  	return jsonOutput{
    60  		clustersToDisplay(flattenedClusters, builds, maxFailureTextLength),
    61  		buildsToColumns(builds),
    62  	}
    63  }
    64  
// sigLabelRE matches a label of the form '[sig-x]' anywhere in a string, where x is any (possibly
// empty) run of characters other than ']'. Its single capture group holds x.
var sigLabelRE = regexp.MustCompile(`\[sig-([^]]*)\]`)
    67  
    68  /*
    69  annotateOwners assigns ownership to a cluster based on the share of hits in the last day. It modifies
    70  the data parameter in place.
    71  
    72  owners maps SIG names to collections of SIG-specific prefixes.
    73  */
    74  func annotateOwners(data *jsonOutput, builds map[string]build, owners map[string][]string) error {
    75  	var ownerRE *regexp.Regexp = nil
    76  	if owners != nil {
    77  		// Dynamically create a regular expression based on the value of owners.
    78  		/*
    79  			namedOwnerREs is a collection of regular expressions of the form
    80  				(?P<signame>prefixA|prefixB|prefixC)
    81  			where signame is the name of a SIG (such as 'sig-testing') with '-' replaced with '_' for
    82  			compatibility with regex capture group name rules. There can be any number of prefixes
    83  			following the capture group name.
    84  		*/
    85  		namedOwnerREs := make([]string, 0, len(owners))
    86  		for sig, prefixes := range owners {
    87  			// prefixREs is a collection of non-empty prefixes with any special regex characters quoted
    88  			prefixREs := make([]string, 0, len(prefixes))
    89  			for _, prefix := range prefixes {
    90  				if prefix != "" {
    91  					prefixREs = append(prefixREs, regexp.QuoteMeta(prefix))
    92  				}
    93  			}
    94  
    95  			namedOwnerREs = append(namedOwnerREs,
    96  				fmt.Sprintf("(?P<%s>%s)",
    97  					strings.Replace(sig, "-", "_", -1), // Regex group names can't have '-', we'll substitute back later
    98  					strings.Join(prefixREs, "|")))
    99  		}
   100  
   101  		// ownerRE is the final regex created from the values of namedOwnerREs, placed into a
   102  		// non-capturing group
   103  		var err error
   104  		ownerRE, err = regexp.Compile(fmt.Sprintf(`(?:%s)`, strings.Join(namedOwnerREs, "|")))
   105  		if err != nil {
   106  			return fmt.Errorf("Could not compile ownerRE from provided SIG names and prefixes: %s", err)
   107  		}
   108  	}
   109  
   110  	jobPaths := data.Builds.JobPaths
   111  	yesterday := 0
   112  	if len(data.Builds.Cols.Started) > 0 {
   113  		yesterday = utils.Max(data.Builds.Cols.Started...) - (60 * 60 * 24)
   114  	}
   115  
   116  	// Determine the owner for each cluster
   117  	for i := range data.Clustered {
   118  		cluster := &data.Clustered[i]
   119  		// Maps owner names to hits (I think hits yesterday and hits today, respectively)
   120  		ownerCounts := make(map[string][]int)
   121  
   122  		// For each test, determine the owner with the most hits
   123  		for _, test := range cluster.Tests {
   124  			var owner string
   125  			if submatches := sigLabelRE.FindStringSubmatch(test.Name); submatches != nil {
   126  				owner = submatches[1] // Get the first (and only) submatch of sigLabelRE
   127  			} else if ownerRE != nil {
   128  				normalizedTestName := normalizeName(test.Name)
   129  
   130  				// Determine whether there were any named groups with matches for normalizedTestName,
   131  				// and if so what the first named group with a match is
   132  				namedGroupMatchExists := false
   133  				firstMatchingGroupName := ""
   134  				// Names of the named capturing groups, which are really the names of the owners
   135  				groupNames := ownerRE.SubexpNames()
   136  			outer:
   137  				for _, submatches := range ownerRE.FindAllStringSubmatch(normalizedTestName, -1) {
   138  					for i, submatch := range submatches {
   139  						// If the group is named and there was a match
   140  						if groupNames[i] != "" && submatch != "" {
   141  							namedGroupMatchExists = true
   142  							firstMatchingGroupName = groupNames[i]
   143  							break outer
   144  						}
   145  					}
   146  				}
   147  
   148  				ownerIndex := ownerRE.FindStringIndex(normalizedTestName)
   149  
   150  				if ownerIndex == nil || // If no match was found for the owner, or
   151  					ownerIndex[0] != 0 || // the test name did not begin with the owner name, or
   152  					!namedGroupMatchExists { // there were no named groups that matched
   153  					continue
   154  				}
   155  
   156  				// Get the name of the first named group with a non-empty match, and assign it to owner
   157  				owner = firstMatchingGroupName
   158  			}
   159  
   160  			owner = strings.Replace(owner, "_", "-", -1) // Substitute '_' back to '-'
   161  
   162  			if _, ok := ownerCounts[owner]; !ok {
   163  				ownerCounts[owner] = []int{0, 0}
   164  			}
   165  			counts := ownerCounts[owner]
   166  
   167  			for _, job := range test.Jobs {
   168  				if strings.Contains(job.Name, ":") { // non-standard CI
   169  					continue
   170  				}
   171  
   172  				jobPath := jobPaths[job.Name]
   173  				for _, build := range job.BuildNumbers {
   174  					bucketKey := fmt.Sprintf("%s/%s", jobPath, build)
   175  					if _, ok := builds[bucketKey]; !ok {
   176  						continue
   177  					} else if builds[bucketKey].Started > yesterday {
   178  						counts[0]++
   179  					} else {
   180  						counts[1]++
   181  					}
   182  				}
   183  			}
   184  		}
   185  
   186  		if len(ownerCounts) != 0 {
   187  			// Utility function to find the owner with the most hits yesterday, then most hits today,
   188  			// then first name alphabetically. Returns true if current owner wins, false otherwise.
   189  			newOwnerHasMoreHits := func(topOwner string, topOwnerCounts []int, currentOwner string, currentCounts []int) bool {
   190  				if currentCounts[0] == topOwnerCounts[0] {
   191  					if currentCounts[1] == topOwnerCounts[1] {
   192  						// Which has the earlier name alphabetically
   193  						return currentOwner < topOwner
   194  					}
   195  					return currentCounts[1] > topOwnerCounts[1]
   196  				}
   197  				return currentCounts[0] > topOwnerCounts[0]
   198  			}
   199  
   200  			// Find the owner with the most hits
   201  			var topOwner string
   202  			topCounts := []int{0, 0}
   203  			for owner, counts := range ownerCounts {
   204  				if newOwnerHasMoreHits(topOwner, topCounts, owner, counts) {
   205  					topOwner = owner
   206  					topCounts = counts
   207  				}
   208  			}
   209  			cluster.Owner = topOwner
   210  		} else {
   211  			cluster.Owner = "testing"
   212  		}
   213  	}
   214  	return nil
   215  }
   216  
   217  // renderSlice returns clusters whose owner field is the owner parameter or whose id field has a
   218  // prefix of the prefix parameter, and the columnar form of the jobs belonging to those clusters.
   219  // If parameters prefix and owner are both the empty string, the function will return empty objects.
   220  func renderSlice(data jsonOutput, builds map[string]build, prefix string, owner string) ([]jsonCluster, columns) {
   221  	clustered := make([]jsonCluster, 0)
   222  	// Maps build paths to builds
   223  	buildsOut := make(map[string]build)
   224  	jobs := make(sets.String)
   225  
   226  	// For each cluster whose owner field is the owner parameter, or whose id field has a prefix of
   227  	// the prefix parameter, add its tests' jobs to the jobs set.
   228  	for _, cluster := range data.Clustered {
   229  		if owner != "" && cluster.Owner == owner {
   230  			clustered = append(clustered, cluster)
   231  		} else if prefix != "" && strings.HasPrefix(cluster.ID, prefix) {
   232  			clustered = append(clustered, cluster)
   233  		} else {
   234  			continue
   235  		}
   236  
   237  		for _, tst := range cluster.Tests {
   238  			for _, jb := range tst.Jobs {
   239  				jobs.Insert(jb.Name)
   240  			}
   241  		}
   242  	}
   243  
   244  	// Add builds whose job is in jobs to buildsOut
   245  	for _, bld := range builds {
   246  		if jobs.Has(bld.Job) {
   247  			buildsOut[bld.Path] = bld
   248  		}
   249  	}
   250  
   251  	return clustered, buildsToColumns(buildsOut)
   252  }
   253  
// flattenedGlobalCluster is one global cluster in flattened form: its key (clusterText), its
// value (sortedTests), and the result of calling makeNgramCountsDigest on the key.
type flattenedGlobalCluster struct {
	clusterText       string              // The cluster's key
	ngramCountsDigest string              // makeNgramCountsDigest(clusterText)
	sortedTests       []failuresGroupPair // The cluster's groups, as returned by sortByMostFailures()
}
   261  
// test represents a test name and the jobs associated with it. In clustersToDisplay() the jobs
// are those returned by testsGroupByJob() for the test's failures.
type test struct {
	Name string `json:"name"`
	Jobs []job  `json:"jobs"`
}
   267  
/*
jsonCluster represents a global cluster as it will be written to the JSON.

	key:   the cluster text
	id:    the result of calling makeNgramCountsDigest() on key
	text:  the (truncated) failure text of one of the cluster's failures
	spans: the result of calling commonSpans() on all of the cluster's (truncated) failure texts
	tests: the cluster's tests, each with its jobs and build numbers as per testsGroupByJob()
	owner: the owner of the cluster, determined by annotateOwners()
*/
type jsonCluster struct {
	Key   string `json:"key"`
	ID    string `json:"id"`
	Text  string `json:"text"`
	Spans []int  `json:"spans"`
	Tests []test `json:"tests"`
	Owner string `json:"owner"`
}
   286  
   287  // clustersToDisplay transposes and sorts the flattened output of clusterGlobal.
   288  // builds maps a build path to a build object.
   289  func clustersToDisplay(clustered []flattenedGlobalCluster, builds map[string]build, maxFailureTextLength int) []jsonCluster {
   290  	jsonClusters := make([]jsonCluster, 0, len(clustered))
   291  
   292  	for _, flattened := range clustered {
   293  		key := flattened.clusterText
   294  		keyID := flattened.ngramCountsDigest
   295  		clusters := flattened.sortedTests
   296  
   297  		// Determine the number of failures across all clusters
   298  		numClusterFailures := 0
   299  		for _, cluster := range clusters {
   300  			numClusterFailures += len(cluster.Failures)
   301  		}
   302  
   303  		if numClusterFailures > 1 {
   304  			jCluster := jsonCluster{
   305  				Key:   key,
   306  				ID:    keyID,
   307  				Text:  truncate(clusters[0].Failures[0].FailureText, maxFailureTextLength),
   308  				Tests: make([]test, len(clusters)),
   309  			}
   310  
   311  			// Get all of the failure texts from all clusters
   312  			clusterFailureTexts := make([]string, 0, numClusterFailures)
   313  			for _, cluster := range clusters {
   314  				for _, flr := range cluster.Failures {
   315  					clusterFailureTexts = append(clusterFailureTexts, truncate(flr.FailureText, maxFailureTextLength))
   316  				}
   317  			}
   318  			jCluster.Spans = commonSpans(clusterFailureTexts)
   319  
   320  			// Fill out jCluster.tests
   321  			for i, cluster := range clusters {
   322  				jCluster.Tests[i] = test{
   323  					Name: cluster.Key,
   324  					Jobs: testsGroupByJob(cluster.Failures, builds),
   325  				}
   326  			}
   327  
   328  			jsonClusters = append(jsonClusters, jCluster)
   329  		}
   330  	}
   331  
   332  	return jsonClusters
   333  }
   334  
// job represents a job name and a collection of associated build numbers. testsGroupByJob()
// stores the build numbers as decimal strings.
type job struct {
	Name         string   `json:"name"`
	BuildNumbers []string `json:"builds"`
}
   340  
// build represents a specific instance of a build.
type build struct {
	Path        string `json:"path"`    // Build path; buildsToColumns() derives the job path by cutting off its last segment
	Started     int    `json:"started"` // Start time; annotateOwners() compares it against a one-day (60*60*24) window, so presumably Unix seconds
	Elapsed     int    `json:"elapsed"`
	TestsRun    int    `json:"tests_run"`
	TestsFailed int    `json:"tests_failed"`
	Result      string `json:"result"`
	Executor    string `json:"executor"`
	Job         string `json:"job"`    // Name of the job the build belongs to
	Number      int    `json:"number"` // Build number; 0 is treated as "no build number" by testsGroupByJob() and buildsToColumns()
	PR          string `json:"pr"`
	Key         string `json:"key"` // Often nonexistent
}
   355  
   356  /*
   357  testsGroupByJob takes a group of failures and a map of builds and returns the list of build numbers
   358  that belong to each failure's job.
   359  
   360  builds is a mapping from build paths to build objects.
   361  */
   362  func testsGroupByJob(failures []failure, builds map[string]build) []job {
   363  	// groups maps job names to sets of failures' build numbers (as strings).
   364  	groups := make(map[string]sets.String)
   365  
   366  	// For each failure, grab its build's job name. Map the job name to the failure's build number.
   367  	for _, flr := range failures {
   368  		// Try to grab the build from builds if it exists
   369  		if bld, ok := builds[flr.Build]; ok {
   370  			// If the JSON build's "number" field was not null
   371  			if bld.Number != 0 {
   372  				// Create the set if one doesn't exist for the given job
   373  				if _, ok := groups[bld.Job]; !ok {
   374  					groups[bld.Job] = make(sets.String, 1)
   375  				}
   376  				groups[bld.Job].Insert(strconv.Itoa(bld.Number))
   377  			}
   378  		}
   379  	}
   380  
   381  	// Sort groups in two stages.
   382  	// First, sort each build number set in descending order.
   383  	// Then, sort the jobs by the number of build numbers in each job's build number slice, descending.
   384  
   385  	// First stage
   386  	// sortedBuildNumbers is essentially groups, but with the build numbers sorted.
   387  	sortedBuildNumbers := make(map[string][]string, len(groups))
   388  	// Create the slice to hold the set elements, fill it, and sort it
   389  	for jobName, buildNumberSet := range groups {
   390  		// Initialize the int slice
   391  		sortedBuildNumbers[jobName] = make([]string, len(buildNumberSet))
   392  
   393  		// Fill it
   394  		iter := 0
   395  		for buildNumber := range buildNumberSet {
   396  			sortedBuildNumbers[jobName][iter] = buildNumber
   397  			iter++
   398  		}
   399  
   400  		// Sort it. Use > instead of < in less function to sort descending.
   401  		sort.Slice(sortedBuildNumbers[jobName], func(i, j int) bool { return sortedBuildNumbers[jobName][i] > sortedBuildNumbers[jobName][j] })
   402  	}
   403  
   404  	// Second stage
   405  	sortedGroups := make([]job, 0, len(groups))
   406  
   407  	// Fill sortedGroups
   408  	for newJobName, newBuildNumbers := range sortedBuildNumbers {
   409  		sortedGroups = append(sortedGroups, job{newJobName, newBuildNumbers})
   410  	}
   411  	// Sort it
   412  	sort.Slice(sortedGroups, func(i, j int) bool {
   413  		iGroupLen := len(sortedGroups[i].BuildNumbers)
   414  		jGroupLen := len(sortedGroups[j].BuildNumbers)
   415  
   416  		// If they're the same length, sort by job name alphabetically
   417  		if iGroupLen == jGroupLen {
   418  			return sortedGroups[i].Name < sortedGroups[j].Name
   419  		}
   420  
   421  		// Use > instead of < to sort descending.
   422  		return iGroupLen > jGroupLen
   423  	})
   424  
   425  	return sortedGroups
   426  }
   427  
/*
columnarBuilds represents a collection of build objects where the i-th build's property p can be
found at p[i].

For example, the 4th (0-indexed) build's start time can be found in Started[4], while its elapsed
time can be found in Elapsed[4].

insert() appends to every column at once, which keeps the columns the same length.
*/
type columnarBuilds struct {
	Started     []int    `json:"started"`
	TestsFailed []int    `json:"tests_failed"`
	Elapsed     []int    `json:"elapsed"`
	TestsRun    []int    `json:"tests_run"`
	Result      []string `json:"result"`
	Executor    []string `json:"executor"`
	PR          []string `json:"pr"`
}
   444  
   445  // currentIndex returns the index of the next build to be stored (and, by extension, the number of
   446  // builds currently stored).
   447  func (cb *columnarBuilds) currentIndex() int {
   448  	return len(cb.Started)
   449  }
   450  
   451  // insert adds a build into the columnarBuilds object.
   452  func (cb *columnarBuilds) insert(b build) {
   453  	cb.Started = append(cb.Started, b.Started)
   454  	cb.TestsFailed = append(cb.TestsFailed, b.TestsFailed)
   455  	cb.Elapsed = append(cb.Elapsed, b.Elapsed)
   456  	cb.TestsRun = append(cb.TestsRun, b.TestsRun)
   457  	cb.Result = append(cb.Result, b.Result)
   458  	cb.Executor = append(cb.Executor, b.Executor)
   459  	cb.PR = append(cb.PR, b.PR)
   460  }
   461  
   462  // newColumnarBuilds creates a columnarBuilds object with the correct number of columns. The number
   463  // of columns is the same as the number of builds being converted to columnar form.
   464  func newColumnarBuilds(columns int) columnarBuilds {
   465  	// Start the length at 0 because columnarBuilds.currentIndex() relies on the length.
   466  	return columnarBuilds{
   467  		Started:     make([]int, 0, columns),
   468  		TestsFailed: make([]int, 0, columns),
   469  		Elapsed:     make([]int, 0, columns),
   470  		TestsRun:    make([]int, 0, columns),
   471  		Result:      make([]string, 0, columns),
   472  		Executor:    make([]string, 0, columns),
   473  		PR:          make([]string, 0, columns),
   474  	}
   475  }
   476  
/*
jobCollection describes where the builds of a single job are located in the columnar
representation. It holds one of two things:

  - a map[int]int, mapping build numbers to indexes of builds in the columnar representation, or
  - a []int of the form {first build number, number of builds, index of the first build}, a
    condensed form for dense sequential mappings from builds to indexes (see buildsToColumns()).

This is necessary because the outputted JSON is unstructured, and has some fields that can be
either a map or a slice.
*/
type jobCollection interface{}
   485  
/*
columns represents a collection of builds in columnar form, plus the necessary maps to decode it.

Jobs maps job names to their location in the columnar form (see jobCollection).

Cols is the collection of builds in columnar form.

JobPaths maps a job name to a build path, minus the last path segment. buildsToColumns() takes
the path from the first build it stores for the job.
*/
type columns struct {
	Jobs     map[string]jobCollection `json:"jobs"`
	Cols     columnarBuilds           `json:"cols"`
	JobPaths map[string]string        `json:"job_paths"`
}
   500  
   501  // buildsToColumns converts a map (from build paths to builds) into a columnar form. This compresses
   502  // much better with gzip. See columnarBuilds for more information on the columnar form.
   503  func buildsToColumns(builds map[string]build) columns {
   504  	// The function result
   505  	// result.jobs maps job names to either map[int]int or []int. See jobCollection.
   506  	result := columns{make(map[string]jobCollection), newColumnarBuilds(len(builds)), make(map[string]string)}
   507  
   508  	// Sort the builds before making them columnar
   509  	sortedBuilds := make([]build, 0, len(builds))
   510  	// Fill the slice
   511  	for _, bld := range builds {
   512  		sortedBuilds = append(sortedBuilds, bld)
   513  	}
   514  	// Sort the slice
   515  	sort.Slice(sortedBuilds, func(i, j int) bool {
   516  		// Sort by job name, then by build number
   517  		if sortedBuilds[i].Job == sortedBuilds[j].Job {
   518  			return sortedBuilds[i].Number < sortedBuilds[j].Number
   519  		}
   520  		return sortedBuilds[i].Job < sortedBuilds[j].Job
   521  	})
   522  
   523  	// Add the builds to result.cols
   524  	for _, bld := range sortedBuilds {
   525  		// If there was no build number when the build was retrieved from the JSON
   526  		if bld.Number == 0 {
   527  			continue
   528  		}
   529  
   530  		// Get the index within cols's slices of the next inserted build
   531  		index := result.Cols.currentIndex()
   532  
   533  		// Add the build
   534  		result.Cols.insert(bld)
   535  
   536  		// job maps build numbers to their indexes in the columnar representation
   537  		var job map[int]int
   538  		if _, ok := result.Jobs[bld.Job]; !ok {
   539  			result.Jobs[bld.Job] = make(map[int]int)
   540  		}
   541  		// We can safely assert map[int]int here because replacement of maps with slices only
   542  		// happens later
   543  		job = result.Jobs[bld.Job].(map[int]int)
   544  
   545  		// Store the job path
   546  		if len(job) == 0 {
   547  			result.JobPaths[bld.Job] = bld.Path[:strings.LastIndex(bld.Path, "/")]
   548  		}
   549  
   550  		// Store the column number (index) so we know in which column to find which build
   551  		job[bld.Number] = index
   552  	}
   553  
   554  	// Sort build numbers and compress some data
   555  	for jobName, indexes := range result.Jobs {
   556  		// Sort the build numbers
   557  		sortedBuildNumbers := make([]int, 0, len(indexes.(map[int]int)))
   558  		for key := range indexes.(map[int]int) {
   559  			sortedBuildNumbers = append(sortedBuildNumbers, key)
   560  		}
   561  		sort.Ints(sortedBuildNumbers)
   562  
   563  		base := indexes.(map[int]int)[sortedBuildNumbers[0]]
   564  		count := len(sortedBuildNumbers)
   565  
   566  		// Optimization: if we have a dense sequential mapping of builds=>indexes,
   567  		// store only the first build number, the run length, and the first index number.
   568  		allTrue := true
   569  		for i, buildNumber := range sortedBuildNumbers {
   570  			if indexes.(map[int]int)[buildNumber] != i+base {
   571  				allTrue = false
   572  				break
   573  			}
   574  		}
   575  		if (sortedBuildNumbers[len(sortedBuildNumbers)-1] == sortedBuildNumbers[0]+count-1) && allTrue {
   576  			result.Jobs[jobName] = []int{sortedBuildNumbers[0], count, base}
   577  			for _, n := range sortedBuildNumbers {
   578  				if !(n <= sortedBuildNumbers[0]+len(sortedBuildNumbers)) {
   579  					klog.Fatal(jobName, n, result.Jobs[jobName], len(sortedBuildNumbers), sortedBuildNumbers)
   580  				}
   581  			}
   582  		}
   583  	}
   584  	return result
   585  }