github.com/GoogleCloudPlatform/testgrid@v0.0.174/pkg/summarizer/flakiness.go (about)

     1  /*
     2  Copyright 2020 The TestGrid Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package summarizer
    18  
    19  import (
    20  	"regexp"
    21  
    22  	"github.com/GoogleCloudPlatform/testgrid/internal/result"
    23  	statepb "github.com/GoogleCloudPlatform/testgrid/pb/state"
    24  	summarypb "github.com/GoogleCloudPlatform/testgrid/pb/summary"
    25  	statuspb "github.com/GoogleCloudPlatform/testgrid/pb/test_status"
    26  	"github.com/GoogleCloudPlatform/testgrid/pkg/summarizer/analyzers"
    27  	"github.com/GoogleCloudPlatform/testgrid/pkg/summarizer/common"
    28  )
    29  
    30  const (
    31  	minRuns = 0
    32  	// DefaultInterval is the default number of days of analysis
    33  	DefaultInterval = 7
    34  )
    35  
    36  var (
    37  	infraRegex      = regexp.MustCompile(`^\w+$`)
    38  	testMethodRegex = regexp.MustCompile(`@TESTGRID@`)
    39  )
    40  
    41  type flakinessAnalyzer interface {
    42  	GetFlakiness(gridMetrics []*common.GridMetrics, relevantFilteredStatus map[string][]statuspb.TestStatus, minRuns int, startDate int, endDate int, tab string) *summarypb.HealthinessInfo
    43  }
    44  
    45  // CalculateHealthiness extracts the test run data from each row (which represents a test)
    46  // of the Grid and then analyzes it with an implementation of flakinessAnalyzer, which has
    47  // implementations in the subdir naive and can be injected as needed.
    48  func CalculateHealthiness(grid *statepb.Grid, startTime int, endTime int, tab string) *summarypb.HealthinessInfo {
    49  	gridMetrics, relevantFilteredStatus := parseGrid(grid, startTime, endTime)
    50  	analyzer := analyzers.FlipAnalyzer{
    51  		RelevantStatus: relevantFilteredStatus,
    52  	}
    53  	return analyzer.GetFlakiness(gridMetrics, minRuns, startTime, endTime, tab)
    54  }
    55  
    56  // CalculateTrend populates the ChangeFromLastInterval fields of each TestInfo by comparing
    57  // the current flakiness to the flakiness calculated for the last interval. Interval length
    58  // is a config value that is 7 days by default. The Trend enum defaults to UNKNOWN, so there
    59  // is no need to explicitly assign UNKNOWN when a test appears in currentHealthiness but not
    60  // in previousHealthiness.
    61  func CalculateTrend(currentHealthiness, previousHealthiness *summarypb.HealthinessInfo) {
    62  	previousFlakiness := map[string]float32{}
    63  	// Create a map for faster lookup and avoiding repeated iteration through Tests
    64  	for _, test := range previousHealthiness.Tests {
    65  		previousFlakiness[test.DisplayName] = test.Flakiness
    66  	}
    67  
    68  	for i, test := range currentHealthiness.Tests {
    69  		if value, ok := previousFlakiness[test.DisplayName]; ok {
    70  			currentHealthiness.Tests[i].ChangeFromLastInterval = getTrend(test.Flakiness, value)
    71  			currentHealthiness.Tests[i].PreviousFlakiness = []float32{value}
    72  		}
    73  	}
    74  }
    75  
    76  func getTrend(currentFlakiness, previousFlakiness float32) summarypb.TestInfo_Trend {
    77  	if currentFlakiness < previousFlakiness {
    78  		return summarypb.TestInfo_DOWN
    79  	}
    80  	if currentFlakiness > previousFlakiness {
    81  		return summarypb.TestInfo_UP
    82  	}
    83  	return summarypb.TestInfo_NO_CHANGE
    84  }
    85  
    86  func parseGrid(grid *statepb.Grid, startTime int, endTime int) ([]*common.GridMetrics, map[string][]analyzers.StatusCategory) {
    87  	// Get the relevant data for flakiness from each Grid (which represents
    88  	// a dashboard tab) as a list of GridMetrics structs
    89  
    90  	// TODO (itsazhuhere@): consider refactoring/using summary.go's gridMetrics function
    91  	// as it does very similar data collection.
    92  
    93  	// Multiply by 1000 because currently Column.Started is in milliseconds; this is used
    94  	// for comparisons later. startTime and endTime will be used in a Timestamp later that
    95  	// requires seconds, so we would like to impact that at little as possible.
    96  	startTime *= 1000
    97  	endTime *= 1000
    98  
    99  	// We create maps because result.Map returns a map where we can access each result
   100  	// through the test name, and at each instance we can increment our types.Result
   101  	// using the same key. At the end we can filter out those types.Result that had
   102  	// 0 of all counts.
   103  	gridMetricsMap := make(map[string]*common.GridMetrics, 0)
   104  	gridRows := make(map[string]*statepb.Row)
   105  
   106  	// For each filtered test, status of non-infra-failure tests
   107  	rowStatuses := make(map[string][]analyzers.StatusCategory)
   108  
   109  	for i, row := range grid.Rows {
   110  		gridRows[row.Name] = grid.Rows[i]
   111  		gridMetricsMap[row.Name] = common.NewGridMetrics(row.Name)
   112  		rowStatuses[row.Name] = []analyzers.StatusCategory{}
   113  	}
   114  
   115  	// result.Map is written in a way that assumes each test/row name is unique
   116  	rowResults := result.Map(grid.Rows)
   117  	failingColumns := failingColumns(len(grid.Columns), grid.Rows)
   118  
   119  	for key, f := range rowResults {
   120  		if !isValidTestName(key) {
   121  			continue
   122  		}
   123  		rowToMessageIndex := 0
   124  		i := -1
   125  		for {
   126  			nextRowResult, more := f()
   127  			if !more {
   128  				break
   129  			}
   130  			i++
   131  			if i >= len(grid.Columns) {
   132  				break
   133  			}
   134  			rowResult := result.Coalesce(nextRowResult, result.ShowRunning)
   135  
   136  			// We still need to increment rowToMessageIndex even if we want to skip counting
   137  			// this column.
   138  			if !isWithinTimeFrame(grid.Columns[i], startTime, endTime) {
   139  				switch rowResult {
   140  				case statuspb.TestStatus_NO_RESULT:
   141  					// Ignore NO_RESULT (e.g. blank cell)
   142  				default:
   143  					rowToMessageIndex++
   144  				}
   145  				continue
   146  			}
   147  			switch rowResult {
   148  			case statuspb.TestStatus_NO_RESULT:
   149  				continue
   150  			case statuspb.TestStatus_FAIL:
   151  				message := gridRows[key].Messages[rowToMessageIndex]
   152  				if isInfraFailure(message) {
   153  					gridMetricsMap[key].FailedInfraCount++
   154  					gridMetricsMap[key].InfraFailures[message]++
   155  				} else {
   156  					gridMetricsMap[key].Failed++
   157  					if !failingColumns[i] {
   158  						rowStatuses[key] = append(rowStatuses[key], analyzers.StatusFail)
   159  					}
   160  				}
   161  			case statuspb.TestStatus_PASS:
   162  				gridMetricsMap[key].Passed++
   163  				rowStatuses[key] = append(rowStatuses[key], analyzers.StatusPass)
   164  			case statuspb.TestStatus_FLAKY:
   165  				rowStatuses[key] = append(rowStatuses[key], analyzers.StatusFlaky)
   166  				getValueOfFlakyMetric(gridMetricsMap[key])
   167  			}
   168  			rowToMessageIndex++
   169  		}
   170  	}
   171  	gridMetrics := make([]*common.GridMetrics, 0)
   172  	for _, metric := range gridMetricsMap {
   173  		if metric.Failed > 0 || metric.Passed > 0 || metric.FlakyCount > 0 {
   174  			gridMetrics = append(gridMetrics, metric)
   175  		}
   176  	}
   177  	return gridMetrics, rowStatuses
   178  }
   179  
   180  // failingColumns iterates over the grid in column-major order
   181  // and returns a slice of bool indicating whether a column is 100% failing.
   182  func failingColumns(numColumns int, rows []*statepb.Row) []bool {
   183  	// Convert to map of iterators to handle run-length encoding.
   184  	rowResults := result.Map(rows)
   185  	out := make([]bool, numColumns)
   186  	if len(rows) <= 1 {
   187  		// If we only have one test, don't do this metric.
   188  		return out
   189  	}
   190  	for i := 0; i < numColumns; i++ {
   191  		out[i] = true
   192  		for _, row := range rowResults {
   193  			rr, more := row()
   194  			if !more {
   195  				continue
   196  			}
   197  			crr := result.Coalesce(rr, true)
   198  			if crr == statuspb.TestStatus_PASS || crr == statuspb.TestStatus_FLAKY {
   199  				out[i] = false
   200  			}
   201  		}
   202  	}
   203  	return out
   204  }
   205  
   206  func isInfraFailure(message string) bool {
   207  	return (message != "" && infraRegex.MatchString(message))
   208  }
   209  
   210  func getValueOfFlakyMetric(gridMetrics *common.GridMetrics) {
   211  	// TODO (itszhuhere@): add a way to get exact flakiness from a Row_FLAKY cell
   212  	// For now we will leave it as 50%, because:
   213  	// a) gridMetrics.flakiness and .flakyCount are currently not used by anything
   214  	// and
   215  	// b) there's no easy way to get the exact flakiness measurement from prow or whatever else
   216  	// and potentially
   217  	// c) GKE does not currently enable retry on flakes so it isn't as important right now
   218  	// Keep in mind that flakiness is measured as out of 100, i.e. 23 not .23
   219  	flakiness := 50.0
   220  	gridMetrics.FlakyCount++
   221  	// Formula for adding one new value to mean is mean + (newValue - mean) / newCount
   222  	gridMetrics.AverageFlakiness += (flakiness - gridMetrics.AverageFlakiness) / float64(gridMetrics.FlakyCount)
   223  }
   224  
   225  func isWithinTimeFrame(column *statepb.Column, startTime, endTime int) bool {
   226  	return column.Started >= float64(startTime) && column.Started <= float64(endTime)
   227  }
   228  
   229  func isValidTestName(name string) bool {
   230  	// isValidTestName filters out test names, currently only tests with
   231  	// @TESTGRID@ which would otherwise make some summaries unnecessarily large
   232  	return !testMethodRegex.MatchString(name)
   233  }