go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/inputbuffer/input_segment.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/inputbuffer/input_segment.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package inputbuffer
    16  
    17  import (
    18  	"time"
    19  
    20  	"google.golang.org/protobuf/types/known/timestamppb"
    21  
    22  	cpb "go.chromium.org/luci/analysis/internal/changepoints/proto"
    23  )
    24  
// Segment is a representation of a segment in the input buffer.
// It is only used in-memory. It will not be stored in Spanner or BigQuery.
type Segment struct {
	// Start index in the input buffer history, inclusive.
	// As in the history slice, verdicts are stored oldest first, so StartIndex
	// corresponds to the oldest verdict in the segment.
	StartIndex int
	// End index in the input buffer history, inclusive.
	// As in the history slice, verdicts are stored oldest first, so EndIndex
	// corresponds to the newest verdict in the segment.
	EndIndex int
	// Counts holds the statistics of the segment.
	// Note that this includes all verdicts of the segment in the input buffer,
	// as opposed to the FinalizedCounts field of the Segment proto, which only
	// includes finalized verdicts.
	Counts *cpb.Counts
	// The hour the most recent verdict with an unexpected test result
	// was produced.
	// Note that this is computed over all verdicts of the segment in the input
	// buffer, not only over finalized verdicts.
	MostRecentUnexpectedResultHourAllVerdicts *timestamppb.Timestamp

	// The following fields are copied from the Segment proto.

	// Whether the segment starts at a detected changepoint.
	// Segmentize sets this on every segment except the first one, which
	// starts at the beginning of the history rather than at a changepoint.
	HasStartChangepoint bool
	// The earliest commit position included in the segment.
	StartPosition int64
	// The earliest hour a verdict with the given start_position was recorded.
	StartHour *timestamppb.Timestamp
	// The end commit position of the segment.
	// If set, the invariant end_position >= start_position holds.
	EndPosition int64
	// The latest hour a verdict with the last commit position in the segment
	// was recorded.
	EndHour *timestamppb.Timestamp
	// The lower bound of the change point position at the start of the segment
	// in a 99% two-tailed confidence interval. Inclusive.
	// Only set if has_start_changepoint is set. If set, the invariant
	// previous_segment.start_position <= start_position_lower_bound_99th <= start_position
	// holds.
	StartPositionLowerBound99Th int64
	// The upper bound of the change point position at the start of the segment
	// in a 99% two-tailed confidence interval. Inclusive.
	// Only set if has_start_changepoint is set. If set, the invariant
	// start_position <= start_position_upper_bound_99th <= end_position
	// holds.
	StartPositionUpperBound99Th int64
}
    72  
    73  func (s *Segment) Length() int {
    74  	return s.EndIndex - s.StartIndex + 1
    75  }
    76  
// EvictedSegment represents a segment or segment part which was evicted
// from the input buffer.
type EvictedSegment struct {
	// The segment (either full or partial) which is being evicted.
	// A segment may be partial for one or both of the following reasons:
	// - The eviction is occurring because of limited input buffer space
	//   (not because of a finalized changepoint), so only a fraction
	//   of the segment needs to be evicted.
	// - Previously, part of the segment was evicted (for the above
	//   reason), so subsequent evictions are necessarily only
	//   in relation to the remaining part of that segment.
	//
	// The consumer generally does not need to be concerned about which
	// of these cases applies, and should always process evicted segments
	// in commit position order, merging them with the previously
	// evicted finalizing segment (if any).
	Segment *cpb.Segment

	// The verdicts which are being evicted. These correspond to the
	// Segment above. Not in any particular order.
	Verdicts []PositionVerdict
}
    99  
// SegmentedInputBuffer wraps the input buffer and the segments it contains.
type SegmentedInputBuffer struct {
	// InputBuffer is the buffer the segments below index into.
	InputBuffer *Buffer
	// Segments are disjoint and sorted by StartIndex in ascending order.
	Segments []*Segment
}
   106  
// ChangePoint records the index position of a change point, together with its
// confidence interval.
type ChangePoint struct {
	// NominalIndex is the nominal index of the change point in history.
	// Segmentize uses it as the start index of the segment that follows
	// the change point.
	NominalIndex int
	// LowerBound99ThIndex and UpperBound99ThIndex are the indices (in history)
	// bounding the 99% confidence interval of the change point.
	LowerBound99ThIndex int
	UpperBound99ThIndex int
}
   117  
   118  // Segmentize generates segments based on the input buffer and
   119  // the change points detected.
   120  // Input buffer verdicts are sorted by commit position (oldest first), then
   121  // by result time (oldest first) and MUST have been returned by a call to
   122  // MergeBuffer(...) immediately prior to this Segmentize call (i.e. without
   123  // mutating the input buffer or the merge buffer.)
   124  // changePoints is the change points for history. It is
   125  // sorted in ascending order (smallest index first).
   126  func (ib *Buffer) Segmentize(history []PositionVerdict, changePoints []ChangePoint) *SegmentedInputBuffer {
   127  	// Exit early if we have empty history.
   128  	if len(history) == 0 {
   129  		return &SegmentedInputBuffer{
   130  			InputBuffer: ib,
   131  			Segments:    []*Segment{},
   132  		}
   133  	}
   134  
   135  	segments := make([]*Segment, len(changePoints)+1)
   136  	// Go from back to front, for easier processing of the confidence interval.
   137  	segmentEndIndex := len(history) - 1
   138  	for i := len(changePoints) - 1; i >= 0; i-- {
   139  		// Add the segment starting from change point.
   140  		changePoint := changePoints[i]
   141  		segmentStartIndex := changePoint.NominalIndex
   142  		sw := inputBufferSegment(segmentStartIndex, segmentEndIndex, history)
   143  		sw.HasStartChangepoint = true
   144  		sw.StartPositionLowerBound99Th = int64(history[changePoint.LowerBound99ThIndex].CommitPosition)
   145  		sw.StartPositionUpperBound99Th = int64(history[changePoint.UpperBound99ThIndex].CommitPosition)
   146  		segments[i+1] = sw
   147  		segmentEndIndex = segmentStartIndex - 1
   148  	}
   149  
   150  	// Add the first segment.
   151  	sw := inputBufferSegment(0, segmentEndIndex, history)
   152  	segments[0] = sw
   153  
   154  	return &SegmentedInputBuffer{
   155  		InputBuffer: ib,
   156  		Segments:    segments,
   157  	}
   158  }
   159  
   160  // inputBufferSegment returns a Segment from startIndex (inclusively) to
   161  // endIndex (inclusively).
   162  func inputBufferSegment(startIndex, endIndex int, history []PositionVerdict) *Segment {
   163  	if startIndex > endIndex {
   164  		panic("invalid segment index: startIndex > endIndex")
   165  	}
   166  	return &Segment{
   167  		StartIndex:    startIndex,
   168  		EndIndex:      endIndex,
   169  		StartPosition: int64(history[startIndex].CommitPosition),
   170  		EndPosition:   int64(history[endIndex].CommitPosition),
   171  		StartHour:     timestamppb.New(history[startIndex].Hour),
   172  		EndHour:       timestamppb.New(history[endIndex].Hour),
   173  		Counts:        segmentCounts(history[startIndex : endIndex+1]),
   174  		MostRecentUnexpectedResultHourAllVerdicts: mostRecentUnexpectedResultHour(history[startIndex : endIndex+1]),
   175  	}
   176  }
   177  
   178  // EvictSegments evicts segments from the segmented input buffer.
   179  //
   180  // Returned EvictedSegments are sorted from the oldest commit position
   181  // to the newest.
   182  //
   183  // A segment will be evicted if:
   184  //  1. The changepoint that ends the segment has been finalized,
   185  //     because half of the input buffer is newer than the ending commit
   186  //     position). In this case, the entire remainder of the segment will
   187  //     be evicted.
   188  //  2. There is storage pressure in the input buffer (it is at risk of
   189  //     containing too many verdicts). In this case, a segment will be
   190  //     partially evicted, and that segment will be 'finalizing'.
   191  //
   192  // Note that if the last segment evicted is a finalized segment, this function
   193  // will add an extra finalizing segment to the end of evicted segments. This is
   194  // to keep track of the confidence interval of the starting commit position of
   195  // the segment after the finalized segment. It is needed because after a
   196  // finalized segment is evicted, its verdicts disappear from the input buffer
   197  // and we can no longer calculate the confidence interval of the start of the
   198  // next segment.
   199  //
   200  // As a result, the result of this function will contain all finalized segments,
   201  // except for the last segment (if any), which is finalizing.
   202  //
   203  // The segments remaining after eviction will be in sib.Segments.
   204  func (sib *SegmentedInputBuffer) EvictSegments() []EvictedSegment {
   205  	evictedSegments := []EvictedSegment{}
   206  	remainingSegments := []*Segment{}
   207  
   208  	// Evict finalized segments.
   209  	segmentIndex := 0
   210  	for ; segmentIndex < len(sib.Segments); segmentIndex++ {
   211  		inSeg := sib.Segments[segmentIndex]
   212  		// Update the start and end index of inSeg.
   213  		// Note that after eviction of previous finalized segments, inSeg is the
   214  		// first remaining segment of the input buffer.
   215  		inSeg.EndIndex -= inSeg.StartIndex
   216  		inSeg.StartIndex = 0
   217  		if !sib.InputBuffer.isSegmentFinalized(inSeg) {
   218  			break
   219  		}
   220  		seg := sib.InputBuffer.evictFinalizedSegment(inSeg)
   221  		evictedSegments = append(evictedSegments, seg)
   222  	}
   223  
   224  	// If the buffer is full, evict part of it to the finalizing segment.
   225  	shouldEvict, endPos := sib.InputBuffer.EvictionRange()
   226  	remainingLength := 0
   227  	if shouldEvict {
   228  		inSeg := sib.Segments[segmentIndex]
   229  		evicted, remaining := sib.InputBuffer.evictFinalizingSegment(endPos, inSeg)
   230  		evictedSegments = append(evictedSegments, evicted)
   231  		remainingSegments = append(remainingSegments, remaining)
   232  		remainingLength = remaining.Length()
   233  		segmentIndex++
   234  	}
   235  
   236  	// The remaining segments are active segments.
   237  	offset := 0
   238  	if segmentIndex < len(sib.Segments) {
   239  		offset = sib.Segments[segmentIndex].StartIndex - remainingLength
   240  	}
   241  	for ; segmentIndex < len(sib.Segments); segmentIndex++ {
   242  		inSeg := sib.Segments[segmentIndex]
   243  		// Offset the indices of the segment due to previously evicted segments.
   244  		inSeg.StartIndex -= offset
   245  		inSeg.EndIndex -= offset
   246  		remainingSegments = append(remainingSegments, inSeg)
   247  	}
   248  
   249  	sib.Segments = remainingSegments
   250  
   251  	// If the last segment is finalized, we also add a finalizing segment
   252  	// to the end of the evicted segments, to record the start position
   253  	// (and confidence interval) of the following segment.
   254  	l := len(evictedSegments)
   255  	if l > 0 && evictedSegments[l-1].Segment.State == cpb.SegmentState_FINALIZED {
   256  		firstRemainingSeg := remainingSegments[0]
   257  		evictedSegments = append(evictedSegments, EvictedSegment{
   258  			Segment: &cpb.Segment{
   259  				State:                        cpb.SegmentState_FINALIZING,
   260  				HasStartChangepoint:          true,
   261  				StartPosition:                firstRemainingSeg.StartPosition,
   262  				StartHour:                    firstRemainingSeg.StartHour,
   263  				StartPositionLowerBound_99Th: firstRemainingSeg.StartPositionLowerBound99Th,
   264  				StartPositionUpperBound_99Th: firstRemainingSeg.StartPositionUpperBound99Th,
   265  				FinalizedCounts:              &cpb.Counts{},
   266  			},
   267  			Verdicts: []PositionVerdict{},
   268  		})
   269  	}
   270  	return evictedSegments
   271  }
   272  
   273  // isSegmentFinalized returns true if the segment is finalized, i.e.
   274  // the ending commit position of the segment is in the oldest half of the
   275  // buffer.
   276  // It means not much refinement can be made to the segment.
   277  func (ib *Buffer) isSegmentFinalized(seg *Segment) bool {
   278  	capacity := ib.HotBufferCapacity + ib.ColdBufferCapacity
   279  	// The number of verdicts which have commit positions newer than the segment.
   280  	// Note that verdicts are stored in the input buffer from oldest to newest,
   281  	// so those after seg.EndIndex are newer than the segment.
   282  	verdictsNewerThanSegment := (ib.Size() - seg.EndIndex)
   283  	return verdictsNewerThanSegment >= (capacity / 2)
   284  }
   285  
   286  // evictFinalizedSegment removes all verdicts of segment from input buffer.
   287  // This has an assumption that the segment verdicts are at the beginning
   288  // of the hot and cold buffers.
   289  // Returns a segment containing the information about the verdicts being evicted.
   290  func (ib *Buffer) evictFinalizedSegment(seg *Segment) EvictedSegment {
   291  	// Evict hot buffer.
   292  	evictEndIndex := -1
   293  	for i, v := range ib.HotBuffer.Verdicts {
   294  		if v.CommitPosition <= int(seg.EndPosition) {
   295  			evictEndIndex = i
   296  		} else {
   297  			break
   298  		}
   299  	}
   300  	var evictedVerdicts []PositionVerdict
   301  	// EvictBefore(...) will modify the Verdicts in-place, we should
   302  	// copy verdicts to a new slice to avoid them being overwritten.
   303  	evictedVerdicts = append(evictedVerdicts, ib.HotBuffer.Verdicts[:evictEndIndex+1]...)
   304  
   305  	ib.HotBuffer.EvictBefore(evictEndIndex + 1)
   306  
   307  	// Evict cold buffer.
   308  	evictEndIndex = -1
   309  	for i, v := range ib.ColdBuffer.Verdicts {
   310  		if v.CommitPosition <= int(seg.EndPosition) {
   311  			evictEndIndex = i
   312  		} else {
   313  			break
   314  		}
   315  	}
   316  	if evictEndIndex > -1 {
   317  		ib.IsColdBufferDirty = true
   318  		// EvictBefore(...) will modify the Verdicts in-place, we should
   319  		// copy verdicts to a new slice to avoid them being overwritten.
   320  		evictedVerdicts = append(evictedVerdicts, ib.ColdBuffer.Verdicts[:evictEndIndex+1]...)
   321  		ib.ColdBuffer.EvictBefore(evictEndIndex + 1)
   322  	}
   323  
   324  	// Return evicted segment.
   325  	segment := &cpb.Segment{
   326  		State:                          cpb.SegmentState_FINALIZED,
   327  		FinalizedCounts:                seg.Counts,
   328  		HasStartChangepoint:            seg.HasStartChangepoint,
   329  		StartPosition:                  seg.StartPosition,
   330  		StartHour:                      seg.StartHour,
   331  		EndPosition:                    seg.EndPosition,
   332  		EndHour:                        seg.EndHour,
   333  		StartPositionLowerBound_99Th:   seg.StartPositionLowerBound99Th,
   334  		StartPositionUpperBound_99Th:   seg.StartPositionUpperBound99Th,
   335  		MostRecentUnexpectedResultHour: seg.MostRecentUnexpectedResultHourAllVerdicts,
   336  	}
   337  	return EvictedSegment{
   338  		Segment:  segment,
   339  		Verdicts: evictedVerdicts,
   340  	}
   341  }
   342  
   343  // evictFinalizingSegment evicts part of the finalizing segment when
   344  // there is space pressure in the input buffer.
   345  // Note that space pressure is defined by the cold buffer meeting
   346  // capacity and can only occur after a compaction from the hot buffer
   347  // to the cold buffer (i.e. the hot buffer is empty and the cold buffer
   348  // overflows).
   349  // Returns evicted and remaining segments.
   350  func (ib *Buffer) evictFinalizingSegment(endPos int, seg *Segment) (evicted EvictedSegment, remaining *Segment) {
   351  	if len(ib.HotBuffer.Verdicts) > 0 {
   352  		// This indicates a logic error.
   353  		panic("hot buffer is not empty during eviction")
   354  	}
   355  
   356  	remainingCount := segmentCounts(ib.ColdBuffer.Verdicts[endPos+1 : seg.EndIndex+1])
   357  	evictedMostRecentHour := mostRecentUnexpectedResultHour(ib.ColdBuffer.Verdicts[:endPos+1])
   358  	remainingMostRecentHour := mostRecentUnexpectedResultHour(ib.ColdBuffer.Verdicts[endPos+1 : seg.EndIndex+1])
   359  
   360  	// EvictBefore(...) will modify the Verdicts in-place, we should
   361  	// copy verdicts to a new slice to avoid them being overwritten.
   362  	evictedVerdicts := append([]PositionVerdict(nil), ib.ColdBuffer.Verdicts[:endPos+1]...)
   363  	evictedCount := segmentCounts(evictedVerdicts)
   364  	ib.ColdBuffer.EvictBefore(endPos + 1)
   365  	ib.IsColdBufferDirty = true
   366  	// Evicted segment.
   367  	evicted = EvictedSegment{
   368  		Segment: &cpb.Segment{
   369  			State:                          cpb.SegmentState_FINALIZING,
   370  			FinalizedCounts:                evictedCount,
   371  			HasStartChangepoint:            seg.HasStartChangepoint,
   372  			StartPosition:                  seg.StartPosition,
   373  			StartHour:                      seg.StartHour,
   374  			StartPositionLowerBound_99Th:   seg.StartPositionLowerBound99Th,
   375  			StartPositionUpperBound_99Th:   seg.StartPositionUpperBound99Th,
   376  			MostRecentUnexpectedResultHour: evictedMostRecentHour,
   377  		},
   378  		Verdicts: evictedVerdicts,
   379  	}
   380  
   381  	// Remaining segment.
   382  	remaining = &Segment{
   383  		StartIndex:  0,
   384  		EndIndex:    seg.EndIndex - endPos - 1,
   385  		Counts:      remainingCount,
   386  		EndPosition: seg.EndPosition,
   387  		EndHour:     seg.EndHour,
   388  		MostRecentUnexpectedResultHourAllVerdicts: remainingMostRecentHour,
   389  	}
   390  
   391  	return evicted, remaining
   392  }
   393  
   394  // segmentCount counts the statistics of history.
   395  func segmentCounts(history []PositionVerdict) *cpb.Counts {
   396  	counts := &cpb.Counts{}
   397  	for _, verdict := range history {
   398  		counts.TotalVerdicts++
   399  		if verdict.IsSimpleExpectedPass {
   400  			counts.TotalRuns++
   401  			counts.TotalResults++
   402  			counts.ExpectedPassedResults++
   403  		} else {
   404  			verdictHasExpectedResults := false
   405  			verdictHasUnexpectedResults := false
   406  			for _, run := range verdict.Details.Runs {
   407  				// Verdict-level statistics.
   408  				verdictHasExpectedResults = verdictHasExpectedResults || (run.Expected.Count() > 0)
   409  				verdictHasUnexpectedResults = verdictHasUnexpectedResults || (run.Unexpected.Count() > 0)
   410  
   411  				if run.IsDuplicate {
   412  					continue
   413  				}
   414  				// Result-level statistics (ignores duplicate runs).
   415  				counts.TotalResults += int64(run.Expected.Count() + run.Unexpected.Count())
   416  				counts.UnexpectedResults += int64(run.Unexpected.Count())
   417  				counts.ExpectedPassedResults += int64(run.Expected.PassCount)
   418  				counts.ExpectedFailedResults += int64(run.Expected.FailCount)
   419  				counts.ExpectedCrashedResults += int64(run.Expected.CrashCount)
   420  				counts.ExpectedAbortedResults += int64(run.Expected.AbortCount)
   421  				counts.UnexpectedPassedResults += int64(run.Unexpected.PassCount)
   422  				counts.UnexpectedFailedResults += int64(run.Unexpected.FailCount)
   423  				counts.UnexpectedCrashedResults += int64(run.Unexpected.CrashCount)
   424  				counts.UnexpectedAbortedResults += int64(run.Unexpected.AbortCount)
   425  
   426  				// Run-level statistics (ignores duplicate runs).
   427  				counts.TotalRuns++
   428  				// flaky run.
   429  				isFlakyRun := run.Expected.Count() > 0 && run.Unexpected.Count() > 0
   430  				if isFlakyRun {
   431  					counts.FlakyRuns++
   432  				}
   433  				// unexpected unretried run.
   434  				isUnexpectedUnretried := run.Unexpected.Count() == 1 && run.Expected.Count() == 0
   435  				if isUnexpectedUnretried {
   436  					counts.UnexpectedUnretriedRuns++
   437  				}
   438  				// unexpected after retries run.
   439  				isUnexpectedAfterRetries := run.Unexpected.Count() > 1 && run.Expected.Count() == 0
   440  				if isUnexpectedAfterRetries {
   441  					counts.UnexpectedAfterRetryRuns++
   442  				}
   443  			}
   444  			if verdictHasUnexpectedResults && !verdictHasExpectedResults {
   445  				counts.UnexpectedVerdicts++
   446  			}
   447  			if verdictHasUnexpectedResults && verdictHasExpectedResults {
   448  				counts.FlakyVerdicts++
   449  			}
   450  		}
   451  	}
   452  	return counts
   453  }
   454  
   455  // mostRecentUnexpectedResultHour return the hours for the most recent
   456  // verdict that contains unexpected result.
   457  func mostRecentUnexpectedResultHour(history []PositionVerdict) *timestamppb.Timestamp {
   458  	latest := time.Unix(0, 0)
   459  	found := false
   460  	// history is sorted by commit position, not hour, so we need to do a loop.
   461  	for _, verdict := range history {
   462  		for _, run := range verdict.Details.Runs {
   463  			if run.IsDuplicate {
   464  				continue
   465  			}
   466  			if run.Unexpected.Count() > 0 {
   467  				if verdict.Hour.Unix() > latest.Unix() {
   468  					latest = verdict.Hour
   469  					found = true
   470  				}
   471  				break
   472  			}
   473  		}
   474  	}
   475  	if !found {
   476  		return nil
   477  	}
   478  	return timestamppb.New(latest)
   479  }