go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/inputbuffer/input_segment.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package inputbuffer 16 17 import ( 18 "time" 19 20 "google.golang.org/protobuf/types/known/timestamppb" 21 22 cpb "go.chromium.org/luci/analysis/internal/changepoints/proto" 23 ) 24 25 // Segment is a representation of segments in input buffer. 26 // It is only use in-memory. It will not be stored in spanner or bigquery. 27 type Segment struct { 28 // Start index in the input buffer history, inclusively. 29 // As in the history slice, verdicts are store oldest first, so StartIndex 30 // corresponds to the oldest verdict in the segment. 31 StartIndex int 32 // End index in the input buffer history, inclusively. 33 // As in the history slice, verdicts are store oldest first, so EndIndex 34 // corresponds to the newest verdict in the segment. 35 EndIndex int 36 // Counts the statistics of the segment. 37 // Note that this includes all verdicts, as opposed to Segment.FinalizedCount 38 // which only includes finalized verdicts. 39 Counts *cpb.Counts 40 // The hour the most recent verdict with an unexpected test result 41 // was produced. 42 // Note that this includes all verdicts, as opposed to Segment.FinalizedCount 43 // which only includes finalized verdicts. 44 MostRecentUnexpectedResultHourAllVerdicts *timestamppb.Timestamp 45 46 // The following fields are copied from the Segment proto. 47 48 // Whether the segment is the first segment in the input buffer. 49 HasStartChangepoint bool 50 // The earliest commit position included in the segment. 51 StartPosition int64 52 // The earliest hour a verdict with the given start_position was recorded. 53 StartHour *timestamppb.Timestamp 54 // The end commit position of the segment. 55 // If set, the invariant end_position >= start_position holds. 56 EndPosition int64 57 // The latest hour a verdict with the last commit position in the segment 58 // was recorded. 59 EndHour *timestamppb.Timestamp 60 // The lower bound of the change point position at the start of the segment 61 // in a 99% two-tailed confidence interval. Inclusive. 62 // Only set if has_start_changepoint is set. If set, the invariant 63 // previous_segment.start_position <= start_position_lower_bound_99th <= start_position. 64 StartPositionLowerBound99Th int64 65 // The upper bound of the change point position at the start of the segment 66 // in a 99% two-tailed confidence interval. Inclusive. 67 // Only set if has_start_changepoint is set. If set, the invariant 68 // start_position <= start_position_upper_bound_99th <= end_position 69 // holds. 70 StartPositionUpperBound99Th int64 71 } 72 73 func (s *Segment) Length() int { 74 return s.EndIndex - s.StartIndex + 1 75 } 76 77 // EvictedSegment represents a segment or segment part which was evicted 78 // from the input buffer. 79 type EvictedSegment struct { 80 // The segment (either full or partial) which is being evicted. 81 // A segment may be partial for one or both of the following reasons: 82 // - The eviction is occuring because of limited input buffer space 83 // (not because of a finalized changepoint), so only a fraction 84 // of the segment needs to be evicted. 85 // - Previously, part of the segment was evicted (for the above 86 // reason), so subsequent evictions are necessarily only 87 // in relation to the remaining part of that segment. 88 // 89 // The consumer generally does not need to be concerned about which 90 // of these cases applies, and should always process evicted segments 91 // in commit position order, merging them with any previously 92 // evicted finalizing segment (if any). 93 Segment *cpb.Segment 94 95 // The verdicts which are being evicted. These correspond to the 96 // Segment above. Not in any particular order. 97 Verdicts []PositionVerdict 98 } 99 100 // SegmentedInputBuffer wraps the input buffer and the segments it contains. 101 type SegmentedInputBuffer struct { 102 InputBuffer *Buffer 103 // The Segments are disjoint and are sorted by StartIndex ascendingly. 104 Segments []*Segment 105 } 106 107 // ChangePoint records the index position of a change point, together with its 108 // confidence interval. 109 type ChangePoint struct { 110 // NominalIndex is nominal index of the change point in history. 111 NominalIndex int 112 // LowerBound99ThIndex and UpperBound99ThIndex are indices (in history) of 113 // the 99% confidence interval of the change point. 114 LowerBound99ThIndex int 115 UpperBound99ThIndex int 116 } 117 118 // Segmentize generates segments based on the input buffer and 119 // the change points detected. 120 // Input buffer verdicts are sorted by commit position (oldest first), then 121 // by result time (oldest first) and MUST have been returned by a call to 122 // MergeBuffer(...) immediately prior to this Segmentize call (i.e. without 123 // mutating the input buffer or the merge buffer.) 124 // changePoints is the change points for history. It is 125 // sorted in ascending order (smallest index first). 126 func (ib *Buffer) Segmentize(history []PositionVerdict, changePoints []ChangePoint) *SegmentedInputBuffer { 127 // Exit early if we have empty history. 128 if len(history) == 0 { 129 return &SegmentedInputBuffer{ 130 InputBuffer: ib, 131 Segments: []*Segment{}, 132 } 133 } 134 135 segments := make([]*Segment, len(changePoints)+1) 136 // Go from back to front, for easier processing of the confidence interval. 137 segmentEndIndex := len(history) - 1 138 for i := len(changePoints) - 1; i >= 0; i-- { 139 // Add the segment starting from change point. 140 changePoint := changePoints[i] 141 segmentStartIndex := changePoint.NominalIndex 142 sw := inputBufferSegment(segmentStartIndex, segmentEndIndex, history) 143 sw.HasStartChangepoint = true 144 sw.StartPositionLowerBound99Th = int64(history[changePoint.LowerBound99ThIndex].CommitPosition) 145 sw.StartPositionUpperBound99Th = int64(history[changePoint.UpperBound99ThIndex].CommitPosition) 146 segments[i+1] = sw 147 segmentEndIndex = segmentStartIndex - 1 148 } 149 150 // Add the first segment. 151 sw := inputBufferSegment(0, segmentEndIndex, history) 152 segments[0] = sw 153 154 return &SegmentedInputBuffer{ 155 InputBuffer: ib, 156 Segments: segments, 157 } 158 } 159 160 // inputBufferSegment returns a Segment from startIndex (inclusively) to 161 // endIndex (inclusively). 162 func inputBufferSegment(startIndex, endIndex int, history []PositionVerdict) *Segment { 163 if startIndex > endIndex { 164 panic("invalid segment index: startIndex > endIndex") 165 } 166 return &Segment{ 167 StartIndex: startIndex, 168 EndIndex: endIndex, 169 StartPosition: int64(history[startIndex].CommitPosition), 170 EndPosition: int64(history[endIndex].CommitPosition), 171 StartHour: timestamppb.New(history[startIndex].Hour), 172 EndHour: timestamppb.New(history[endIndex].Hour), 173 Counts: segmentCounts(history[startIndex : endIndex+1]), 174 MostRecentUnexpectedResultHourAllVerdicts: mostRecentUnexpectedResultHour(history[startIndex : endIndex+1]), 175 } 176 } 177 178 // EvictSegments evicts segments from the segmented input buffer. 179 // 180 // Returned EvictedSegments are sorted from the oldest commit position 181 // to the newest. 182 // 183 // A segment will be evicted if: 184 // 1. The changepoint that ends the segment has been finalized, 185 // because half of the input buffer is newer than the ending commit 186 // position). In this case, the entire remainder of the segment will 187 // be evicted. 188 // 2. There is storage pressure in the input buffer (it is at risk of 189 // containing too many verdicts). In this case, a segment will be 190 // partially evicted, and that segment will be 'finalizing'. 191 // 192 // Note that if the last segment evicted is a finalized segment, this function 193 // will add an extra finalizing segment to the end of evicted segments. This is 194 // to keep track of the confidence interval of the starting commit position of 195 // the segment after the finalized segment. It is needed because after a 196 // finalized segment is evicted, its verdicts disappear from the input buffer 197 // and we can no longer calculate the confidence interval of the start of the 198 // next segment. 199 // 200 // As a result, the result of this function will contain all finalized segments, 201 // except for the last segment (if any), which is finalizing. 202 // 203 // The segments remaining after eviction will be in sib.Segments. 204 func (sib *SegmentedInputBuffer) EvictSegments() []EvictedSegment { 205 evictedSegments := []EvictedSegment{} 206 remainingSegments := []*Segment{} 207 208 // Evict finalized segments. 209 segmentIndex := 0 210 for ; segmentIndex < len(sib.Segments); segmentIndex++ { 211 inSeg := sib.Segments[segmentIndex] 212 // Update the start and end index of inSeg. 213 // Note that after eviction of previous finalized segments, inSeg is the 214 // first remaining segment of the input buffer. 215 inSeg.EndIndex -= inSeg.StartIndex 216 inSeg.StartIndex = 0 217 if !sib.InputBuffer.isSegmentFinalized(inSeg) { 218 break 219 } 220 seg := sib.InputBuffer.evictFinalizedSegment(inSeg) 221 evictedSegments = append(evictedSegments, seg) 222 } 223 224 // If the buffer is full, evict part of it to the finalizing segment. 225 shouldEvict, endPos := sib.InputBuffer.EvictionRange() 226 remainingLength := 0 227 if shouldEvict { 228 inSeg := sib.Segments[segmentIndex] 229 evicted, remaining := sib.InputBuffer.evictFinalizingSegment(endPos, inSeg) 230 evictedSegments = append(evictedSegments, evicted) 231 remainingSegments = append(remainingSegments, remaining) 232 remainingLength = remaining.Length() 233 segmentIndex++ 234 } 235 236 // The remaining segments are active segments. 237 offset := 0 238 if segmentIndex < len(sib.Segments) { 239 offset = sib.Segments[segmentIndex].StartIndex - remainingLength 240 } 241 for ; segmentIndex < len(sib.Segments); segmentIndex++ { 242 inSeg := sib.Segments[segmentIndex] 243 // Offset the indices of the segment due to previously evicted segments. 244 inSeg.StartIndex -= offset 245 inSeg.EndIndex -= offset 246 remainingSegments = append(remainingSegments, inSeg) 247 } 248 249 sib.Segments = remainingSegments 250 251 // If the last segment is finalized, we also add a finalizing segment 252 // to the end of the evicted segments, to record the start position 253 // (and confidence interval) of the following segment. 254 l := len(evictedSegments) 255 if l > 0 && evictedSegments[l-1].Segment.State == cpb.SegmentState_FINALIZED { 256 firstRemainingSeg := remainingSegments[0] 257 evictedSegments = append(evictedSegments, EvictedSegment{ 258 Segment: &cpb.Segment{ 259 State: cpb.SegmentState_FINALIZING, 260 HasStartChangepoint: true, 261 StartPosition: firstRemainingSeg.StartPosition, 262 StartHour: firstRemainingSeg.StartHour, 263 StartPositionLowerBound_99Th: firstRemainingSeg.StartPositionLowerBound99Th, 264 StartPositionUpperBound_99Th: firstRemainingSeg.StartPositionUpperBound99Th, 265 FinalizedCounts: &cpb.Counts{}, 266 }, 267 Verdicts: []PositionVerdict{}, 268 }) 269 } 270 return evictedSegments 271 } 272 273 // isSegmentFinalized returns true if the segment is finalized, i.e. 274 // the ending commit position of the segment is in the oldest half of the 275 // buffer. 276 // It means not much refinement can be made to the segment. 277 func (ib *Buffer) isSegmentFinalized(seg *Segment) bool { 278 capacity := ib.HotBufferCapacity + ib.ColdBufferCapacity 279 // The number of verdicts which have commit positions newer than the segment. 280 // Note that verdicts are stored in the input buffer from oldest to newest, 281 // so those after seg.EndIndex are newer than the segment. 282 verdictsNewerThanSegment := (ib.Size() - seg.EndIndex) 283 return verdictsNewerThanSegment >= (capacity / 2) 284 } 285 286 // evictFinalizedSegment removes all verdicts of segment from input buffer. 287 // This has an assumption that the segment verdicts are at the beginning 288 // of the hot and cold buffers. 289 // Returns a segment containing the information about the verdicts being evicted. 290 func (ib *Buffer) evictFinalizedSegment(seg *Segment) EvictedSegment { 291 // Evict hot buffer. 292 evictEndIndex := -1 293 for i, v := range ib.HotBuffer.Verdicts { 294 if v.CommitPosition <= int(seg.EndPosition) { 295 evictEndIndex = i 296 } else { 297 break 298 } 299 } 300 var evictedVerdicts []PositionVerdict 301 // EvictBefore(...) will modify the Verdicts in-place, we should 302 // copy verdicts to a new slice to avoid them being overwritten. 303 evictedVerdicts = append(evictedVerdicts, ib.HotBuffer.Verdicts[:evictEndIndex+1]...) 304 305 ib.HotBuffer.EvictBefore(evictEndIndex + 1) 306 307 // Evict cold buffer. 308 evictEndIndex = -1 309 for i, v := range ib.ColdBuffer.Verdicts { 310 if v.CommitPosition <= int(seg.EndPosition) { 311 evictEndIndex = i 312 } else { 313 break 314 } 315 } 316 if evictEndIndex > -1 { 317 ib.IsColdBufferDirty = true 318 // EvictBefore(...) will modify the Verdicts in-place, we should 319 // copy verdicts to a new slice to avoid them being overwritten. 320 evictedVerdicts = append(evictedVerdicts, ib.ColdBuffer.Verdicts[:evictEndIndex+1]...) 321 ib.ColdBuffer.EvictBefore(evictEndIndex + 1) 322 } 323 324 // Return evicted segment. 325 segment := &cpb.Segment{ 326 State: cpb.SegmentState_FINALIZED, 327 FinalizedCounts: seg.Counts, 328 HasStartChangepoint: seg.HasStartChangepoint, 329 StartPosition: seg.StartPosition, 330 StartHour: seg.StartHour, 331 EndPosition: seg.EndPosition, 332 EndHour: seg.EndHour, 333 StartPositionLowerBound_99Th: seg.StartPositionLowerBound99Th, 334 StartPositionUpperBound_99Th: seg.StartPositionUpperBound99Th, 335 MostRecentUnexpectedResultHour: seg.MostRecentUnexpectedResultHourAllVerdicts, 336 } 337 return EvictedSegment{ 338 Segment: segment, 339 Verdicts: evictedVerdicts, 340 } 341 } 342 343 // evictFinalizingSegment evicts part of the finalizing segment when 344 // there is space pressure in the input buffer. 345 // Note that space pressure is defined by the cold buffer meeting 346 // capacity and can only occur after a compaction from the hot buffer 347 // to the cold buffer (i.e. the hot buffer is empty and the cold buffer 348 // overflows). 349 // Returns evicted and remaining segments. 350 func (ib *Buffer) evictFinalizingSegment(endPos int, seg *Segment) (evicted EvictedSegment, remaining *Segment) { 351 if len(ib.HotBuffer.Verdicts) > 0 { 352 // This indicates a logic error. 353 panic("hot buffer is not empty during eviction") 354 } 355 356 remainingCount := segmentCounts(ib.ColdBuffer.Verdicts[endPos+1 : seg.EndIndex+1]) 357 evictedMostRecentHour := mostRecentUnexpectedResultHour(ib.ColdBuffer.Verdicts[:endPos+1]) 358 remainingMostRecentHour := mostRecentUnexpectedResultHour(ib.ColdBuffer.Verdicts[endPos+1 : seg.EndIndex+1]) 359 360 // EvictBefore(...) will modify the Verdicts in-place, we should 361 // copy verdicts to a new slice to avoid them being overwritten. 362 evictedVerdicts := append([]PositionVerdict(nil), ib.ColdBuffer.Verdicts[:endPos+1]...) 363 evictedCount := segmentCounts(evictedVerdicts) 364 ib.ColdBuffer.EvictBefore(endPos + 1) 365 ib.IsColdBufferDirty = true 366 // Evicted segment. 367 evicted = EvictedSegment{ 368 Segment: &cpb.Segment{ 369 State: cpb.SegmentState_FINALIZING, 370 FinalizedCounts: evictedCount, 371 HasStartChangepoint: seg.HasStartChangepoint, 372 StartPosition: seg.StartPosition, 373 StartHour: seg.StartHour, 374 StartPositionLowerBound_99Th: seg.StartPositionLowerBound99Th, 375 StartPositionUpperBound_99Th: seg.StartPositionUpperBound99Th, 376 MostRecentUnexpectedResultHour: evictedMostRecentHour, 377 }, 378 Verdicts: evictedVerdicts, 379 } 380 381 // Remaining segment. 382 remaining = &Segment{ 383 StartIndex: 0, 384 EndIndex: seg.EndIndex - endPos - 1, 385 Counts: remainingCount, 386 EndPosition: seg.EndPosition, 387 EndHour: seg.EndHour, 388 MostRecentUnexpectedResultHourAllVerdicts: remainingMostRecentHour, 389 } 390 391 return evicted, remaining 392 } 393 394 // segmentCount counts the statistics of history. 395 func segmentCounts(history []PositionVerdict) *cpb.Counts { 396 counts := &cpb.Counts{} 397 for _, verdict := range history { 398 counts.TotalVerdicts++ 399 if verdict.IsSimpleExpectedPass { 400 counts.TotalRuns++ 401 counts.TotalResults++ 402 counts.ExpectedPassedResults++ 403 } else { 404 verdictHasExpectedResults := false 405 verdictHasUnexpectedResults := false 406 for _, run := range verdict.Details.Runs { 407 // Verdict-level statistics. 408 verdictHasExpectedResults = verdictHasExpectedResults || (run.Expected.Count() > 0) 409 verdictHasUnexpectedResults = verdictHasUnexpectedResults || (run.Unexpected.Count() > 0) 410 411 if run.IsDuplicate { 412 continue 413 } 414 // Result-level statistics (ignores duplicate runs). 415 counts.TotalResults += int64(run.Expected.Count() + run.Unexpected.Count()) 416 counts.UnexpectedResults += int64(run.Unexpected.Count()) 417 counts.ExpectedPassedResults += int64(run.Expected.PassCount) 418 counts.ExpectedFailedResults += int64(run.Expected.FailCount) 419 counts.ExpectedCrashedResults += int64(run.Expected.CrashCount) 420 counts.ExpectedAbortedResults += int64(run.Expected.AbortCount) 421 counts.UnexpectedPassedResults += int64(run.Unexpected.PassCount) 422 counts.UnexpectedFailedResults += int64(run.Unexpected.FailCount) 423 counts.UnexpectedCrashedResults += int64(run.Unexpected.CrashCount) 424 counts.UnexpectedAbortedResults += int64(run.Unexpected.AbortCount) 425 426 // Run-level statistics (ignores duplicate runs). 427 counts.TotalRuns++ 428 // flaky run. 429 isFlakyRun := run.Expected.Count() > 0 && run.Unexpected.Count() > 0 430 if isFlakyRun { 431 counts.FlakyRuns++ 432 } 433 // unexpected unretried run. 434 isUnexpectedUnretried := run.Unexpected.Count() == 1 && run.Expected.Count() == 0 435 if isUnexpectedUnretried { 436 counts.UnexpectedUnretriedRuns++ 437 } 438 // unexpected after retries run. 439 isUnexpectedAfterRetries := run.Unexpected.Count() > 1 && run.Expected.Count() == 0 440 if isUnexpectedAfterRetries { 441 counts.UnexpectedAfterRetryRuns++ 442 } 443 } 444 if verdictHasUnexpectedResults && !verdictHasExpectedResults { 445 counts.UnexpectedVerdicts++ 446 } 447 if verdictHasUnexpectedResults && verdictHasExpectedResults { 448 counts.FlakyVerdicts++ 449 } 450 } 451 } 452 return counts 453 } 454 455 // mostRecentUnexpectedResultHour return the hours for the most recent 456 // verdict that contains unexpected result. 457 func mostRecentUnexpectedResultHour(history []PositionVerdict) *timestamppb.Timestamp { 458 latest := time.Unix(0, 0) 459 found := false 460 // history is sorted by commit position, not hour, so we need to do a loop. 461 for _, verdict := range history { 462 for _, run := range verdict.Details.Runs { 463 if run.IsDuplicate { 464 continue 465 } 466 if run.Unexpected.Count() > 0 { 467 if verdict.Hour.Unix() > latest.Unix() { 468 latest = verdict.Hour 469 found = true 470 } 471 break 472 } 473 } 474 } 475 if !found { 476 return nil 477 } 478 return timestamppb.New(latest) 479 }