go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/testvariantbranch/test_variant_branch.go

// Copyright 2023 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package testvariantbranch handles test variant branches for change point
// analysis.
package testvariantbranch

import (
	"sort"
	"time"

	"google.golang.org/protobuf/proto"

	"go.chromium.org/luci/analysis/internal/changepoints/inputbuffer"
	cpb "go.chromium.org/luci/analysis/internal/changepoints/proto"
	pb "go.chromium.org/luci/analysis/proto/v1"
)

const (
	// Each test variant branch retains at most 100 finalized segments.
	maxFinalizedSegmentsToRetain = 100

	// We only retain finalized segments for the last 5 years.
	// For simplicity, we consider a year to have 365 days.
	// For testability, we calculate the 5 years from the last ingestion time
	// of the test variant branch (this means we may over-retain some segments).
	maxHoursToRetain = 5 * 365 * 24

	// StatisticsRetentionDays is the number of days to keep statistics about
	// evicted verdicts. See the Statistics proto for more.
	//
	// This is a minimum period driven by functional and operational requirements;
	// our deletion logic will tend to retain data for longer (but this is
	// OK as it is not user data).
	StatisticsRetentionDays = 11
)

// Entry represents one row in the TestVariantBranch Spanner table.
// See go/luci-test-variant-analysis-design for details.
type Entry struct {
	// IsNew indicates whether the TestVariantBranch is new or already
	// exists in Spanner.
	// It is used for reducing the number of mutations. For example, the Variant
	// field is only inserted once.
	IsNew       bool
	Project     string
	TestID      string
	VariantHash string
	Variant     *pb.Variant
	RefHash     []byte
	SourceRef   *pb.SourceRef
	InputBuffer *inputbuffer.Buffer
	// If this is true, it means we should trigger a write of FinalizingSegment
	// to Spanner.
	IsFinalizingSegmentDirty bool
	// The finalizing segment, if any.
	// The counts for the finalizing segment should only include the verdicts
	// that are no longer in the input buffer.
	FinalizingSegment *cpb.Segment
	// If this is true, it means we should trigger a write of FinalizedSegments
	// to Spanner.
	IsFinalizedSegmentsDirty bool
	// The finalized segments for the test variant branch.
	FinalizedSegments *cpb.Segments
	// If true, it means we should trigger a write of Statistics to Spanner.
	IsStatisticsDirty bool
	// Statistics about verdicts which have been evicted from the input buffer.
	Statistics *cpb.Statistics
}

// New creates a new empty test variant branch entry, with a preallocated
// input buffer.
func New() *Entry {
	tvb := &Entry{}
	tvb.InputBuffer = inputbuffer.New()
	return tvb
}
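
// A minimal usage sketch of how a caller might populate a brand new entry
// for a test variant branch that does not yet exist in Spanner. All values
// below are hypothetical and for illustration only:
//
//	tvb := New()
//	tvb.IsNew = true // the row has not been written to Spanner yet
//	tvb.Project = "myproject"            // hypothetical
//	tvb.TestID = "ninja://some/test"     // hypothetical
//	tvb.VariantHash = "0123456789abcdef" // hypothetical
//	tvb.Variant = variant                // hypothetical *pb.Variant
//	tvb.RefHash = refHash                // hypothetical hash of the source ref
//	tvb.SourceRef = sourceRef            // hypothetical *pb.SourceRef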

// Clear resets a test variant branch entry to an empty state, similar to
// after a call to New().
func (tvb *Entry) Clear() {
	tvb.IsNew = false
	tvb.Project = ""
	tvb.TestID = ""
	tvb.VariantHash = ""
	tvb.Variant = nil
	tvb.RefHash = nil
	tvb.SourceRef = nil
	tvb.InputBuffer.Clear()
	tvb.IsFinalizingSegmentDirty = false
	tvb.FinalizingSegment = nil
	tvb.IsFinalizedSegmentsDirty = false
	tvb.FinalizedSegments = nil
	tvb.IsStatisticsDirty = false
	tvb.Statistics = nil
}

// Copy makes a deep copy of a test variant branch entry.
func (tvb *Entry) Copy() *Entry {
	if tvb == nil {
		return nil
	}
	refHashCopy := make([]byte, len(tvb.RefHash))
	copy(refHashCopy, tvb.RefHash)

	return &Entry{
		IsNew:                    tvb.IsNew,
		Project:                  tvb.Project,
		TestID:                   tvb.TestID,
		VariantHash:              tvb.VariantHash,
		Variant:                  proto.Clone(tvb.Variant).(*pb.Variant),
		RefHash:                  refHashCopy,
		SourceRef:                proto.Clone(tvb.SourceRef).(*pb.SourceRef),
		InputBuffer:              tvb.InputBuffer.Copy(),
		IsFinalizingSegmentDirty: tvb.IsFinalizingSegmentDirty,
		FinalizingSegment:        proto.Clone(tvb.FinalizingSegment).(*cpb.Segment),
		IsFinalizedSegmentsDirty: tvb.IsFinalizedSegmentsDirty,
		FinalizedSegments:        proto.Clone(tvb.FinalizedSegments).(*cpb.Segments),
		IsStatisticsDirty:        tvb.IsStatisticsDirty,
		Statistics:               proto.Clone(tvb.Statistics).(*cpb.Statistics),
	}
}

// InsertToInputBuffer inserts a new verdict into the input buffer.
func (tvb *Entry) InsertToInputBuffer(pv inputbuffer.PositionVerdict) {
	tvb.InputBuffer.InsertVerdict(pv)
}

// InsertFinalizedSegment inserts a segment at the end of the finalized
// segments.
func (tvb *Entry) InsertFinalizedSegment(segment *cpb.Segment) {
	if tvb.FinalizedSegments == nil {
		tvb.FinalizedSegments = &cpb.Segments{}
	}
	// Assert that the segment is finalized.
	if segment.State != cpb.SegmentState_FINALIZED {
		panic("insert non-finalized segment to FinalizedSegments")
	}
	// Assert that the inserted segment is later than existing segments.
	l := len(tvb.FinalizedSegments.Segments)
	if l > 0 && tvb.FinalizedSegments.Segments[l-1].EndPosition >= segment.StartPosition {
		panic("insert older segment to FinalizedSegments")
	}
	tvb.FinalizedSegments.Segments = append(tvb.FinalizedSegments.Segments, segment)
	tvb.IsFinalizedSegmentsDirty = true
}

// UpdateOutputBuffer updates the output buffer with the segments evicted from
// the input buffer.
// evictedSegments should contain only finalized segments, except for the
// last segment (if any), which must be a finalizing segment.
// evictedSegments is sorted in ascending order of commit position (oldest
// segment first).
func (tvb *Entry) UpdateOutputBuffer(evictedSegments []inputbuffer.EvictedSegment) {
	// Nothing to update.
	if len(evictedSegments) == 0 {
		return
	}
	verifyEvictedSegments(evictedSegments)
	// If there is a finalizing segment in the output buffer, this finalizing
	// segment should be "combined" with the first evicted segment.
	segmentIndex := 0
	if tvb.FinalizingSegment != nil {
		segmentIndex = 1
		combinedSegment := combineSegment(tvb.FinalizingSegment, evictedSegments[0].Segment)
		tvb.IsFinalizingSegmentDirty = true
		if combinedSegment.State == cpb.SegmentState_FINALIZING {
			// Replace the finalizing segment.
			tvb.FinalizingSegment = combinedSegment
		} else { // Finalized state.
			tvb.FinalizingSegment = nil
			tvb.InsertFinalizedSegment(combinedSegment)
		}
	}

	for ; segmentIndex < len(evictedSegments); segmentIndex++ {
		segment := evictedSegments[segmentIndex]
		if segment.Segment.State == cpb.SegmentState_FINALIZED {
			tvb.InsertFinalizedSegment(segment.Segment)
		} else { // Finalizing segment.
			tvb.FinalizingSegment = segment.Segment
			tvb.IsFinalizingSegmentDirty = true
		}
	}

	var evictedVerdicts []inputbuffer.PositionVerdict
	for _, segments := range evictedSegments {
		evictedVerdicts = append(evictedVerdicts, segments.Verdicts...)
	}
	tvb.Statistics = applyStatisticsRetention(insertVerdictsIntoStatistics(tvb.Statistics, evictedVerdicts))
	tvb.IsStatisticsDirty = true

	// Assert that the finalizing segment is after the finalized segments.
	tvb.verifyOutputBuffer()
}

func verifyEvictedSegments(evictedSegments []inputbuffer.EvictedSegment) {
	// Verify that all evicted segments are in the FINALIZED state, except
	// for the last segment, which must be in the FINALIZING state.
	for i, seg := range evictedSegments {
		if i != len(evictedSegments)-1 {
			if seg.Segment.State != cpb.SegmentState_FINALIZED {
				panic("evictedSegments should contain only finalized segments, except the last one")
			}
		} else {
			if seg.Segment.State != cpb.SegmentState_FINALIZING {
				panic("last segment of evicted segments should be finalizing")
			}
		}
	}
}

// verifyOutputBuffer verifies that the finalizing segment comes after
// (i.e. is newer than) all finalized segments.
// It panics if this is not the case.
func (tvb *Entry) verifyOutputBuffer() {
	finalizedSegments := tvb.FinalizedSegments.GetSegments()
	l := len(finalizedSegments)
	if tvb.FinalizingSegment == nil || l == 0 {
		return
	}
	if finalizedSegments[l-1].EndPosition >= tvb.FinalizingSegment.StartPosition {
		panic("finalizing segment should come after all finalized segments")
	}
}

// ApplyRetentionPolicyForFinalizedSegments applies the retention policy
// to finalized segments.
// The following retention policy applies to finalized segments:
//   - At most 100 finalized segments can be stored.
//   - Finalized segments are retained for 5 years from when they closed.
//
// fromTime is the time from which the 5 year period is calculated.
//
// The retention policy that deletes test variant branches without
// test results in the last 90 days is enforced separately by a cron job.
func (tvb *Entry) ApplyRetentionPolicyForFinalizedSegments(fromTime time.Time) {
	finalizedSegments := tvb.FinalizedSegments.GetSegments()
	if len(finalizedSegments) == 0 {
		return
	}

	// We keep the finalized segments from this index.
	// Note that finalized segments are ordered by commit position (lowest
	// first), so in theory (although it is rare) a later segment may have a
	// smaller end hour than an earlier segment. Therefore, we may over-retain
	// some segments.
	startIndexToKeep := 0
	if len(finalizedSegments) > maxFinalizedSegmentsToRetain {
		startIndexToKeep = len(finalizedSegments) - maxFinalizedSegmentsToRetain
	}
	for i := startIndexToKeep; i < len(finalizedSegments); i++ {
		segment := finalizedSegments[i]
		if segment.EndHour.AsTime().Add(time.Hour * maxHoursToRetain).After(fromTime) {
			startIndexToKeep = i
			break
		}
	}

	if startIndexToKeep > 0 {
		tvb.IsFinalizedSegmentsDirty = true
		tvb.FinalizedSegments.Segments = finalizedSegments[startIndexToKeep:]
	}
}
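
// A minimal sketch (assumed call pattern, not taken from this file) of how
// the output buffer update and the finalized-segment retention policy might
// be chained during ingestion. The evicted segments are assumed to be
// produced by changepoint analysis over the input buffer, which happens
// elsewhere; "now" stands for the ingestion time:
//
//	var evicted []inputbuffer.EvictedSegment // from input buffer eviction (assumed)
//	tvb.UpdateOutputBuffer(evicted)
//	tvb.ApplyRetentionPolicyForFinalizedSegments(now)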

// combineSegment combines the finalizing segment from the output buffer with
// another partial segment evicted from the input buffer.
func combineSegment(finalizingSegment, evictedSegment *cpb.Segment) *cpb.Segment {
	result := &cpb.Segment{
		State: evictedSegment.State,
		// Use the start position information provided by prior evictions.
		HasStartChangepoint:          finalizingSegment.HasStartChangepoint,
		StartPosition:                finalizingSegment.StartPosition,
		StartHour:                    finalizingSegment.StartHour,
		StartPositionLowerBound_99Th: finalizingSegment.StartPositionLowerBound_99Th,
		StartPositionUpperBound_99Th: finalizingSegment.StartPositionUpperBound_99Th,
		// Use end position information provided by later evictions.
		EndPosition: evictedSegment.EndPosition,
		EndHour:     evictedSegment.EndHour,
		// Combine counts.
		FinalizedCounts: AddCounts(finalizingSegment.FinalizedCounts, evictedSegment.FinalizedCounts),
	}
	result.MostRecentUnexpectedResultHour = finalizingSegment.MostRecentUnexpectedResultHour
	if result.MostRecentUnexpectedResultHour.GetSeconds() < evictedSegment.MostRecentUnexpectedResultHour.GetSeconds() {
		result.MostRecentUnexpectedResultHour = evictedSegment.MostRecentUnexpectedResultHour
	}
	return result
}

// AddCounts returns the sum of 2 statistics counts.
func AddCounts(count1 *cpb.Counts, count2 *cpb.Counts) *cpb.Counts {
	return &cpb.Counts{
		TotalResults:             count1.TotalResults + count2.TotalResults,
		UnexpectedResults:        count1.UnexpectedResults + count2.UnexpectedResults,
		ExpectedPassedResults:    count1.ExpectedPassedResults + count2.ExpectedPassedResults,
		ExpectedFailedResults:    count1.ExpectedFailedResults + count2.ExpectedFailedResults,
		ExpectedCrashedResults:   count1.ExpectedCrashedResults + count2.ExpectedCrashedResults,
		ExpectedAbortedResults:   count1.ExpectedAbortedResults + count2.ExpectedAbortedResults,
		UnexpectedPassedResults:  count1.UnexpectedPassedResults + count2.UnexpectedPassedResults,
		UnexpectedFailedResults:  count1.UnexpectedFailedResults + count2.UnexpectedFailedResults,
		UnexpectedCrashedResults: count1.UnexpectedCrashedResults + count2.UnexpectedCrashedResults,
		UnexpectedAbortedResults: count1.UnexpectedAbortedResults + count2.UnexpectedAbortedResults,
		TotalRuns:                count1.TotalRuns + count2.TotalRuns,
		UnexpectedUnretriedRuns:  count1.UnexpectedUnretriedRuns + count2.UnexpectedUnretriedRuns,
		UnexpectedAfterRetryRuns: count1.UnexpectedAfterRetryRuns + count2.UnexpectedAfterRetryRuns,
		FlakyRuns:                count1.FlakyRuns + count2.FlakyRuns,
		TotalVerdicts:            count1.TotalVerdicts + count2.TotalVerdicts,
		UnexpectedVerdicts:       count1.UnexpectedVerdicts + count2.UnexpectedVerdicts,
		FlakyVerdicts:            count1.FlakyVerdicts + count2.FlakyVerdicts,
	}
}

// insertVerdictsIntoStatistics updates the given statistics to include
// the given evicted verdicts. Retention policies are not applied; the
// caller should apply applyStatisticsRetention separately where required.
func insertVerdictsIntoStatistics(stats *cpb.Statistics, verdicts []inputbuffer.PositionVerdict) *cpb.Statistics {
	bucketByHour := make(map[int64]*cpb.Statistics_HourBucket)
	for _, bucket := range stats.GetHourlyBuckets() {
		// Copy the hourly bucket to avoid mutating the passed statistics object.
		bucketByHour[bucket.Hour] = &cpb.Statistics_HourBucket{
			Hour:               bucket.Hour,
			UnexpectedVerdicts: bucket.UnexpectedVerdicts,
			FlakyVerdicts:      bucket.FlakyVerdicts,
			TotalVerdicts:      bucket.TotalVerdicts,
		}
	}

	for _, v := range verdicts {
		// Find or create the hourly bucket.
		hour := v.Hour.Unix() / 3600
		bucket, ok := bucketByHour[hour]
		if !ok {
			bucket = &cpb.Statistics_HourBucket{Hour: hour}
			bucketByHour[hour] = bucket
		}

		// Add the verdict to the hourly bucket.
		bucket.TotalVerdicts++
		if !v.IsSimpleExpectedPass {
			verdictHasExpectedResults := false
			verdictHasUnexpectedResults := false
			for _, run := range v.Details.Runs {
				verdictHasExpectedResults = verdictHasExpectedResults || (run.Expected.Count() > 0)
				verdictHasUnexpectedResults = verdictHasUnexpectedResults || (run.Unexpected.Count() > 0)
			}
			if verdictHasUnexpectedResults && !verdictHasExpectedResults {
				bucket.UnexpectedVerdicts++
			}
			if verdictHasUnexpectedResults && verdictHasExpectedResults {
				bucket.FlakyVerdicts++
			}
		}
	}

	buckets := make([]*cpb.Statistics_HourBucket, 0, len(bucketByHour))
	for _, bucket := range bucketByHour {
		buckets = append(buckets, bucket)
	}

	// Sort in ascending order (oldest hour first).
	sort.Slice(buckets, func(i, j int) bool {
		return buckets[i].Hour < buckets[j].Hour
	})

	return &cpb.Statistics{
		HourlyBuckets: buckets,
	}
}

// applyStatisticsRetention applies the retention policies
// to statistics data.
func applyStatisticsRetention(stats *cpb.Statistics) *cpb.Statistics {
	buckets := stats.HourlyBuckets

	// Apply data deletion policies.
	if len(buckets) > 0 {
		lastHour := buckets[len(buckets)-1].Hour
		deleteBeforeIndex := -1
		for i, bucket := range buckets {
			// Retain buckets which are within the retention interval
			// of the most recent bucket hour. The most recent bucket
			// hour will always be less recent than time.Now(), so
			// this will tend to retain somewhat more data than necessary.
			//
			// We use this logic instead of one that depends on time.Now()
			// as it is simpler from a testability perspective than a
			// system time-dependent function.
			if bucket.Hour > lastHour-StatisticsRetentionDays*24 {
				break
			}
			deleteBeforeIndex = i
		}
		buckets = buckets[deleteBeforeIndex+1:]
	}
	return &cpb.Statistics{HourlyBuckets: buckets}
}

// MergedStatistics returns statistics about the verdicts ingested for the
// given test variant branch. Statistics comprise data from both the
// input buffer and the output buffer.
func (tvb *Entry) MergedStatistics() *cpb.Statistics {
	verdicts := make([]inputbuffer.PositionVerdict, 0, inputbuffer.DefaultColdBufferCapacity+inputbuffer.DefaultHotBufferCapacity)
	verdicts = append(verdicts, tvb.InputBuffer.ColdBuffer.Verdicts...)
	verdicts = append(verdicts, tvb.InputBuffer.HotBuffer.Verdicts...)
	return insertVerdictsIntoStatistics(tvb.Statistics, verdicts)
}
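
// hasRecentUnexpectedVerdicts is an illustrative sketch of how the merged
// statistics might be consumed: it reports whether any hourly bucket
// (spanning both the input and the output buffer) recorded an unexpected
// or flaky verdict. It is a documentation sketch only and is not called by
// the analysis code in this package.
func hasRecentUnexpectedVerdicts(tvb *Entry) bool {
	stats := tvb.MergedStatistics()
	for _, bucket := range stats.GetHourlyBuckets() {
		if bucket.UnexpectedVerdicts > 0 || bucket.FlakyVerdicts > 0 {
			return true
		}
	}
	return false
}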