// Copyright 2022 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package luci.analysis.v1;

option go_package = "go.chromium.org/luci/analysis/proto/v1;analysispb";

import "google/protobuf/timestamp.proto";
import "go.chromium.org/luci/analysis/proto/v1/common.proto";
import "go.chromium.org/luci/analysis/proto/v1/sources.proto";

// Provides methods to obtain statistics about test variants.
service TestVariants {
  // Queries the failure rate of specified test variants, returning
  // signals indicating if the test variant is flaky and/or
  // deterministically failing. Intended for use by recipes to
  // inform exoneration decisions.
  //
  // This RPC is used to support version one of exoneration.
  // It will be replaced by QueryStability over time.
  //
  // Changes to this RPC should comply with https://google.aip.dev/231.
  rpc QueryFailureRate(QueryTestVariantFailureRateRequest)
      returns (QueryTestVariantFailureRateResponse) {}

  // Queries the stability of specified test variants.
  // Intended for use by recipes to inform exoneration decisions,
  // and by UI to show test stability.
  rpc QueryStability(QueryTestVariantStabilityRequest)
      returns (QueryTestVariantStabilityResponse) {}
}

// Request message for TestVariants.QueryFailureRate.
message QueryTestVariantFailureRateRequest {
  // The LUCI Project for which test variants should be looked up.
  string project = 1;

  // The list of test variants to retrieve results for.
  // At most 100 test variants may be specified in one request.
  // It is an error to request the same test variant twice.
  repeated TestVariantIdentifier test_variants = 2;
}

// The identity of a test variant.
message TestVariantIdentifier {
  // A unique identifier of the test in a LUCI project.
  string test_id = 1;

  // Description of one specific way of running the test,
  // e.g. a specific bucket, builder and a test suite.
  Variant variant = 2;

  // The variant hash. Alternative to specifying the variant.
  // Prefer to specify the full variant (if available), as the
  // variant hashing implementation is an implementation detail
  // and may change.
  string variant_hash = 3;
}

// Response message for TestVariants.QueryFailureRate.
message QueryTestVariantFailureRateResponse {
  // Interval defines the time buckets used for time interval
  // data.
  message Interval {
    // The interval being defined. age=1 is the most recent
    // interval, age=2 is the interval immediately before that,
    // and so on.
    int32 interval_age = 1;

    // The start time of the interval (inclusive).
    google.protobuf.Timestamp start_time = 2;

    // The end time of the interval (exclusive).
    google.protobuf.Timestamp end_time = 3;
  }

  // The time buckets used for time interval data.
  //
  // Currently each interval represents 24 weekday hours, including the
  // weekend contained in that range (if any). This is to compensate
  // for the typically reduced testing that is seen over weekends.
  // So interval with age=1 is the last 24 hours of weekday data
  // before the time the query is made, age=2 is the 24 hours of
  // weekday data before that, and so on.
  // In total, there will be 5 intervals, numbered 1 to 5.
  //
  // 24 hours of weekday data before X is defined to be
  // the smallest period ending at X which includes exactly 24
  // hours of a weekday in UTC. Therefore:
  // If X is on a weekend (in UTC), the returned data will
  // cover all of the weekend up to X and all of previous Friday (in UTC).
  // If X is on a Monday (in UTC), the returned data will cover all
  // of the weekend, up to a time on Friday that corresponds to
  // X's time on Monday (e.g. if X is Monday at 8am, the period goes
  // back to Friday at 8am).
  // Otherwise, X is on a Tuesday to Friday (in UTC), the period
  // will cover the last 24 hours.
  repeated Interval intervals = 1;

  // The test variant failure rate analysis requested.
  // Test variants are returned in the order they were requested.
  repeated TestVariantFailureRateAnalysis test_variants = 2;
}

// Signals relevant to determining whether a test variant should be
// exonerated in presubmit.
message TestVariantFailureRateAnalysis {
  // A unique identifier of the test in a LUCI project.
  string test_id = 1;

  // Description of one specific way of running the test,
  // e.g. a specific bucket, builder and a test suite.
  // Only populated if populated on the request.
  Variant variant = 2;

  // The variant hash.
  // Only populated if populated on the request.
  string variant_hash = 3;

  // IntervalStats holds verdict statistics for a single time interval.
  message IntervalStats {
    // The age of the interval. 1 is the most recent interval,
    // 2 is the interval immediately before that, and so on.
    // Cross reference with the intervals field on the
    // QueryTestVariantFailureRateResponse response to
    // identify the exact time interval this represents.
    int32 interval_age = 1;

    // The number of verdicts which had only expected runs.
    // An expected run is a run (e.g. swarming task) which has at least
    // one expected result, excluding skipped results.
    int32 total_run_expected_verdicts = 2;

    // The number of verdicts which had both expected and
    // unexpected runs.
    // An expected run is a run (e.g. swarming task) which has at least
    // one expected result, excluding skips.
    // An unexpected run is a run which had only unexpected
    // results (and at least one unexpected result), excluding skips.
    int32 total_run_flaky_verdicts = 3;

    // The number of verdicts which had only unexpected runs.
    // An unexpected run is a run (e.g. swarming task) which had only
    // unexpected results (and at least one unexpected result),
    // excluding skips.
    int32 total_run_unexpected_verdicts = 4;
  }

  // Statistics broken down by time interval. Intervals will be ordered
  // by recency, starting at the most recent interval (age = 1).
  //
  // The following filtering applies to verdicts used in time interval data:
  // - Verdicts are filtered to at most one per unique CL under test,
  //   with verdicts for multi-CL tryjob runs excluded.
  repeated IntervalStats interval_stats = 4;

  // VerdictExample describes a verdict that is part of a statistic.
  message VerdictExample {
    // The partition time of the verdict. This is the time associated with
    // the test result for test history purposes, usually the build or
    // presubmit run start time.
    google.protobuf.Timestamp partition_time = 1;

    // The identity of the ingested invocation.
    string ingested_invocation_id = 2;

    // The changelist(s) tested, if any.
    repeated Changelist changelists = 3;
  }

  // Examples of verdicts which had both expected and unexpected runs.
  //
  // Ordered by recency, starting at the most recent example at offset 0.
  //
  // Limited to at most 10. Further limited to only verdicts produced
  // since 5 weekdays ago (this corresponds to the exact same time range
  // as for which interval data is provided).
  repeated VerdictExample run_flaky_verdict_examples = 5;

  // RecentVerdict describes one of the most recent verdicts for the
  // test variant.
  message RecentVerdict {
    // The partition time of the verdict. This is the time associated with
    // the test result for test history purposes, usually the build or
    // presubmit run start time.
    google.protobuf.Timestamp partition_time = 1;

    // The identity of the ingested invocation.
    string ingested_invocation_id = 2;

    // The changelist(s) tested, if any.
    repeated Changelist changelists = 3;

    // Whether the verdict had an unexpected run.
    // An unexpected run is a run (e.g. swarming task) which
    // had only unexpected results, after excluding skips.
    //
    // Example: a verdict includes the result of two
    // swarming tasks (i.e. two runs), which each contain two
    // test results.
    // One of the two test runs has two unexpected failures.
    // Therefore, the verdict has an unexpected run.
    bool has_unexpected_runs = 4;
  }

  // The most recent verdicts for the test variant.
  //
  // The following filtering applies to verdicts used in this field:
  // - Verdicts are filtered to at most one per unique CL under test,
  //   with verdicts for multi-CL tryjob runs excluded.
  // - Verdicts for CLs authored by automation are excluded, to avoid a
  //   single repeatedly failing automatic uprev process populating
  //   this list with 10 failures.
  // Ordered by recency, starting at the most recent verdict at offset 0.
  //
  // Limited to at most 10. Further limited to only verdicts produced
  // since 5 weekdays ago (this corresponds to the exact same time range
  // as for which interval data is provided).
  repeated RecentVerdict recent_verdicts = 6;
}

// Request message for TestVariants.QueryStability.
message QueryTestVariantStabilityRequest {
  // The LUCI Project for which test variants should be looked up.
  string project = 1;

  // The test variant positions to query.
  repeated TestVariantPosition test_variants = 2;

  // Represents a test variant at a particular source position.
  message TestVariantPosition {
    // The unique identifier of the test in a LUCI project.
    string test_id = 1;

    // Description of one specific way of running the test,
    // e.g. a specific bucket, builder and test suite.
    Variant variant = 2;

    // The variant hash. Alternative to specifying the variant.
    // Prefer to specify the full variant (if available), as the
    // variant hashing implementation is an implementation detail
    // and may change.
    string variant_hash = 3;

    // The source positions to obtain stability relevant to.
    //
    // The base sources (e.g. base git commit branch and position)
    // is mandatory, except for the commit hash, which is ignored.
    //
    // If any changelists are specified then any stability analysis
    // will exclude prior results for that changelist from the
    // analysis.
    //
    // is_dirty is ignored.
    Sources sources = 4;
  }
}

// Response message for TestVariants.QueryStability.
message QueryTestVariantStabilityResponse {
  // The requested test variant stability analysis.
  repeated TestVariantStabilityAnalysis test_variants = 1;

  // The criteria used to determine if tests are stable.
  // This is as configured in the project's LUCI Analysis configuration.
  TestStabilityCriteria criteria = 2;
}

// Criteria used to determine test stability. This criteria is used
// to inform test exoneration in presubmit via the
// TestVariants.QueryStability RPC.
//
// Criteria is applied using a data source which contains
// the last 14 days of test result data for all test variants,
// with certain filterings applied.
//
// See go/luci-exoneration-v2 as well as each criteria below for more
// details.
message TestStabilityCriteria {
  // The failure rate criteria to apply. Mandatory.
  FailureRateCriteria failure_rate = 1;

  // The failure rate criteria detects consistently failing
  // and highly flaky tests (e.g. 95%+ failing) by looking for
  // a high number of failures at the queried position of the
  // test's history.
  //
  // The criteria obtains from the last 14 days of filtered test data
  // a set of (up to) 20 test runs centered on the queried commit
  // position (10 prior and 10 after) and applies criteria
  // to this in various ways.
  // The 20 test runs are sorted by commit position and then time.
  //
  // See go/luci-exoneration-v2 for more detail.
  message FailureRateCriteria {
    // The number of unexpected test runs that must be
    // found in a sliding window of size 10 containing the
    // queried position to begin exoneration.
    // 6 is a good starting value.
    //
    // The criteria is applied over sliding windows of size
    // 10 around the query position. Assuming the full 20 test
    // runs are obtained, this means 11 window positions are considered.
    // If any window satisfies the threshold, the criteria is met
    // and the test is considered unstable.
    //
    // In the event that 10 test runs cannot be found in the last
    // 14 days of test history, a window sized to the available
    // test runs is used but the criteria is not scaled.
    int32 failure_threshold = 1;

    // The number of consecutive unexpected test runs, which if
    // present at the leading or trailing part of the (up to) 20
    // test verdicts, will trigger exoneration.
    // 3 is a good starting value.
    //
    // The consecutive failures must also touch the query position.
    //
    // This is designed to create a fast path to exoneration for
    // 100% failing tests which produce a strong and consistent
    // failing signal, leveraging the statistical significance
    // of consecutive failures. If this threshold is met,
    // the failure_threshold above does NOT need to be met.
    //
    // E.g. the following scenario WILL trigger this criteria for
    // a threshold of four or less.
    //
    // History: >F F F F< P P P P P P P
    //             ^
    //             Query position
    //
    // The following scenario WILL NOT trigger this criteria:
    //
    // History:>P F F F F< P P P P P P P
    //              ^
    //              Query position
    //
    // (N.B. Direction of history is irrelevant as criteria is
    // applied symmetrically. Either the left or right could
    // represent 'later' by commit position.)
    int32 consecutive_failure_threshold = 2;
  }

  // The flake rate criteria to apply. Mandatory.
  FlakeRateCriteria flake_rate = 2;

  // The flake rate criteria detects flaky tests by looking for
  // examples where a test has obtained expected and unexpected
  // test runs for the same sources under test.
  //
  // If there are more flaky source verdicts found than a threshold,
  // the test is considered flaky.
  //
  // The analysis window is all source verdicts for 7 days' worth
  // of commit positions either side of the queried position.
  // The conversion between time and commit position is discussed
  // in go/luci-exoneration-v2.
  //
  // In the event that an unsatisfactory number of source positions
  // are found using this method, the window is enlarged to possibly
  // include any verdict in the last 14 days. This is to improve
  // detection performance on tests with a low volume of results.
  message FlakeRateCriteria {
    // The minimum number of source verdicts desired
    // for the analysis window.
    //
    // As standard, all source verdicts for sources
    // +/- 7 days from the queried position are used.
    //
    // However, if the number of verdicts is not equal
    // to or greater than min_window, all source verdicts
    // from the last 14 days will be used. This is designed
    // to prioritise adequate flake detection performance
    // for test variants with low result volumes, at the
    // cost of data recency.
    //
    // If the number of source verdicts in the last 14 days
    // is less than min_window, then whatever source verdicts
    // are available are still used.
    //
    // 100 is a good starting value.
    int32 min_window = 1;

    // The minimum number of flaky source verdicts required
    // to trigger the criteria. 2 is a good starting value.
    int32 flake_threshold = 2;

    // The minimum flake rate required to trigger the criteria,
    // as a proportion of all source verdicts. This must be a
    // value between 0.0 and 1.0.
    // 0.01 (1%) is a good starting value.
    //
    // Both flake_threshold AND the flake_rate_threshold must be met
    // for a test to be considered unstable.
    //
    // Note that not even the most flaky (50% flaky) test would
    // be expected to produce more than a 25% flake rate if
    // failures are retried once. This is because its expected
    // outcomes are:
    // - Pass on first try = 50%
    // - Fail on first try, pass on second try = 25% (flaky)
    // - Fail on both tries = 25%
    double flake_rate_threshold = 3;
  }
}

// Stability analysis for a test variant at a particular source position.
message TestVariantStabilityAnalysis {
  // A unique identifier of the test in a LUCI project.
  string test_id = 1;

  // Description of one specific way of running the test,
  // e.g. a specific bucket, builder and a test suite.
  // Only populated if populated on the request.
  Variant variant = 2;

  // The variant hash.
  // Only populated if populated on the request.
  string variant_hash = 3;

  // Information related to the application of failure rate
  // criteria, if this criteria was considered.
  FailureRate failure_rate = 4;

  // FailureRate reports the outcome of applying the failure rate
  // criteria, together with debug information.
  message FailureRate {
    // Whether the failure rate criteria was met. If set, this means the
    // test is unstable by this criteria.
    bool is_met = 1;

    // Debug information follows.

    // The maximum number of failures observed in any analysis window.
    int32 unexpected_test_runs = 2;

    // The number of consecutive unexpected test runs from the leading
    // and/or trailing part of test history, which touches the
    // query position.
    // If there is no such sequence, this is 0.
    int32 consecutive_unexpected_test_runs = 3;

    // RecentVerdict describes a source verdict used in the analysis.
    message RecentVerdict {
      // The commit position of the source verdict on the queried branch.
      int64 position = 1;

      // The changelist(s) tested, if any.
      repeated Changelist changelists = 2;

      // The invocations included in this source verdict.
      repeated string invocations = 3;

      // The number of unexpected runs associated with the verdict.
      // An unexpected run is a run (e.g. swarming task) which
      // had only unexpected results, after excluding skips.
      // Presubmit results are limited to contributing 1 unexpected
      // run to the analysis by design. Postsubmit results can have more.
      int32 unexpected_runs = 4;

      // The total number of test runs associated with the verdict.
      // Presubmit results are limited to contributing 1 unexpected
      // run to the analysis by design. Postsubmit results can have more.
      int32 total_runs = 5;
    }

    // Relevant source verdicts used in the analysis. Limited to 20 runs,
    // which may span between 1 and 20 source verdicts.
    repeated RecentVerdict recent_verdicts = 4;
  }

  // Information related to the application of flake rate
  // criteria, if this criteria was considered.
  FlakeRate flake_rate = 5;

  // FlakeRate reports the outcome of applying the flake rate
  // criteria, together with debug information.
  message FlakeRate {
    // Whether the flake rate criteria was met. If set, this means the
    // test was deemed unstable by this criteria.
    bool is_met = 1;

    // Debug information follows.

    // The total number of run-flaky verdicts observed.
    int32 run_flaky_verdicts = 2;

    // The total number of verdicts in the run flaky verdicts analysis window.
    int32 total_verdicts = 3;

    // VerdictExample describes a source verdict that is part of a statistic.
    // Note that a source verdict may contain data from multiple test verdicts,
    // such as in the case of retried presubmit runs on the same patchset.
    message VerdictExample {
      // The commit position of the verdict on the queried branch.
      int64 position = 1;

      // The changelist(s) tested, if any.
      repeated Changelist changelists = 2;

      // The invocations included in this source verdict.
      repeated string invocations = 3;
    }

    // Examples of source verdicts which had both expected and unexpected runs,
    // that contributed to run_flaky_verdicts.
    //
    // Ordered by recency, starting at the most recent example.
    //
    // Limited to at most 10 examples.
    repeated VerdictExample flake_examples = 4;

    // The least source position included in the analysis window. Inclusive.
    // If the analysis window is empty (e.g. because there is no data), this is zero.
    int64 start_position = 5;

    // The greatest source position included in the analysis window. Inclusive.
    // If the analysis window is empty (e.g. because there is no data), this is zero.
    int64 end_position = 6;
  }
}