github.com/abayer/test-infra@v0.0.5/metrics/configs/flakes-config.yaml

github.com/abayer/test-infra@v0.0.5/metrics/configs/flakes-config.yaml (about)

     1  metric: flakes
     2  description: Calculates flakiness for each job for the past week and the flakiest tests for each job.
     3  query: |
     4    #standardSQL
     5    select /* one output row per job: consistency figures plus up to three flakiest tests */
     6      job,
     7      build_consistency,
     8      commit_consistency,
     9      flakes,
    10      runs,
    11      commits,
    12      array(
    13        select as struct
    14          i.n name,
    15          count(i.failures) flakes /* number of (job, commit) groups in which this test flaked */
    16        from tt.tests i
    17        group by name
    18        having name not in ('Test', 'DiffResources', 'DumpClusterLogs', 'DumpFederationLogs')  /* uninteresting tests */
    19        order by flakes desc
    20        limit 3 /* top three flakiest tests in this job */
    21      ) flakiest
    22    from ( /* per job: totals across all of its (job, commit) groups */
    23      select
    24        job, /* name of job */
    25        round(safe_divide(sum(if(flaked=1,passed,runs)), sum(runs)),3) build_consistency, /* fraction (0-1) of runs that did not flake; NULL instead of a division-by-zero error when no run has a result */
    26        round(1-sum(flaked)/count(distinct commit),3) commit_consistency, /* fraction (0-1) of commits that did not flake */
    27        sum(flaked) flakes, /* number of commits on which the job flaked */
    28        sum(runs) runs, /* number of times the job ran */
    29        count(distinct commit) commits, /* number of commits tested */
    30        array_concat_agg(tests) tests /* array of flaking tests in this job */
    31      from ( /* per (job, commit): flake flag and the tests that flaked on that commit */
    32        select
    33          job,
    34          commit,
    35          if(passed = runs or passed = 0, 0, 1) flaked, /* consistent: always pass or always fail */
    36          passed,
    37          safe_cast(runs as int64) runs,
    38          array(
    39            select as struct
    40              i.name n, /* test name */
    41              countif(i.failed) failures /* number of failed results for this test on this commit */
    42            from tt.tests i
    43            group by n
    44            having failures > 0 and failures < tt.runs /* same consistency metric */
    45            order by failures desc
    46          ) tests
    47        from ( /* per (job, commit): pass/run counts and the concatenated test results */
    48          select
    49            job,
    50            commit,
    51            sum(if(result='SUCCESS',1,0)) passed,
    52            count(result) runs,  /* number of times we ran the job on this commit; count() skips rows whose result is NULL */
    53            array_concat_agg(test) tests /* create an array of tests structs */
    54          from ( /* rows of the build table started in the last 7 days, with the commit under test resolved */
    55            SELECT
    56              job,
    57              if(starts_with(job, 'pr:'), 'pull', 'ci') kind,  /* pull or ci (not referenced by the outer queries) */
    58              version, /* bootstrap git version, empty for ci  */
    59              if(starts_with(job, 'pr:'),
    60                regexp_extract(
    61                  (
    62                    select i.value
    63                    from t.metadata i
    64                    where i.key = 'repos'
    65                  ),
    66                  r'[^,]+,\d+:([a-f0-9]+)"' /* captures the hex sha that follows ',<digits>:' and precedes a closing quote */
    67                ),
    68                version
    69              ) commit,  /* repo commit for PR or version for CI */
    70              result,  /* SUCCESS if the build passed */
    71              test  /* repeated tuple of tests */
    72            FROM `k8s-gubernator.build.week` as t
    73            where
    74              datetime(started) > datetime_sub(current_datetime(), interval 7 DAY)
    75              and version != 'unknown'
    76              and (
    77                starts_with(job, 'ci-') or /* the version != 'unknown' check above already applies to this branch */
    78                exists(
    79                  select as struct
    80                    i
    81                  from t.metadata i
    82                  where i.key = 'repos' and
    83                  array_length(split(replace(i.value,', ', ''), ',')) = 2 /*serial pr jobs only (# of PR refs +1 == 2)*/
    84                )
    85              )
    86          )
    87          group by job, commit
    88        ) as tt
    89      ) as tt
    90      group by job /* summarize info for this job across all commits/builds */
    91    ) as tt
    92    order by flakes desc, commit_consistency, build_consistency, job /* flakiest jobs first */
    93  
    94  jqfilter: |  # keeps jobs whose name starts with "pr:" and merges them into one object keyed by job name; flakiest maps test name -> flakes for tests with >= 4 flakes (null when none qualify)
    95    [(.[] | select(.job | startswith("pr:")) | {(.job): {
    96        consistency: (.commit_consistency|tonumber),
    97        flakes: (.flakes|tonumber),
    98        flakiest: ([(.flakiest[] | select((.flakes|tonumber) >= 4) | {
    99          (.name): (.flakes|tonumber)}) ])| add
   100    }})] | add
   101  
   102  # No backfilling is used since this metric is only used to display a table with the currently flaky jobs/tests.
   103  measurements:  # the jq program below turns every query row into a {measurement, tags, fields} object (presumably one time-series point per job - confirm with the consumer)
   104    jq: |  # flakiest/flakier/flaky = "<flakes> flakes: <name>" for the 1st/2nd/3rd entry of .flakiest, or "" when that entry is absent; the + concatenation requires .flakes to be a JSON string
   105      [(.[] | {
   106        measurement: "flakes",
   107        tags: {
   108          job: (.job)
   109        },
   110        fields: {
   111          consistency: (.commit_consistency|tonumber),
   112          flakes: (.flakes|tonumber),
   113          flakiest: (if (.flakiest | has(0)) then (.flakiest[0].flakes + " flakes: " + .flakiest[0].name) else ("") end),
   114          flakier: (if (.flakiest | has(1)) then (.flakiest[1].flakes + " flakes: " + .flakiest[1].name) else ("") end),
   115          flaky: (if (.flakiest | has(2)) then (.flakiest[2].flakes + " flakes: " + .flakiest[2].name) else ("") end)
   116        }
   117      })]