# Origin: github.com/abayer/test-infra@v0.0.5/metrics/configs/failures-config.yaml
#
# Metric definition with three parts:
#   query        - BigQuery standard SQL producing one row per job that ran in the last 7 days
#   jqfilter     - jq program reducing those rows to {job: {failing_days}} for long-broken jobs
#   measurements - jq program turning every row into a "failures" measurement point
metric: failures
description: This query finds jobs that have been failing continuously for a long time.
query: |
  #standardSQL
  with weekly_jobs as (
    /* jobs that started at least one build in the last 7 days, with their build count */
    select
      job,
      count(*) as weekly_builds
    from `k8s-gubernator.build.all`
    where started > timestamp_sub(current_timestamp(), interval 7 day)
    group by job
  ),
  run_bounds as (
    /* date of the oldest and the newest run of each job, over the whole table */
    select
      job,
      date(min(started)) as first_run,
      date(max(started)) as latest_run
    from `k8s-gubernator.build.all`
    group by job
  ),
  last_passes as (
    /* most recent successful start of each job (may be older than this week) */
    select
      job,
      max(started) as latest_pass
    from `k8s-gubernator.build.all`
    where result = 'SUCCESS'
    group by job
  )
  select /* Find jobs that have not passed in a long time */
    weekly_jobs.job,
    last_passes.latest_pass,     /* how recently did this job pass (null if it never passed) */
    weekly_jobs.weekly_builds,   /* how many times a week does it run */
    run_bounds.first_run,        /* when is the first time it ran */
    run_bounds.latest_run,       /* when is the most recent run */
    /* days since the last pass; for a job that never passed, days since its first run */
    date_diff(
      current_date(),
      coalesce(date(last_passes.latest_pass), run_bounds.first_run),
      day
    ) as broken_days
  from weekly_jobs
  left join run_bounds
    on weekly_jobs.job = run_bounds.job
  left join last_passes
    on weekly_jobs.job = last_passes.job
  order by
    broken_days desc,
    latest_pass,
    first_run,
    weekly_builds desc,
    weekly_jobs.job

# Keep rows that have no latest_pass or whose broken_days exceeds 30, then merge them
# into a single object keyed by job name (`add` yields null when nothing matches).
jqfilter: |
  map(
    select((.latest_pass | length) == 0 or (.broken_days | tonumber) > 30)
    | {(.job): {failing_days: (.broken_days | tonumber)}}
  ) | add

# Emit one point per query row (not only the filtered ones); broken_days is converted
# with tonumber before being stored as failing_days.
measurements:
  jq: |
    map({
      measurement: "failures",
      tags: {
        job: .job
      },
      fields: {
        job: .job,
        failing_days: (.broken_days | tonumber)
      }
    })