# Origin: github.com/GoogleCloudPlatform/testgrid@v0.0.174/terraform/modules/alerts/main.tf

# Copyright 2022 The TestGrid Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# No module-local values are defined; the block is kept as an empty placeholder.
locals {}

# Alert policy "HostDown": a single MQL condition over the uptime-check
# `check_passed` metric for var.project.
#
# The query counts, per `resource.host` and per 1m step, the points where
# `check_passed` is false, and the condition is met when that count is > 1.
resource "google_monitoring_alert_policy" "probers" {
  project      = var.project
  provider     = google-beta # To include `condition_monitoring_query_language`
  display_name = "HostDown"
  combiner     = "OR"

  conditions {
    display_name = "Host is unreachable"

    condition_monitoring_query_language {
      duration = "120s"
      # var.project is interpolated by Terraform before the query is sent.
      query = <<-EOT
      fetch uptime_url
      | metric 'monitoring.googleapis.com/uptime_check/check_passed'
      | align next_older(1m)
      | filter resource.project_id == '${var.project}'
      | every 1m
      | group_by [resource.host],
          [value_check_passed_not_count_true: count_true(not(value.check_passed))]
      | condition val() > 1 '1'
      EOT

      trigger {
        count = 1
      }
    }
  }

  documentation {
    content   = "Host Down"
    mime_type = "text/markdown"
  }

  # The channel is addressed by project and var.notification_channel_id.
  # Channel IDs can be listed with:
  #   gcloud beta monitoring channels list --project=<project>
  notification_channels = ["projects/${var.project}/notificationChannels/${var.notification_channel_id}"]
}

# Alert policies "pubsub-unack-too-old/<project>/<topic>": one policy is
# created per element of var.pubsub_topics (via for_each); each.key is used
# both in the display names and as the topic_id filter of the query.
#
# The query takes the 30m mean of `oldest_unacked_message_age` for
# subscriptions whose `metadata.system_labels.topic_id` equals each.key.
resource "google_monitoring_alert_policy" "pubsub-unack-too-old" {
  project      = var.project
  provider     = google-beta # To include `condition_monitoring_query_language`
  for_each     = var.pubsub_topics
  display_name = "pubsub-unack-too-old/${var.project}/${each.key}"
  combiner     = "OR" # required

  conditions {
    display_name = "pubsub-unack-too-old/${var.project}/${each.key}"

    condition_monitoring_query_language {
      duration = "60s"
      # NOTE(review): the threshold below is written as 1.08e+07 's', i.e.
      # 10,800,000 seconds (125 days). If 3 hours was intended that would be
      # 10800 's' — TODO confirm with the alert owners before changing it.
      query = <<-EOT
      fetch pubsub_subscription
      | metric 'pubsub.googleapis.com/subscription/oldest_unacked_message_age'
      | filter
          (metadata.system_labels.topic_id == '${each.key}')
      | group_by 30m,
          [value_oldest_unacked_message_age_mean:
            mean(value.oldest_unacked_message_age)]
      | every 30m
      | condition val() > 1.08e+07 's'
      EOT

      trigger {
        count = 1
      }
    }
  }

  documentation {
    content   = "${var.project}: TestGrid is not acknowledging PubSub messages in time.\n\nOncall Playbook: http://go/test-infra-playbook"
    mime_type = "text/markdown"
  }

  # The channel is addressed by project and var.notification_channel_id.
  # Channel IDs can be listed with:
  #   gcloud beta monitoring channels list --project=<project>
  notification_channels = ["projects/${var.project}/notificationChannels/${var.notification_channel_id}"]
}