github.com/GoogleCloudPlatform/testgrid@v0.0.174/terraform/modules/alerts/main.tf (about)

     1  # Copyright 2022 The TestGrid Authors.
     2  #
     3  # Licensed under the Apache License, Version 2.0 (the "License");
     4  # you may not use this file except in compliance with the License.
     5  # You may obtain a copy of the License at
     6  #
     7  #     http://www.apache.org/licenses/LICENSE-2.0
     8  #
     9  # Unless required by applicable law or agreed to in writing, software
    10  # distributed under the License is distributed on an "AS IS" BASIS,
    11  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  # See the License for the specific language governing permissions and
    13  # limitations under the License.
    14  
    15  locals {}
    16  
    17  resource "google_monitoring_alert_policy" "probers" {
    18    project      = var.project
    19    provider     = google-beta  // To include `condition_monitoring_query_language`
    20    display_name = "HostDown"
    21    combiner     = "OR"
    22  
    23    conditions {
    24      display_name = "Host is unreachable"
    25      condition_monitoring_query_language {
    26        duration = "120s"
    27        query    = <<-EOT
    28        fetch uptime_url
    29        | metric 'monitoring.googleapis.com/uptime_check/check_passed'
    30        | align next_older(1m)
    31        | filter resource.project_id == '${var.project}'
    32        | every 1m
    33        | group_by [resource.host],
    34            [value_check_passed_not_count_true: count_true(not(value.check_passed))]
    35        | condition val() > 1 '1'
    36        EOT
    37        trigger {
    38          count = 1
    39        }
    40      }
    41    }
    42  
    43    documentation {
    44      content   = "Host Down"
    45      mime_type = "text/markdown"
    46    }
    47  
    48    # gcloud beta monitoring channels list --project=oss-prow
    49    notification_channels = ["projects/${var.project}/notificationChannels/${var.notification_channel_id}"]
    50  }
    51  
    52  resource "google_monitoring_alert_policy" "pubsub-unack-too-old" {
    53    project      = var.project
    54    provider     = google-beta  // To include `condition_monitoring_query_language`
    55    for_each     = var.pubsub_topics
    56    display_name = "pubsub-unack-too-old/${var.project}/${each.key}"
    57    combiner     = "OR" # required
    58  
    59    conditions {
    60      display_name = "pubsub-unack-too-old/${var.project}/${each.key}"
    61      
    62      condition_monitoring_query_language {
    63        duration = "60s"
    64        query    = <<-EOT
    65        fetch pubsub_subscription
    66        | metric 'pubsub.googleapis.com/subscription/oldest_unacked_message_age'
    67        | filter
    68            (metadata.system_labels.topic_id == '${each.key}')
    69        | group_by 30m,
    70            [value_oldest_unacked_message_age_mean:
    71            mean(value.oldest_unacked_message_age)]
    72        | every 30m
    73        | condition val() > 1.08e+07 's'
    74        EOT
    75        trigger {
    76          count = 1
    77        }
    78      }
    79    }
    80  
    81    documentation {
    82      content   = "${var.project}: TestGrid is not acknowledging PubSub messages in time.\n\nOncall Playbook: http://go/test-infra-playbook"
    83      mime_type = "text/markdown"
    84    }
    85  
    86    # gcloud beta monitoring channels list --project=oss-REPLACE
    87    notification_channels = ["projects/${var.project}/notificationChannels/${var.notification_channel_id}"]
    88  }