k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/prometheus/manifests/dashboards/slo.dashboard.py

k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/prometheus/manifests/dashboards/slo.dashboard.py (about)

     1  #!/usr/bin/env python3
     2  
     3  # Copyright 2019 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  from grafanalib import core as g
    18  import defaults as d
    19  
    20  
    21  def api_call_latency(title, metric, verb, scope, threshold):
    22      return d.Graph(
    23          title=title,
    24          targets=[
    25              d.Target(expr=str(threshold), legendFormat="threshold"),
    26              d.Target(
    27                  expr='quantile_over_time(0.99, %(metric)s{quantile="0.99", verb=~"%(verb)s", scope=~"%(scope)s"}[12h])'
    28                  % {"metric": metric, "verb": verb, "scope": scope}
    29              ),
    30          ],
    31          yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    32      )
    33  
    34  
    35  def create_slo_panel(metric="apiserver:apiserver_request_latency:histogram_quantile"):
    36      return [
    37          api_call_latency(
    38              title="Read-only API call latency (scope=resource, threshold=1s)",
    39              metric=metric,
    40              verb="GET",
    41              scope="resource",
    42              threshold=1,
    43          ),
    44          api_call_latency(
    45              title="Read-only API call latency (scope=namespace, threshold=5s)",
    46              metric=metric,
    47              verb="LIST",
    48              scope="namespace",
    49              threshold=5,
    50          ),
    51          api_call_latency(
    52              title="Read-only API call latency (scope=cluster, threshold=30s)",
    53              metric=metric,
    54              verb="LIST",
    55              scope="cluster",
    56              threshold=30,
    57          ),
    58          api_call_latency(
    59              title="Mutating API call latency (threshold=1s)",
    60              metric=metric,
    61              verb=d.any_of("CREATE", "DELETE", "PATCH", "POST", "PUT"),
    62              scope=d.any_of("namespace", "cluster"),
    63              threshold=1,
    64          ),
    65      ]
    66  
    67  
    68  # The final dashboard must be named 'dashboard' so that grafanalib will find it.
    69  dashboard = d.Dashboard(
    70      title="SLO",
    71      rows=[
    72          d.Row(title="SLO", panels=create_slo_panel()),
    73          d.Row(
    74              title="Experimental: SLO (window 1m)",
    75              panels=create_slo_panel(
    76                  metric="apiserver:apiserver_request_latency_1m:histogram_quantile"
    77              ),
    78          ),
    79      ],
    80  ).auto_panel_ids()