k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/prometheus/manifests/dashboards/network.dashboard.py (about)

     1  #!/usr/bin/env python3
     2  
     3  # Copyright 2019 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  from grafanalib import core as g
    18  import defaults as d
    19  
    20  
    21  NETWORK_PROGRAMMING_PANEL = [
    22      d.Graph(
    23          title="SLI: Network programming latency",
    24          description=(
    25              "NetworkProgrammingLatency is defined as the time it took to "
    26              + "program the network - from the time  the service or pod has "
    27              + "changed to the time the change was propagated and the proper "
    28              + "kube-proxy rules were synced. Exported for each endpoints object "
    29              + "that were part of the rules sync."
    30          ),
    31          targets=d.show_quantiles(
    32              (
    33                  "quantile_over_time("
    34                  + "0.99, "
    35                  + 'kubeproxy:kubeproxy_network_programming_duration:histogram_quantile{{quantile="{quantile}"}}[24h])'
    36              ),
    37              legend="{{quantile}}",
    38          ),
    39          yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    40      ),
    41      d.Graph(
    42          title="Network programming latency",
    43          description=(
    44              "NetworkProgrammingLatency is defined as the time it took to "
    45              + "program the network - from the time  the service or pod has "
    46              + "changed to the time the change was propagated and the proper "
    47              + "kube-proxy rules were synced. Exported for each endpoints object "
    48              + "that were part of the rules sync."
    49          ),
    50          targets=d.show_quantiles(
    51              'kubeproxy:kubeproxy_network_programming_duration:histogram_quantile{{quantile="{quantile}"}}',
    52              legend="{{quantile}}",
    53          ),
    54          yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    55      ),
    56      d.Graph(
    57          title="kube-proxy: sync rules duation",
    58          description="Latency of one round of kube-proxy syncing proxy rules.",
    59          targets=d.show_quantiles(
    60              "histogram_quantile({quantile}, sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket[5m])) by (le))"
    61          ),
    62          yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    63      ),
    64      d.simple_graph(
    65          "kube-proxy: rate of service changes",
    66          "sum(rate(kubeproxy_sync_proxy_rules_service_changes_total[5m]))",
    67          description="Rate of service changes that the proxy has seen over 5m",
    68          legend="rate",
    69      ),
    70      d.simple_graph(
    71          "kube-proxy: pending service changes",
    72          "sum(kubeproxy_sync_proxy_rules_service_changes_pending)",
    73          description="Number of pending service changes that have not yet been synced to the proxy.",
    74          legend="pending changes",
    75      ),
    76      d.simple_graph(
    77          "kube-proxy: rate of endpoint changes",
    78          "sum(rate(kubeproxy_sync_proxy_rules_endpoint_changes_total[5m]))",
    79          description="Rate of endpoint changes that the proxy has seen over 5m",
    80          legend="rate",
    81      ),
    82      d.simple_graph(
    83          "kube-proxy: pending endpoint changes",
    84          "sum(kubeproxy_sync_proxy_rules_endpoint_changes_pending)",
    85          description="Number of pending endpoint changes that have not yet been synced to the proxy.",
    86          legend="pending changes",
    87      ),
    88  ]
    89  
    90  NETWORK_LATENCY_PANEL = [
    91      d.Graph(
    92          title="Network latency",
    93          targets=d.show_quantiles(
    94              'probes:in_cluster_network_latency:histogram_quantile{{quantile="{quantile}"}}',
    95              legend="{{quantile}}",
    96          ),
    97          yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    98          nullPointMode="null",
    99      ),
   100      d.Graph(
   101          title="probes: ping rate",
   102          targets=[
   103              d.Target(
   104                  expr='sum(rate(probes_in_cluster_network_latency_ping_count{namespace="probes", job="ping-client"}[1m])) by (job)',
   105                  legendFormat="rate",
   106              ),
   107              d.Target(
   108                  expr='sum(rate(probes_in_cluster_network_latency_error{namespace="probes", job="ping-client"}[1m])) by (job)',
   109                  legendFormat="error rate",
   110              ),
   111          ],
   112          nullPointMode="null",
   113      ),
   114      d.Graph(
   115          title="probe: # running",
   116          targets=[
   117              d.TargetWithInterval(
   118                  expr='count(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container, namespace)'
   119              )
   120          ],
   121          nullPointMode="null",
   122      ),
   123      d.Graph(
   124          title="probes: memory usage",
   125          targets=[
   126              d.Target(
   127                  expr='min(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
   128                  legendFormat="min {{container}}",
   129              ),
   130              d.Target(
   131                  expr='avg(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
   132                  legendFormat="avg {{container}}",
   133              ),
   134              d.Target(
   135                  expr='max(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
   136                  legendFormat="max {{container}}",
   137              ),
   138          ],
   139          nullPointMode="null",
   140      ),
   141  ]
   142  
   143  
   144  dashboard = d.Dashboard(
   145      title="Network",
   146      rows=[
   147          d.Row(title="Network progamming latency", panels=NETWORK_PROGRAMMING_PANEL),
   148          d.Row(title="In-cluster network latency", panels=NETWORK_LATENCY_PANEL),
   149      ],
   150  ).auto_panel_ids()