#!/usr/bin/env python3

# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Grafana "Network" dashboard definition.

The dashboard is made of two rows:

* network programming latency and kube-proxy rule-sync metrics
  (``NETWORK_PROGRAMMING_PANEL``);
* in-cluster network latency and metrics about the ping probes themselves
  (``NETWORK_LATENCY_PANEL``).

The module-level ``dashboard`` object is the final result.
"""

from grafanalib import core as g
import defaults as d


# Shared by the SLI graph and the raw graph below, which plot the same metric
# over different windows; keeping one copy prevents the two descriptions from
# drifting apart.
_NETWORK_PROGRAMMING_LATENCY_DESCRIPTION = (
    "NetworkProgrammingLatency is defined as the time it took to "
    "program the network - from the time the service or pod has "
    "changed to the time the change was propagated and the proper "
    "kube-proxy rules were synced. Exported for each endpoints object "
    "that were part of the rules sync."
)

# NOTE(review): the doubled braces around the label matcher and the single
# braces around `quantile` suggest d.show_quantiles fills the expression in
# with str.format - confirm against defaults.py before editing the templates.
_NETWORK_PROGRAMMING_QUANTILE_EXPR = (
    "kubeproxy:kubeproxy_network_programming_duration:histogram_quantile"
    '{{quantile="{quantile}"}}'
)

NETWORK_PROGRAMMING_PANEL = [
    # SLI view: 99th percentile, over a 24h window, of each quantile series.
    d.Graph(
        title="SLI: Network programming latency",
        description=_NETWORK_PROGRAMMING_LATENCY_DESCRIPTION,
        targets=d.show_quantiles(
            "quantile_over_time(0.99, "
            + _NETWORK_PROGRAMMING_QUANTILE_EXPR
            + "[24h])",
            legend="{{quantile}}",
        ),
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    # Raw view of the same quantile series, without the 24h aggregation.
    d.Graph(
        title="Network programming latency",
        description=_NETWORK_PROGRAMMING_LATENCY_DESCRIPTION,
        targets=d.show_quantiles(
            _NETWORK_PROGRAMMING_QUANTILE_EXPR,
            legend="{{quantile}}",
        ),
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.Graph(
        title="kube-proxy: sync rules duration",
        description="Latency of one round of kube-proxy syncing proxy rules.",
        targets=d.show_quantiles(
            "histogram_quantile({quantile}, sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket[5m])) by (le))"
        ),
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "kube-proxy: rate of service changes",
        "sum(rate(kubeproxy_sync_proxy_rules_service_changes_total[5m]))",
        description="Rate of service changes that the proxy has seen over 5m",
        legend="rate",
    ),
    d.simple_graph(
        "kube-proxy: pending service changes",
        "sum(kubeproxy_sync_proxy_rules_service_changes_pending)",
        description="Number of pending service changes that have not yet been synced to the proxy.",
        legend="pending changes",
    ),
    d.simple_graph(
        "kube-proxy: rate of endpoint changes",
        "sum(rate(kubeproxy_sync_proxy_rules_endpoint_changes_total[5m]))",
        description="Rate of endpoint changes that the proxy has seen over 5m",
        legend="rate",
    ),
    d.simple_graph(
        "kube-proxy: pending endpoint changes",
        "sum(kubeproxy_sync_proxy_rules_endpoint_changes_pending)",
        description="Number of pending endpoint changes that have not yet been synced to the proxy.",
        legend="pending changes",
    ),
]

NETWORK_LATENCY_PANEL = [
    d.Graph(
        title="Network latency",
        targets=d.show_quantiles(
            'probes:in_cluster_network_latency:histogram_quantile{{quantile="{quantile}"}}',
            legend="{{quantile}}",
        ),
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
        nullPointMode="null",
    ),
    # Ping throughput and error rate reported by the ping-client job.
    d.Graph(
        title="probes: ping rate",
        targets=[
            d.Target(
                expr='sum(rate(probes_in_cluster_network_latency_ping_count{namespace="probes", job="ping-client"}[1m])) by (job)',
                legendFormat="rate",
            ),
            d.Target(
                expr='sum(rate(probes_in_cluster_network_latency_error{namespace="probes", job="ping-client"}[1m])) by (job)',
                legendFormat="error rate",
            ),
        ],
        nullPointMode="null",
    ),
    # Counts memory-usage series per container as a stand-in for the number
    # of running ping-client / ping-server containers.
    d.Graph(
        title="probe: # running",
        targets=[
            d.TargetWithInterval(
                expr='count(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container, namespace)'
            )
        ],
        nullPointMode="null",
    ),
    # min / avg / max memory usage across the probe containers.
    d.Graph(
        title="probes: memory usage",
        targets=[
            d.Target(
                expr='min(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
                legendFormat="min {{container}}",
            ),
            d.Target(
                expr='avg(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
                legendFormat="avg {{container}}",
            ),
            d.Target(
                expr='max(container_memory_usage_bytes{namespace="probes", container=~"ping-client|ping-server"}) by (container)',
                legendFormat="max {{container}}",
            ),
        ],
        nullPointMode="null",
    ),
]


# Final dashboard object: one row per panel list above, with panel ids
# assigned by auto_panel_ids().
dashboard = d.Dashboard(
    title="Network",
    rows=[
        d.Row(title="Network programming latency", panels=NETWORK_PROGRAMMING_PANEL),
        d.Row(title="In-cluster network latency", panels=NETWORK_LATENCY_PANEL),
    ],
).auto_panel_ids()