k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/prometheus/manifests/dashboards/master_panels.py

#!/usr/bin/env python3

# Copyright 2022 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from grafanalib import core as g
import defaults as d


def api_call_latency_panel(expression):
    """Builds the standard set of API call latency graphs.

    `expression` is a PromQL template with %(verb)s and %(scope)s
    placeholders, filled in separately for each panel below.
    """

    def api_call_latency(title, verb, scope, threshold):
        return d.Graph(
            title=title,
            targets=[
                d.Target(expr=str(threshold), legendFormat="threshold"),
                d.Target(
                    expr=d.one_line(expression % {"verb": verb, "scope": scope}),
                    # TODO(github.com/grafana/grafana/issues/19410): uncomment once fixed
                    # legendFormat="{{verb}} {{scope}}/{{resource}}",
                ),
            ],
            yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
        )

    return [
        api_call_latency(
            title="GET resource latency (percentile=99, scope=resource, threshold=1s)",
            verb="GET",
            scope="resource",
            threshold=1,
        ),
        api_call_latency(
            title="LIST namespace latency (percentile=99, scope=namespace, threshold=5s)",
            verb="LIST",
            scope="namespace",
            threshold=5,
        ),
        api_call_latency(
            title="LIST cluster latency (percentile=99, scope=cluster, threshold=30s)",
            verb="LIST",
            scope="cluster",
            threshold=30,
        ),
        api_call_latency(
            title="Mutating API call latency (threshold=1s)",
            verb=d.any_of("CREATE", "DELETE", "PATCH", "POST", "PUT"),
            scope=d.any_of("namespace", "cluster", "resource"),
            threshold=1,
        ),
    ]

API_CALL_LATENCY_PANELS = api_call_latency_panel("""
apiserver:apiserver_request_latency_1m:histogram_quantile{
  quantile="0.99",
  verb=~"%(verb)s",
  scope=~"%(scope)s",
  resource=~"${resource:regex}s*",
  subresource!~"exec|proxy",
}""")
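
# For illustration, the first panel above expands (ignoring the whitespace
# collapsed by d.one_line) to:
#
#   apiserver:apiserver_request_latency_1m:histogram_quantile{
#     quantile="0.99", verb=~"GET", scope=~"resource",
#     resource=~"${resource:regex}s*", subresource!~"exec|proxy",
#   }
#
# The colon-separated metric name is, by Prometheus convention, a recording
# rule (defined alongside these dashboards), not a raw apiserver metric.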

QUANTILE_API_CALL_LATENCY_PANELS = api_call_latency_panel("""
quantile_over_time(0.99,
apiserver:apiserver_request_latency_1m:histogram_quantile{
  quantile="0.99",
  verb=~"%(verb)s",
  scope=~"%(scope)s",
  resource=~"${resource:regex}s*",
  subresource!~"exec|proxy",
}[5d])""")
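
# quantile_over_time(0.99, ...[5d]) takes the 99th percentile of the
# per-minute p99 samples over a 5-day window, i.e. a near-worst-case
# latency across the whole run rather than an instantaneous value.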

APF_PANELS = [
    d.simple_graph(
        "Requests waiting time",
        "histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{instance=~\".*(${instance:pipe})\"}[1m])) by (le, instance, priority_level))",
        legend="{{instance}} {{priority_level}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "Execution time",
        "histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{instance=~\".*(${instance:pipe})\"}[1m])) by (le, instance, priority_level))",
        legend="{{instance}} {{priority_level}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "Total execution time per second",
        "sum(irate(apiserver_flowcontrol_request_execution_seconds_sum{instance=~\".*(${instance:pipe})\"}[1m])) by (instance, priority_level)",
        legend="{{instance}} {{priority_level}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "Requests rate by priority level",
        "sum(irate(apiserver_flowcontrol_dispatched_requests_total{instance=~\".*(${instance:pipe})\"}[1m])) by (instance, priority_level)",
        legend="{{instance}} {{priority_level}}",
        yAxes=g.single_y_axis(format=g.OPS_FORMAT),
    ),
    d.simple_graph(
        "Concurrency in use",
        "sum(apiserver_flowcontrol_request_concurrency_in_use{instance=~\".*(${instance:pipe})\"}) by (instance, priority_level)",
        legend="{{instance}} {{priority_level}}",
        # This and the two panels below plot gauges (counts of seats or
        # requests), not rates, so a plain scale fits better than ops/sec.
        yAxes=g.single_y_axis(format=g.SHORT_FORMAT),
    ),
    d.simple_graph(
        "Current executing requests",
        "sum(apiserver_flowcontrol_current_executing_requests{instance=~\".*(${instance:pipe})\"}) by (instance, priority_level)",
        legend="{{instance}} {{priority_level}}",
        yAxes=g.single_y_axis(format=g.SHORT_FORMAT),
    ),
    d.simple_graph(
        "Inqueue requests",
        "sum(apiserver_flowcontrol_current_inqueue_requests{instance=~\".*(${instance:pipe})\"}) by (instance, priority_level)",
        legend="{{instance}} {{priority_level}}",
        yAxes=g.single_y_axis(format=g.SHORT_FORMAT),
    ),
    d.simple_graph(
        "Nominal number of execution seats",
        "avg(apiserver_flowcontrol_nominal_limit_seats{instance=~\".*(${instance:pipe})\"}) by (priority_level)",
        legend="{{priority_level}}",
    ),
    d.simple_graph(
        "Lower bound on number of execution seats",
        "avg(apiserver_flowcontrol_lower_limit_seats{instance=~\".*(${instance:pipe})\"}) by (priority_level)",
        legend="{{priority_level}}",
    ),
    d.simple_graph(
        "Upper bound on number of execution seats",
        "avg(apiserver_flowcontrol_upper_limit_seats{instance=~\".*(${instance:pipe})\"}) by (priority_level)",
        legend="{{priority_level}}",
    ),
    d.simple_graph(
        "Number of seats Priority Level could use divided by nominal seats (50th percentile)",
        "histogram_quantile(0.5, rate(apiserver_flowcontrol_demand_seats_bucket{instance=~\".*(${instance:pipe})\"}[10s]))",
        legend="{{instance}} {{priority_level}}",
    ),
    d.simple_graph(
        "High watermark of demand seats over last adjustment period",
        "apiserver_flowcontrol_demand_seats_high_watermark{instance=~\".*(${instance:pipe})\"}",
        legend="{{instance}} {{priority_level}}",
    ),
    d.simple_graph(
        "Smoothed seat demands",
        "apiserver_flowcontrol_demand_seats_smoothed{instance=~\".*(${instance:pipe})\"}",
        legend="{{instance}} {{priority_level}}",
    ),
    d.simple_graph(
        "Current seat limit for each Priority Level",
        "apiserver_flowcontrol_current_limit_seats{instance=~\".*(${instance:pipe})\"}",
        legend="{{instance}} {{priority_level}}",
    ),
]
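
# The `d.simple_graph` helper used throughout this file comes from the local
# `defaults` module (not shown here). A minimal sketch, assuming it is a thin
# wrapper over grafanalib's Graph that accepts one expression or a list:
#
#   def simple_graph(title, exprs, legend="", **kwargs):
#       if not isinstance(exprs, list):
#           exprs = [exprs]
#       targets = [g.Target(expr=e, legendFormat=legend) for e in exprs]
#       return g.Graph(title=title, targets=targets, **kwargs)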

HEALTH_PANELS = [
    d.simple_graph(
        "Unhealthy nodes",
        "sum(node_collector_unhealthy_nodes_in_zone) by (zone)",
        legend="{{zone}}",
    ),
    d.simple_graph(
        "Pod creations",
        'sum(irate(apiserver_request_total{verb="POST", resource="pods", subresource=""}[1m]))',
        yAxes=g.single_y_axis(format=g.OPS_FORMAT),
    ),
    d.simple_graph(
        "Pod bindings",
        'sum(irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding"}[1m]))',
        yAxes=g.single_y_axis(format=g.OPS_FORMAT),
    ),
    # It's not clear which of the two "Component restarts" heuristics below
    # gives more accurate results: the first counts processes whose
    # process_start_time_seconds advanced, the second tracks container start
    # times reported by cAdvisor.
    d.simple_graph(
        "Component restarts",
        "sum(rate(process_start_time_seconds[1m]) > bool 0) by (job, endpoint)",
    ),
    d.simple_graph(
        "Component restarts 2",
        'sum(min_over_time(container_start_time_seconds{container!="",container!="POD"}[2m])) by (container)',
    ),
    d.simple_graph(
        "Active component", "sum(leader_election_master_status) by (name, instance)"
    ),
]

ETCD_PANELS = [
    d.simple_graph("etcd leader", "etcd_server_is_leader", legend="{{instance}}"),
    d.simple_graph(
        "etcd bytes sent",
        "rate(etcd_network_client_grpc_sent_bytes_total[1m])",
        yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
        legend="{{instance}}",
    ),
    d.simple_graph(
        "etcd operations rate",
        d.one_line(
            """
sum(
  rate(
    etcd_request_duration_seconds_count{
      operation=~"${etcd_operation:regex}",
      type=~".*(${etcd_type:pipe})"
    }[1m]
  )
) by (operation, type)
"""
        ),
        yAxes=g.single_y_axis(format=g.OPS_FORMAT),
        legend="{{operation}} {{type}}",
    ),
    d.simple_graph(
        "etcd get latency by type (99th percentile)",
        d.one_line(
            """
histogram_quantile(
  0.99,
  sum(
    rate(
      etcd_request_duration_seconds_bucket{
        operation=~"${etcd_operation:regex}",
        type=~".*(${etcd_type:pipe})"
      }[1m]
    )
  ) by (le, operation, type, instance)
)
"""
        ),
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
        legend="{{operation}} {{type}} on {{instance}}",
    ),
    d.simple_graph(
        "etcd get latency by type (50th percentile)",
        d.one_line(
            """
histogram_quantile(
  0.50,
  sum(
    rate(
      etcd_request_duration_seconds_bucket{
        operation=~"${etcd_operation:regex}",
        type=~".*(${etcd_type:pipe})"
      }[1m]
    )
  ) by (le, operation, type, instance)
)
"""
        ),
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph("etcd instance id", "sum(etcd_server_id) by (instance, server_id)"),
    d.simple_graph(
        "etcd network latency (99th percentile)",
        "histogram_quantile(0.99, sum(rate(etcd_network_peer_round_trip_time_seconds_bucket[1m])) by (le, instance, To))",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "etcd compaction keys",
        "delta(etcd_debugging_mvcc_db_compaction_keys_total[1m])",
    ),
    d.simple_graph(
        "etcd compaction pause sum duration",
        "delta(etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds_sum[1m])",
        yAxes=g.single_y_axis(format=g.MILLISECONDS_FORMAT),
    ),
    d.simple_graph(
        "etcd compaction pause num chunks",
        "delta(etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds_count[1m])",
    ),
    d.simple_graph(
        "etcd_disk_backend_commit_duration_seconds",
        "histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket[1m])) by (le, instance))",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "etcd wal fsync duration",
        "histogram_quantile(1.0, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket[1m])) by (le, endpoint))",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.Graph(
        title="etcd compaction max pause",
        points=True,
        lines=False,
        targets=[
            d.Target(
                expr="histogram_quantile(1.0, sum(rate(etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds_bucket[1m])) by (le, instance))"
            )
        ],
        yAxes=g.single_y_axis(format=g.MILLISECONDS_FORMAT),
    ),
    d.simple_graph(
        "etcd objects",
        "sum(etcd_object_counts) by (resource, instance)",
        legend="{{instance}}: {{resource}}",
    ),
    d.simple_graph(
        "etcd db size",
        [
            "etcd_mvcc_db_total_size_in_bytes",
            "etcd_mvcc_db_total_size_in_use_in_bytes",
            "etcd_server_quota_backend_bytes",
        ],
        yAxes=g.single_y_axis(format=g.BYTES_FORMAT),
    ),
]
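
# Note: the etcd_debugging_* series above live in etcd's debugging metrics
# namespace, which etcd documents as experimental; they may be renamed or
# dropped in newer etcd releases.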

APISERVER_PANELS = [
    d.simple_graph(
        "goroutines",
        'go_goroutines{job="master", endpoint="apiserver"}',
        legend="{{instance}}",
    ),
    d.simple_graph(
        "gc rate",
        'rate(go_gc_duration_seconds_count{job="master", endpoint="apiserver"}[1m])',
        legend="{{instance}}",
    ),
    d.simple_graph(
        "alloc rate",
        'rate(go_memstats_alloc_bytes_total{job="master", endpoint="apiserver"}[1m])',
        yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
        legend="{{instance}}",
    ),
    d.simple_graph(
        "Number of active watches",
        'sum(apiserver_registered_watchers{kind=~"(?i:(${resource:regex}))s*"}) by (instance, group, version, kind)',
        legend="{{instance}}: {{version}}.{{group}}.{{kind}}",
    ),
    d.simple_graph(
        "Watch events rate",
        d.one_line(
            """
sum(
  irate(
    apiserver_watch_events_total{
      kind=~"(?i:(${resource:regex}))s*"
    }[1m]
  )
) by (instance, group, version, kind)"""
        ),
        legend="{{instance}}: {{version}}.{{group}}.{{kind}}",
    ),
    d.simple_graph(
        "Watch events traffic",
        d.one_line(
            """
sum(
  irate(
    apiserver_watch_events_sizes_sum{
      kind=~"(?i:(${resource:regex}))s*"
    }[1m]
  )
) by (instance, group, version, kind)"""
        ),
        yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
        legend="{{instance}}: {{version}}.{{group}}.{{kind}}",
    ),
    d.simple_graph(
        "Watch event avg size",
        d.one_line(
            """
sum(
  rate(
    apiserver_watch_events_sizes_sum{
      kind=~"(?i:(${resource:regex}))s*"
    }[1m]
  )
  /
  rate(
    apiserver_watch_events_sizes_count{
      kind=~"(?i:(${resource:regex}))s*"
    }[1m]
  )
) by (instance, group, version, kind)"""
        ),
        legend="{{instance}}: {{version}}.{{group}}.{{kind}}",
    ),
    d.simple_graph(
        "Watch terminated total",
        "sum(rate(apiserver_terminated_watchers_total{}[1m])) by (resource, instance)",
        legend="{{instance}}: {{resource}}",
    ),
    d.simple_graph(
        "Inflight requests",
        "sum(apiserver_current_inflight_requests) by (requestKind, instance)",
        legend="{{instance}}: {{requestKind}}",
    ),
    d.simple_graph(
        "Request rate",
        d.one_line(
            """
sum(
  rate(
    apiserver_request_total{
      verb=~"${verb:regex}",
      resource=~"${resource:regex}s*"
    }[1m]
  )
) by (verb, resource, subresource, instance)"""
        ),
        # TODO(github.com/grafana/grafana/issues/19410): uncomment once fixed
        # legend="{{instance}}: {{verb}} {{resource}}",
    ),
    d.simple_graph(
        "Request rate by code",
        "sum(rate(apiserver_request_total[1m])) by (code, instance)",
        legend="{{instance}}: {{code}}",
    ),
    d.simple_graph(
        "Request latency (50th percentile) (excl. WATCH)",
        d.one_line(
            """
apiserver:apiserver_request_latency:histogram_quantile{
  quantile="0.50",
  verb!="WATCH",
  verb=~"${verb:regex}",
  resource=~"${resource:regex}s*"
}"""
        ),
        # TODO(github.com/grafana/grafana/issues/19410): uncomment once fixed
        # legend="{{verb}} {{scope}}/{{resource}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "Request latency (99th percentile) (excl. WATCH)",
        d.one_line(
            """
apiserver:apiserver_request_latency:histogram_quantile{
  quantile="0.99",
  verb!="WATCH",
  verb=~"${verb:regex}",
  resource=~"${resource:regex}s*"
}"""
        ),
        # TODO(github.com/grafana/grafana/issues/19410): uncomment once fixed
        # legend="{{verb}} {{scope}}/{{resource}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "Traffic (excl. WATCH)",
        d.one_line(
            """
sum(
  rate(
    apiserver_response_sizes_sum{
      verb!="WATCH",
      verb=~"${verb:regex}",
      resource=~"${resource:regex}s*"
    }[1m]
  )
) by (verb, version, resource, subresource, scope, instance)"""
        ),
        yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
    ),
    d.simple_graph(
        "Webhook admission duration (99th percentile)",
        "histogram_quantile(0.99, sum(rate(apiserver_admission_webhook_admission_duration_seconds_bucket[1m])) by (le, type, name))",
        legend="{{type}}: {{name}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "Request filter latency for each filter type (99th percentile)",
        "histogram_quantile(0.99, sum(rate(apiserver_request_filter_duration_seconds_bucket[1m])) by (le, filter))",
        legend="{{filter}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
    d.simple_graph(
        "Failed external requests",
        'sum(rate(rest_client_requests_total{endpoint="apiserver", code!="200", host!="[::1]:443"}[1m])) by (code, instance, method)',
        legend="{{instance}}: {{code}} {{method}}",
    ),
    d.simple_graph(
        "External requests latency (99th percentile)",
        'histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{endpoint="apiserver", host!="[::1]:443"}[1m])) by (verb, host, instance, le))',
        legend="{{instance}}: {{verb}} {{host}}",
        yAxes=g.single_y_axis(format=g.SECONDS_FORMAT),
    ),
]
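
# `d.one_line` (also from the local defaults module) is assumed to collapse
# the indented multi-line PromQL above into a single-line expression before
# it is stored in the panel JSON. A minimal sketch under that assumption:
#
#   def one_line(expr):
#       return " ".join(line.strip() for line in expr.strip().splitlines())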

CONTROLLER_MANAGER_PANELS = [
    d.simple_graph(
        "Workqueue depths",
        'workqueue_depth{endpoint="kube-controller-manager"}',
        legend="{{name}}",
    ),
]

VM_PANELS = [
    d.simple_graph(
        "fs bytes reads by container",
        "sum(rate(container_fs_reads_bytes_total[1m])) by (container, instance)",
        legend="{{instance}}: {{container}}",
        # The expression is a rate, so the unit is bytes per second.
        yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
    ),
    d.simple_graph(
        "fs reads by container",
        "sum(rate(container_fs_reads_total[1m])) by (container, instance)",
        legend="{{instance}}: {{container}}",
    ),
    d.simple_graph(
        "fs bytes writes by container",
        "sum(rate(container_fs_writes_bytes_total[1m])) by (container, instance)",
        legend="{{instance}}: {{container}}",
        yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
    ),
    d.simple_graph(
        "fs writes by container",
        "sum(rate(container_fs_writes_total[1m])) by (container, instance)",
        legend="{{instance}}: {{container}}",
    ),
    d.Graph(
        title="CPU usage by container",
        targets=[
            d.TargetWithInterval(
                expr='sum(rate(container_cpu_usage_seconds_total{container!=""}[1m])) by (container, instance)',
                legendFormat="{{instance}}: {{container}}",
            ),
            d.TargetWithInterval(expr="machine_cpu_cores", legendFormat="limit"),
        ],
    ),
    d.Graph(
        title="Memory usage by container",
        targets=[
            d.TargetWithInterval(
                expr='sum(container_memory_usage_bytes{container!=""}) by (container, instance)',
                legendFormat="{{instance}}: {{container}}",
            ),
            d.TargetWithInterval(expr="machine_memory_bytes", legendFormat="limit"),
        ],
        yAxes=g.single_y_axis(format=g.BYTES_FORMAT),
    ),
    d.Graph(
        title="Memory working set by container",
        targets=[
            d.TargetWithInterval(
                expr='sum(container_memory_working_set_bytes{container!=""}) by (container, instance)',
                legendFormat="{{instance}}: {{container}}",
            ),
            d.TargetWithInterval(expr="machine_memory_bytes", legendFormat="limit"),
        ],
        yAxes=g.single_y_axis(format=g.BYTES_FORMAT),
    ),
    d.Graph(
        title="Network usage (bytes)",
        targets=[
            d.Target(
                expr='rate(container_network_transmit_bytes_total{id="/"}[1m])',
                legendFormat="{{instance}} transmit",
            ),
            d.Target(
                expr='rate(container_network_receive_bytes_total{id="/"}[1m])',
                legendFormat="{{instance}} receive",
            ),
        ],
        yAxes=g.single_y_axis(format=g.BYTES_PER_SEC_FORMAT),
    ),
    d.Graph(
        title="Network usage (packets)",
        targets=[
            d.Target(
                expr='rate(container_network_transmit_packets_total{id="/"}[1m])',
                legendFormat="{{instance}} transmit",
            ),
            d.Target(
                expr='rate(container_network_receive_packets_total{id="/"}[1m])',
                legendFormat="{{instance}} receive",
            ),
        ],
    ),
    d.Graph(
        title="Network usage (avg packet size)",
        targets=[
            d.Target(
                expr='rate(container_network_transmit_bytes_total{id="/"}[1m]) / rate(container_network_transmit_packets_total{id="/"}[1m])',
                legendFormat="{{instance}} transmit",
            ),
            d.Target(
                expr='rate(container_network_receive_bytes_total{id="/"}[1m]) / rate(container_network_receive_packets_total{id="/"}[1m])',
                legendFormat="{{instance}} receive",
            ),
        ],
        yAxes=g.single_y_axis(format=g.BYTES_FORMAT),
    ),
    d.Graph(
        title="Network TCP segments",
        targets=[
            d.Target(
                expr="sum(rate(node_netstat_Tcp_InSegs[1m])) by (instance)",
                legendFormat="InSegs {{instance}}",
            ),
            d.Target(
                expr="sum(rate(node_netstat_Tcp_OutSegs[1m])) by (instance)",
                legendFormat="OutSegs {{instance}}",
            ),
            d.Target(
                expr="sum(rate(node_netstat_Tcp_RetransSegs[1m])) by (instance)",
                legendFormat="RetransSegs {{instance}}",
            ),
        ],
        yAxes=g.single_y_axis(format=g.SHORT_FORMAT, logBase=10),
    ),
]
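
# A minimal sketch (an illustration, not the repository's actual dashboard
# definition) of how these panel lists are consumed: the accompanying
# *.dashboard.py files pass them to grafanalib rows. The dashboard title and
# row layout below are assumptions made for the example.
if __name__ == "__main__":
    _example = g.Dashboard(
        title="Master dashboard (example)",
        rows=[
            g.Row(title="API call latency", panels=API_CALL_LATENCY_PANELS),
            g.Row(title="API Priority and Fairness", panels=APF_PANELS),
            g.Row(title="Master VM", panels=VM_PANELS),
        ],
    ).auto_panel_ids()
    print(_example.title)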