github.com/redhat-appstudio/e2e-tests@v0.0.0-20240520140907-9709f6f59323/tests/load-tests/cluster_read_config.yaml (about)

     1  - name: measurements.tekton_pipelines_controller_running_pipelineruns_count
     2    monitoring_query: sum(tekton_pipelines_controller_running_pipelineruns_count)
     3    monitoring_step: 15
     4  
        # Every entry in this list has the same three keys: `name` (the key the
        # result is reported under), `monitoring_query` (a PromQL expression) and
        # `monitoring_step` (presumably the query resolution in seconds - TODO
        # confirm against the tool that consumes this file).
        #
        # Attachable volumes currently in use, summed over all series.
     5  - name: measurements.storage_count_attachable_volumes_in_use
     6    monitoring_query: sum(storage_count_attachable_volumes_in_use)
     7    monitoring_step: 15
     8  
        # Cluster-wide CPU usage: sum of the per-container sum_irate series.
     9  - name: measurements.cluster_cpu_usage_seconds_total_rate
    10    monitoring_query: sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=""})
    11    monitoring_step: 15
    12  
        # Cluster-wide RSS memory as reported by the kubelet cadvisor endpoint;
        # series without a container label are excluded.
    13  - name: measurements.cluster_memory_usage_rss_total
    14    monitoring_query: sum(container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", container!=""})
    15    monitoring_step: 15
    16  
        # Disk read + write throughput (5m rate) over devices matching the regex.
    17  - name: measurements.cluster_disk_throughput_total
    18    monitoring_query: sum (rate(container_fs_reads_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]) + rate(container_fs_writes_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]))
    19    monitoring_step: 15
    20  
        # Token pool, split by the rateLimited label (primary / secondary).
        # NOTE(review): rate() is intended for counters, but the metric name
        # (token_pool_gauge) suggests a gauge - confirm the metric type.
    21  - name: measurements.token_pool_rate_primary
    22    monitoring_query: sum(rate(token_pool_gauge{rateLimited="primary"}[5m]))
    23    monitoring_step: 15
    24  
    25  - name: measurements.token_pool_rate_secondary
    26    monitoring_query: sum(rate(token_pool_gauge{rateLimited="secondary"}[5m]))
    27    monitoring_step: 15
    28  
        # Node and pod counts.
    29  - name: measurements.cluster_nodes_worker_count
    30    monitoring_query: count(kube_node_role{role="worker"})
    31    monitoring_step: 15
    32  
    33  - name: measurements.cluster_pods_count
    34    monitoring_query: count(kube_pod_info)
    35    monitoring_step: 15
    36  
        # Pods that sit on a node with role "worker" and are in phase Running.
    37  - name: measurements.cluster_running_pods_on_workers_count
    38    monitoring_query: count(kube_pod_info * on(node) group_left(role) kube_node_role{role="worker"} and on(pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"} > 0))
    39    monitoring_step: 15
    40  
    41  - name: measurements.scheduler_pending_pods_count
    42    monitoring_query: sum(scheduler_pending_pods)
    43    monitoring_step: 15
    44  
        # NOTE(review): the key below has a doubled "tekton_tekton" prefix, unlike
        # the queried metric; it looks like a typo but is left as is because
        # consumers may already depend on this key - confirm before renaming.
    45  - name: measurements.tekton_tekton_pipelines_controller_workqueue_depth
    46    monitoring_query: sum(tekton_pipelines_controller_workqueue_depth)
    47    monitoring_step: 15
    48  
        # Sum over all series of the per-series ratio _sum / _count.
    49  - name: measurements.pipelinerun_duration_scheduled_seconds
    50    monitoring_query: sum(pipelinerun_duration_scheduled_seconds_sum / pipelinerun_duration_scheduled_seconds_count)
    51    monitoring_step: 15
    52  
        # Running TaskRuns throttled by node and by quota.
    53  - name: measurements.tekton_pipelines_controller_running_taskruns_throttled_by_node
    54    monitoring_query: sum(tekton_pipelines_controller_running_taskruns_throttled_by_node_count)
    55    monitoring_step: 15
    56  
    57  - name: measurements.tekton_pipelines_controller_running_taskruns_throttled_by_quota
    58    monitoring_query: sum(tekton_pipelines_controller_running_taskruns_throttled_by_quota_count)
    59    monitoring_step: 15
    60  
        # Average etcd request duration: 5m rate of _sum divided by 5m rate of _count.
    61  - name: measurements.etcd_request_duration_seconds_average
    62    monitoring_query: sum(rate(etcd_request_duration_seconds_sum{}[5m])) / sum(rate(etcd_request_duration_seconds_count[5m]))
    63    monitoring_step: 15
    64  
        # Container network traffic: receive + transmit, then each direction alone.
    65  - name: measurements.cluster_network_bytes_total
    66    monitoring_query: sum(irate(container_network_receive_bytes_total{cluster="",namespace=~".*"}[5m])) + sum(irate(container_network_transmit_bytes_total{cluster="",namespace=~".*"}[5m]))
    67    monitoring_step: 15
    68  
    69  - name: measurements.cluster_network_receive_bytes_total
    70    monitoring_query: sum(irate(container_network_receive_bytes_total{cluster="",namespace=~".*"}[5m]))
    71    monitoring_step: 15
    72  
    73  - name: measurements.cluster_network_transmit_bytes_total
    74    monitoring_query: sum(irate(container_network_transmit_bytes_total{cluster="",namespace=~".*"}[5m]))
    75    monitoring_step: 15
    76  
        # NOTE(review): namespace=~".*" also matches series that have no namespace
        # label, so the matcher does not filter anything here.
    77  - name: measurements.node_disk_io_time_seconds_total
    78    monitoring_query: sum(irate(node_disk_io_time_seconds_total{cluster="",namespace=~".*"}[5m]))
    79    monitoring_step: 15
    80  
    81  # redhat-appstudio metrics
    82  # Availability of the GitHub app
    83  - name: measurements.redhat_appstudio_buildservice_global_github_app_available
    84    monitoring_query: sum(redhat_appstudio_buildservice_global_github_app_available)
    85    monitoring_step: 15
    86  
    87  # Time from component creation until simple build pipeline submission or PaC provisioning, in seconds
        # NOTE(review): only the `_sum` series is collected here and below; an
        # average would also need the matching `_count` series - confirm that a
        # running total is what is intended.
    88  - name: measurements.redhat_appstudio_buildservice_component_onboarding_time_sum
    89    monitoring_query: sum(redhat_appstudio_buildservice_component_onboarding_time_sum)
    90    monitoring_step: 15
    91  
    92  # Time from image repository provisioning until it is ready to use, in seconds
    93  - name: measurements.redhat_appstudio_imagecontroller_image_repository_provision_time_sum
    94    monitoring_query: sum(redhat_appstudio_imagecontroller_image_repository_provision_time_sum)
    95    monitoring_step: 15
    96  
    97  # Interesting CI environment variables
        # The template loop below renders one entry per listed variable, pairing
        # the key metadata.env.<VAR> with `env_variable: <VAR>`.
        # To record another variable, add its name to the list.
    98  {% for var in [
    99    'BUILD_ID',
   100    'HOSTNAME',
   101    'JOB_NAME',
   102    'OPENSHIFT_API',
   103    'PROW_JOB_ID',
   104    'PULL_BASE_REF',
   105    'PULL_BASE_SHA',
   106    'PULL_HEAD_REF',
   107    'PULL_NUMBER',
   108    'PULL_PULL_SHA',
   109    'PULL_REFS',
   110    'REPO_NAME',
   111    'REPO_OWNER',
   112    'SCENARIO',
   113  ] %}
   114  - name: metadata.env.{{ var }}
   115    env_variable: {{ var }}
   116  {% endfor %}
   117  
   118  # Git info
        # The git_info(dir, path) macro renders seven entries named
        # metadata.git.<path>.commit.*. Each one changes into <dir> and runs
        # `git log -1` with a different --pretty format: full hash (%H),
        # abbreviated hash (%h), author date (%aI), committer date (%cI),
        # subject (%s), author name (%aN) and author email (%aE).
        # It is expanded twice below: for '.' and for 'tmp/infra-deployments'.
        # NOTE(review): both are relative paths, so the result depends on the
        # working directory the commands are run from - confirm.
   119  {% macro git_info(dir, path) -%}
   120  - name: metadata.git.{{ path }}.commit.hash
   121    command: cd "{{ dir }}" && git log -1 --pretty=format:"%H"
   122  - name: metadata.git.{{ path }}.commit.abbreviated_hash
   123    command: cd "{{ dir }}" && git log -1 --pretty=format:"%h"
   124  - name: metadata.git.{{ path }}.commit.author_date
   125    command: cd "{{ dir }}" && git log -1 --pretty=format:"%aI"
   126  - name: metadata.git.{{ path }}.commit.committer_date
   127    command: cd "{{ dir }}" && git log -1 --pretty=format:"%cI"
   128  - name: metadata.git.{{ path }}.commit.subject
   129    command: cd "{{ dir }}" && git log -1 --pretty=format:"%s"
   130  - name: metadata.git.{{ path }}.commit.author_name
   131    command: cd "{{ dir }}" && git log -1 --pretty=format:"%aN"
   132  - name: metadata.git.{{ path }}.commit.author_email
   133    command: cd "{{ dir }}" && git log -1 --pretty=format:"%aE"
   134  {%- endmacro %}
   135  {{ git_info('.', 'redhat_appstudio.e2e_tests') }}
   136  {{ git_info('tmp/infra-deployments', 'redhat_appstudio.infra_deployments') }}
   137  
   138  # Cluster version
        # Records the output of `oc version -o json`; `output: json` presumably
        # tells the consumer to parse the command output as JSON (TODO confirm).
   139  - name: metadata.cluster.versions
   140    command: oc version -o json
   141    output: json
   142  
   143  # Cluster nodes info
        # Control-plane nodes are selected with the node-role.kubernetes.io/master
        # label, compute nodes with node-role.kubernetes.io/worker.
        # For each group: node count, comma-separated list of distinct instance
        # types ("flavor"), and the list of node names.
        # The flavor queries prefer the node.kubernetes.io/instance-type label and
        # fall back to the deprecated beta.kubernetes.io/instance-type label.
        # jq's `unique` already returns a sorted array, so no extra sort is needed.
   144  - name: metadata.cluster.control-plane.count
   145    command: oc get nodes -l node-role.kubernetes.io/master -o name | wc -l
   146  
   147  - name: metadata.cluster.control-plane.flavor
   148    command: oc get nodes -l node-role.kubernetes.io/master -o json | jq --raw-output '.items | map(.metadata.labels."node.kubernetes.io/instance-type" // .metadata.labels."beta.kubernetes.io/instance-type") | unique | join(",")'
   149  
   150  - name: metadata.cluster.control-plane.nodes
   151    command: oc get nodes -l node-role.kubernetes.io/master -o json | jq '.items | map(.metadata.name)'
   152    output: json
   153  
   154  - name: metadata.cluster.compute-nodes.count
   155    command: oc get nodes -l node-role.kubernetes.io/worker -o name | wc -l
   156  
   157  - name: metadata.cluster.compute-nodes.flavor
   158    command: oc get nodes -l node-role.kubernetes.io/worker -o json | jq --raw-output '.items | map(.metadata.labels."node.kubernetes.io/instance-type" // .metadata.labels."beta.kubernetes.io/instance-type") | unique | join(",")'
   159  
   160  - name: metadata.cluster.compute-nodes.nodes
   161    command: oc get nodes -l node-role.kubernetes.io/worker -o json | jq '.items | map(.metadata.name)'
   162    output: json
   163  
        # Load-test scenario definition, converted to a JSON object.
        # If the scenario file for $SCENARIO is readable, sed rewrites it in three
        # steps: (1) backslash-space becomes a comma, (2) every
        # "<word> KEY=VALUE" becomes "KEY":"VALUE", and (3) the trailing comma is
        # dropped and the line is wrapped in braces. Otherwise {} is printed.
        # The path is kept in a quoted shell variable so an unusual SCENARIO value
        # cannot split into several words.
   164  - name: metadata.scenario
   165    command: f="/usr/local/ci-secrets/redhat-appstudio-load-test/load-test-scenario.${SCENARIO}"; if [ -r "$f" ]; then sed -e 's/\\ /,/g' -e 's/[^ ]* \([^= ]*\)=\([^= ]*\)/"\1":"\2",/g' -e 's/\(.*\),$/{\1}/g' "$f"; else echo '{}'; fi
   166    output: json
   167  
   168  {% macro monitor_pod(namespace, pod, step=15, pod_suffix_regex='-[0-9a-f]+-.*') -%}
   169  # Gather monitoring data about the pod
        # Macro arguments:
        #   namespace         - value the namespace label must equal
        #   pod               - pod name prefix; also used in the entry names
        #   step              - monitoring_step of the entries (default 15)
        #   pod_suffix_regex  - regex appended to the pod prefix when matching
        #                       the pod label (default '-[0-9a-f]+-.*')
        # Renders entries named measurements.<pod>.<metric> for: cpu, memory,
        # network_throughput, network_drop, disk_throughput, restarts, count_ready.
        # cpu, memory, restarts and count_ready use step as given; the rate()
        # based entries use a range window of step*4 seconds and a
        # monitoring_step of step*4.
        # memory skips the 'POD' container and series without a container label;
        # disk_throughput skips device /dev/dm-0.
   170  - name: measurements.{{ pod }}.cpu
   171    monitoring_query: sum(pod:container_cpu_usage:sum{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'})
   172    monitoring_step: {{ step }}
   173  - name: measurements.{{ pod }}.memory
   174    monitoring_query: sum(container_memory_usage_bytes{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', container!='POD', container!=''})
   175    monitoring_step: {{ step }}
   176  - name: measurements.{{ pod }}.network_throughput
   177    monitoring_query: sum( rate(container_network_transmit_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) + rate(container_network_receive_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) )
   178    monitoring_step: {{ step * 4 }}
   179  - name: measurements.{{ pod }}.network_drop
   180    monitoring_query: sum( rate(container_network_transmit_packets_dropped_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) + rate(container_network_receive_packets_dropped_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) )
   181    monitoring_step: {{ step * 4 }}
   182  - name: measurements.{{ pod }}.disk_throughput
   183    monitoring_query: sum( sum(rate(container_fs_reads_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', device!='/dev/dm-0'}[{{ step * 4 }}s])) + sum(rate(container_fs_writes_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', device!='/dev/dm-0'}[{{ step * 4 }}s])) )
   184    monitoring_step: {{ step * 4 }}
   185  - name: measurements.{{ pod }}.restarts
   186    monitoring_query: sum(kube_pod_container_status_restarts_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'})
   187    monitoring_step: {{ step }}
   188  - name: measurements.{{ pod }}.count_ready
   189    monitoring_query: sum( kube_pod_status_ready{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'} )
   190    monitoring_step: {{ step }}
   191  {%- endmacro %}
   192  
   193  {% macro monitor_pod_container(namespace, pod, container, step=15, pod_suffix_regex='-[0-9a-f]+-.*') -%}
   194  # Gather monitoring data about the pod's container
        # Renders a single entry, measurements.<pod>.container[<container>].memory:
        # container_memory_usage_bytes summed over series whose namespace label
        # equals namespace, whose pod label matches the pod prefix followed by
        # pod_suffix_regex, and whose container label equals container.
        # step (default 15) is written out as the entry's monitoring_step.
   195  - name: measurements.{{ pod }}.container[{{ container }}].memory
   196    monitoring_query: sum(container_memory_usage_bytes{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', container='{{container}}'})
   197    monitoring_step: {{ step }}
   198  {%- endmacro %}
   199  
   200  {{ monitor_pod('openshift-pipelines', 'tekton-pipelines-controller', 15) }}
        # The two tekton-results-watcher expansions below pass step=1 and a looser
        # pod suffix regex ('-.*') instead of the macro defaults.
        # NOTE(review): with step=1 the rate() windows inside monitor_pod become
        # 4s; confirm the metrics are scraped often enough for such a window to
        # contain at least two samples, otherwise those queries return no data.
   201  {{ monitor_pod('tekton-results', 'tekton-results-watcher', 1, '-.*') }}
   202  {{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 1, '-.*') }}