# Origin: github.com/redhat-appstudio/e2e-tests@v0.0.0-20240520140907-9709f6f59323/tests/load-tests/cluster_read_config.yaml
#
# Templated YAML list describing which facts to collect about a load-test run.
# Every entry has a dotted `name` (where the value is stored) and one source:
#   monitoring_query + monitoring_step  - a PromQL expression and its step
#   env_variable                        - name of an environment variable
#   command (+ optional `output: json`) - a shell command whose output is stored
# NOTE(review): monitoring_step is presumably in seconds - confirm with the tool
# that consumes this file.

- name: measurements.tekton_pipelines_controller_running_pipelineruns_count
  monitoring_query: sum(tekton_pipelines_controller_running_pipelineruns_count)
  monitoring_step: 15

- name: measurements.storage_count_attachable_volumes_in_use
  monitoring_query: sum(storage_count_attachable_volumes_in_use)
  monitoring_step: 15

- name: measurements.cluster_cpu_usage_seconds_total_rate
  monitoring_query: sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=""})
  monitoring_step: 15

- name: measurements.cluster_memory_usage_rss_total
  monitoring_query: sum(container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", container!=""})
  monitoring_step: 15

- name: measurements.cluster_disk_throughput_total
  monitoring_query: sum (rate(container_fs_reads_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]) + rate(container_fs_writes_bytes_total{id!="", device=~"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+", cluster=""}[5m]))
  monitoring_step: 15

- name: measurements.token_pool_rate_primary
  monitoring_query: sum(rate(token_pool_gauge{rateLimited="primary"}[5m]))
  monitoring_step: 15

- name: measurements.token_pool_rate_secondary
  monitoring_query: sum(rate(token_pool_gauge{rateLimited="secondary"}[5m]))
  monitoring_step: 15

- name: measurements.cluster_nodes_worker_count
  monitoring_query: count(kube_node_role{role="worker"})
  monitoring_step: 15

- name: measurements.cluster_pods_count
  monitoring_query: count(kube_pod_info)
  monitoring_step: 15

- name: measurements.cluster_running_pods_on_workers_count
  monitoring_query: count(kube_pod_info * on(node) group_left(role) kube_node_role{role="worker"} and on(pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"} > 0))
  monitoring_step: 15

- name: measurements.scheduler_pending_pods_count
  monitoring_query: sum(scheduler_pending_pods)
  monitoring_step: 15

- name: measurements.tekton_tekton_pipelines_controller_workqueue_depth
  monitoring_query: sum(tekton_pipelines_controller_workqueue_depth)
  monitoring_step: 15

- name: measurements.pipelinerun_duration_scheduled_seconds
  monitoring_query: sum(pipelinerun_duration_scheduled_seconds_sum / pipelinerun_duration_scheduled_seconds_count)
  monitoring_step: 15

- name: measurements.tekton_pipelines_controller_running_taskruns_throttled_by_node
  monitoring_query: sum(tekton_pipelines_controller_running_taskruns_throttled_by_node_count)
  monitoring_step: 15

- name: measurements.tekton_pipelines_controller_running_taskruns_throttled_by_quota
  monitoring_query: sum(tekton_pipelines_controller_running_taskruns_throttled_by_quota_count)
  monitoring_step: 15

- name: measurements.etcd_request_duration_seconds_average
  monitoring_query: sum(rate(etcd_request_duration_seconds_sum{}[5m])) / sum(rate(etcd_request_duration_seconds_count[5m]))
  monitoring_step: 15

- name: measurements.cluster_network_bytes_total
  monitoring_query: sum(irate(container_network_receive_bytes_total{cluster="",namespace=~".*"}[5m])) + sum(irate(container_network_transmit_bytes_total{cluster="",namespace=~".*"}[5m]))
  monitoring_step: 15

- name: measurements.cluster_network_receive_bytes_total
  monitoring_query: sum(irate(container_network_receive_bytes_total{cluster="",namespace=~".*"}[5m]))
  monitoring_step: 15

- name: measurements.cluster_network_transmit_bytes_total
  monitoring_query: sum(irate(container_network_transmit_bytes_total{cluster="",namespace=~".*"}[5m]))
  monitoring_step: 15

- name: measurements.node_disk_io_time_seconds_total
  monitoring_query: sum(irate(node_disk_io_time_seconds_total{cluster="",namespace=~".*"}[5m]))
  monitoring_step: 15

# redhat-appstudio metrics
# Availability of GitHub app
- name: measurements.redhat_appstudio_buildservice_global_github_app_available
  monitoring_query: sum(redhat_appstudio_buildservice_global_github_app_available)
  monitoring_step: 15

# Component creation until simple build pipeline submission or PaC provision, in seconds
- name: measurements.redhat_appstudio_buildservice_component_onboarding_time_sum
  monitoring_query: sum(redhat_appstudio_buildservice_component_onboarding_time_sum)
  monitoring_step: 15

# Image repository provision to ready to use in seconds
- name: measurements.redhat_appstudio_imagecontroller_image_repository_provision_time_sum
  monitoring_query: sum(redhat_appstudio_imagecontroller_image_repository_provision_time_sum)
  monitoring_step: 15

# Interesting CI environment variables
# (one metadata.env.<VAR> entry is generated per listed variable)
{% for var in [
  'BUILD_ID',
  'HOSTNAME',
  'JOB_NAME',
  'OPENSHIFT_API',
  'PROW_JOB_ID',
  'PULL_BASE_REF',
  'PULL_BASE_SHA',
  'PULL_HEAD_REF',
  'PULL_NUMBER',
  'PULL_PULL_SHA',
  'PULL_REFS',
  'REPO_NAME',
  'REPO_OWNER',
  'SCENARIO',
] %}
- name: metadata.env.{{ var }}
  env_variable: {{ var }}
{% endfor %}

# Git info
# git_info(dir, path): emits entries describing the latest commit (git log -1)
# of the repository found in `dir`, stored under metadata.git.<path>.commit.*
#   dir  - directory to cd into before running git
#   path - dotted key segment identifying the repository in the results
{% macro git_info(dir, path) -%}
- name: metadata.git.{{ path }}.commit.hash
  command: cd "{{ dir }}" && git log -1 --pretty=format:"%H"
- name: metadata.git.{{ path }}.commit.abbreviated_hash
  command: cd "{{ dir }}" && git log -1 --pretty=format:"%h"
- name: metadata.git.{{ path }}.commit.author_date
  command: cd "{{ dir }}" && git log -1 --pretty=format:"%aI"
- name: metadata.git.{{ path }}.commit.committer_date
  command: cd "{{ dir }}" && git log -1 --pretty=format:"%cI"
- name: metadata.git.{{ path }}.commit.subject
  command: cd "{{ dir }}" && git log -1 --pretty=format:"%s"
- name: metadata.git.{{ path }}.commit.author_name
  command: cd "{{ dir }}" && git log -1 --pretty=format:"%aN"
- name: metadata.git.{{ path }}.commit.author_email
  command: cd "{{ dir }}" && git log -1 --pretty=format:"%aE"
{%- endmacro %}
{{ git_info('.', 'redhat_appstudio.e2e_tests') }}
{{ git_info('tmp/infra-deployments', 'redhat_appstudio.infra_deployments') }}

# Cluster version
- name: metadata.cluster.versions
  command: oc version -o json
  output: json

# Cluster nodes info
# The *.flavor entries prefer the stable node.kubernetes.io/instance-type label
# and fall back to the deprecated beta.kubernetes.io/instance-type label, so the
# result is unchanged on clusters that still set only the beta label.
- name: metadata.cluster.control-plane.count
  command: oc get nodes -l node-role.kubernetes.io/master -o name | wc -l

- name: metadata.cluster.control-plane.flavor
  command: oc get nodes -l node-role.kubernetes.io/master -o json | jq --raw-output '.items | map(.metadata.labels | (."node.kubernetes.io/instance-type" // ."beta.kubernetes.io/instance-type")) | unique | sort | join(",")'

- name: metadata.cluster.control-plane.nodes
  command: oc get nodes -l node-role.kubernetes.io/master -o json | jq '.items | map(.metadata.name)'
  output: json

- name: metadata.cluster.compute-nodes.count
  command: oc get nodes -l node-role.kubernetes.io/worker -o name | wc -l

- name: metadata.cluster.compute-nodes.flavor
  command: oc get nodes -l node-role.kubernetes.io/worker -o json | jq --raw-output '.items | map(.metadata.labels | (."node.kubernetes.io/instance-type" // ."beta.kubernetes.io/instance-type")) | unique | sort | join(",")'

- name: metadata.cluster.compute-nodes.nodes
  command: oc get nodes -l node-role.kubernetes.io/worker -o json | jq '.items | map(.metadata.name)'
  output: json

# Scenario definition: if the per-scenario secret file is readable it is
# rewritten by sed into a JSON object, otherwise an empty object is emitted.
- name: metadata.scenario
  command: if [ -r /usr/local/ci-secrets/redhat-appstudio-load-test/load-test-scenario.${SCENARIO} ]; then cat /usr/local/ci-secrets/redhat-appstudio-load-test/load-test-scenario.${SCENARIO} | sed 's/\\ /,/g' | sed 's/[^ ]* \([^= ]*\)=\([^= ]*\)/"\1":"\2",/g' | sed 's/\(.*\),$/{\1}/g'; else echo '{}'; fi
  output: json

# monitor_pod(namespace, pod, step, pod_suffix_regex): emits cpu, memory,
# network throughput/drop, disk throughput, restarts and ready-count queries
# for pods whose name matches `pod` followed by `pod_suffix_regex`.
#   namespace        - namespace label used in every selector
#   pod              - pod name prefix; also used in the entry names
#   step             - monitoring_step for cpu/memory/restarts/count_ready
#                      (default 15); network and disk entries use four times
#                      this value for both the rate window and the step
#   pod_suffix_regex - regex appended to `pod` (default '-[0-9a-f]+-.*')
{% macro monitor_pod(namespace, pod, step=15, pod_suffix_regex='-[0-9a-f]+-.*') -%}
# Gather monitoring data about the pod
- name: measurements.{{ pod }}.cpu
  monitoring_query: sum(pod:container_cpu_usage:sum{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'})
  monitoring_step: {{ step }}
- name: measurements.{{ pod }}.memory
  monitoring_query: sum(container_memory_usage_bytes{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', container!='POD', container!=''})
  monitoring_step: {{ step }}
- name: measurements.{{ pod }}.network_throughput
  monitoring_query: sum( rate(container_network_transmit_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) + rate(container_network_receive_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) )
  monitoring_step: {{ step * 4 }}
- name: measurements.{{ pod }}.network_drop
  monitoring_query: sum( rate(container_network_transmit_packets_dropped_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) + rate(container_network_receive_packets_dropped_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'}[{{ step * 4 }}s]) )
  monitoring_step: {{ step * 4 }}
- name: measurements.{{ pod }}.disk_throughput
  monitoring_query: sum( sum(rate(container_fs_reads_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', device!='/dev/dm-0'}[{{ step * 4 }}s])) + sum(rate(container_fs_writes_bytes_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', device!='/dev/dm-0'}[{{ step * 4 }}s])) )
  monitoring_step: {{ step * 4 }}
- name: measurements.{{ pod }}.restarts
  monitoring_query: sum(kube_pod_container_status_restarts_total{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'})
  monitoring_step: {{ step }}
- name: measurements.{{ pod }}.count_ready
  monitoring_query: sum( kube_pod_status_ready{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}'} )
  monitoring_step: {{ step }}
{%- endmacro %}

# monitor_pod_container(namespace, pod, container, step, pod_suffix_regex):
# emits a memory query restricted to one named container of the matching pods.
# Parameters mirror monitor_pod; `container` is matched exactly.
{% macro monitor_pod_container(namespace, pod, container, step=15, pod_suffix_regex='-[0-9a-f]+-.*') -%}
# Gather monitoring data about the pod's container
- name: measurements.{{ pod }}.container[{{ container }}].memory
  monitoring_query: sum(container_memory_usage_bytes{namespace='{{ namespace }}', pod=~'{{ pod }}{{ pod_suffix_regex }}', container='{{container}}'})
  monitoring_step: {{ step }}
{%- endmacro %}

# NOTE(review): the tekton-results-watcher calls pass step=1, which makes the
# network/disk rate windows 4s wide. rate() needs at least two samples inside
# the window, so confirm the scrape interval for these targets is short enough;
# otherwise those series will come back empty.
{{ monitor_pod('openshift-pipelines', 'tekton-pipelines-controller', 15) }}
{{ monitor_pod('tekton-results', 'tekton-results-watcher', 1, '-.*') }}
{{ monitor_pod_container('tekton-results', 'tekton-results-watcher', 'watcher', 1, '-.*') }}