#!/bin/bash

# tests/load-tests/ci-scripts/max-concurrency/collect-results.sh
# (from github.com/redhat-appstudio/e2e-tests)
#
# Collects the results of a max-concurrency load-test run and copies them
# into $ARTIFACT_DIR for CI archiving: raw logs and pprof dumps, Prometheus
# monitoring data (via status_data.py from redhat-performance/opl), a
# per-iteration scalability CSV, and PipelineRun timestamp CSVs.
#
# Usage: collect-results.sh [scenario] [workdir]
#   $1 - load-test scenario suffix (default: "concurrent"); selects the
#        secret file sourced below
#   $2 - directory to pushd into before collecting (default: ".")
#
# Required env: ARTIFACT_DIR
# Optional env: OUTPUT_DIR, TEKTON_PERF_ENABLE_CPU_PROFILING,
#               TEKTON_PERF_ENABLE_MEMORY_PROFILING

set -o nounset
set -o errexit
set -o pipefail

# shellcheck disable=SC1090
source "/usr/local/ci-secrets/redhat-appstudio-load-test/load-test-scenario.${1:-concurrent}"

pushd "${2:-.}"

output_dir="${OUTPUT_DIR:-./tests/load-tests}"

csv_delim=";"
csv_delim_quoted="\"$csv_delim\"" # delimiter as a quoted jq string literal
dt_format='"%Y-%m-%dT%H:%M:%SZ"'  # jq strptime format for Kubernetes timestamps

# Copy raw load-test logs, the summary JSON and all pprof dumps into
# $ARTIFACT_DIR.
collect_artifacts() {
    echo "Collecting load test artifacts.."
    mkdir -p "${ARTIFACT_DIR}/logs"
    find "$output_dir" -type f -name 'load-tests.max-concurrency.*.log' -exec cp -vf {} "${ARTIFACT_DIR}/logs" \;
    find "$output_dir" -type f -name 'load-tests.max-concurrency.json' -exec cp -vf {} "${ARTIFACT_DIR}" \;
    mkdir -p "${ARTIFACT_DIR}/pprof"
    find "$output_dir" -type f -name '*.pprof' -exec cp -vf {} "${ARTIFACT_DIR}/pprof" \;
}

# Gather Prometheus/Thanos monitoring data for the whole test and then for
# each iteration, enriching the status-data JSON files via status_data.py.
collect_monitoring_data() {
    echo "Collecting monitoring data..."
    echo "Setting up tool to collect monitoring data..."
    python3 -m venv venv
    set +u # venv activate script references unset variables
    # shellcheck disable=SC1091
    source venv/bin/activate
    set -u
    python3 -m pip install -U pip
    python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core"

    # The Thanos query endpoint is the same for every collection below, so
    # resolve the route only once instead of per iteration.
    mhost=$(oc -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')

    ## Monitoring data for entire test
    monitoring_collection_data=$(find "$output_dir" -type f -name 'load-tests.max-concurrency.json')
    monitoring_collection_log="$ARTIFACT_DIR/monitoring-collection.log"
    monitoring_collection_dir=$ARTIFACT_DIR/monitoring-collection-raw-data-dir
    mkdir -p "$monitoring_collection_dir"
    echo "Collecting monitoring data for entire test"
    mstart=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get startTimestamp)" --iso-8601=seconds)
    mend=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get endTimestamp)" --iso-8601=seconds)
    status_data.py \
        --status-data-file "$monitoring_collection_data" \
        --additional ./tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml \
        --monitoring-start "$mstart" \
        --monitoring-end "$mend" \
        --monitoring-raw-data-dir "$monitoring_collection_dir" \
        --prometheus-host "https://$mhost" \
        --prometheus-port 443 \
        --prometheus-token "$(oc whoami -t)" \
        -d &>"$monitoring_collection_log"
    cp -f "$monitoring_collection_data" "$ARTIFACT_DIR"

    ## Monitoring data per iteration
    # NOTE: per-iteration files use the "timestamp" field as the start marker
    # (the whole-test file uses "startTimestamp").
    while IFS= read -r monitoring_collection_data; do
        iteration_index=$(echo "$monitoring_collection_data" | sed -e 's,.*/load-tests.max-concurrency.\([0-9]\+-[0-9]\+\).json,\1,')
        monitoring_collection_log="$ARTIFACT_DIR/monitoring-collection.$iteration_index.log"
        echo "Collecting monitoring data for step $iteration_index..."
        mstart=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get timestamp)" --iso-8601=seconds)
        mend=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get endTimestamp)" --iso-8601=seconds)
        status_data.py \
            --status-data-file "$monitoring_collection_data" \
            --additional ./tests/load-tests/cluster_read_config.yaml \
            --monitoring-start "$mstart" \
            --monitoring-end "$mend" \
            --prometheus-host "https://$mhost" \
            --prometheus-port 443 \
            --prometheus-token "$(oc whoami -t)" \
            -d &>"$monitoring_collection_log"
        cp -f "$monitoring_collection_data" "$ARTIFACT_DIR"
    done < <(find "$output_dir" -type f -name 'load-tests.max-concurrency.*.json')
    set +u
    deactivate
    set -u
}

# Render captured Tekton pprof profiles as text and SVG reports. Only runs
# when CPU or memory profiling was enabled for the test.
collect_tekton_profiling_data() {
    echo "Collecting Tekton profiling data..."
    if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ] || [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then
        echo "Collecting profiling data from Tekton"
        # Create the target dir here too, so this collector does not depend
        # on collect_artifacts having run first.
        mkdir -p "$ARTIFACT_DIR/pprof"
        while IFS= read -r pprof_profile; do
            if [ -s "$pprof_profile" ]; then
                file=$(basename "$pprof_profile")
                go tool pprof -text "$pprof_profile" >"$ARTIFACT_DIR/pprof/$file.txt" || true
                go tool pprof -svg -output="$ARTIFACT_DIR/pprof/$file.svg" "$pprof_profile" || true
            fi
        done < <(find "$output_dir" -name "*.pprof")
    fi
}

# Count tekton-results-watcher pods by counting their first goroutine dumps.
get_tekton_results_watcher_pod_count() {
    find "$output_dir" -type f -name 'tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.0001-*.pprof' | wc -l
}

# Build $ARTIFACT_DIR/max-concurrency.csv: one row per iteration with KPIs,
# timings and cluster measurements extracted from the per-iteration JSONs.
collect_scalability_data() {
    echo "Collecting scalability data..."

    tekton_results_watcher_pod_count=$(get_tekton_results_watcher_pod_count)
    tekton_results_watcher_pod_headers=""
    for i in $(seq -w 1 "$tekton_results_watcher_pod_count"); do
        tekton_results_watcher_pod_headers="${tekton_results_watcher_pod_headers}${csv_delim}ParkedGoRoutinesPod$i"
    done

    max_concurrency_csv=$ARTIFACT_DIR/max-concurrency.csv
    echo "Iteration\
${csv_delim}Threads\
${csv_delim}WorkloadKPI\
${csv_delim}Errors\
${csv_delim}UserAvgTime\
${csv_delim}UserMaxTime\
${csv_delim}ApplicationAvgTime\
${csv_delim}ApplicationMaxTime\
${csv_delim}CDQAvgTime\
${csv_delim}CDQMaxTime\
${csv_delim}ComponentsAvgTime\
${csv_delim}ComponentsMaxTime\
${csv_delim}PipelineRunAvgTime\
${csv_delim}PipelineRunMaxTime\
${csv_delim}IntegrationTestsRunPipelineSucceededTimeAvg\
${csv_delim}IntegrationTestsRunPipelineSucceededTimeMax\
${csv_delim}DeploymentSucceededTimeAvg\
${csv_delim}DeploymentSucceededTimeMax\
${csv_delim}ClusterCPUUsageAvg\
${csv_delim}ClusterDiskUsageAvg\
${csv_delim}ClusterMemoryUsageAvg\
${csv_delim}ClusterPodCountAvg\
${csv_delim}ClusterNodesWorkerCountAvg\
${csv_delim}ClusterRunningPodsOnWorkersCountAvg\
${csv_delim}ClusterPVCInUseAvg\
${csv_delim}TektonResultsWatcherMemoryMin\
${csv_delim}TektonResultsWatcherMemoryMax\
${csv_delim}TektonResultsWatcherMemoryRange\
${tekton_results_watcher_pod_headers}\
${csv_delim}SchedulerPendingPodsCountAvg\
${csv_delim}TokenPoolRatePrimaryAvg\
${csv_delim}TokenPoolRateSecondaryAvg\
${csv_delim}ClusterPipelineRunCountAvg\
${csv_delim}ClusterPipelineWorkqueueDepthAvg\
${csv_delim}ClusterPipelineScheduleFirstPodAvg\
${csv_delim}ClusterTaskRunThrottledByNodeResourcesAvg\
${csv_delim}ClusterTaskRunThrottledByDefinedQuotaAvg\
${csv_delim}EtcdRequestDurationSecondsAvg\
${csv_delim}ClusterNetworkBytesTotalAvg\
${csv_delim}ClusterNetworkReceiveBytesTotalAvg\
${csv_delim}ClusterNetworkTransmitBytesTotalAvg\
${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \
        >"$max_concurrency_csv"
    # Newline-separated list; CI-generated filenames contain no whitespace.
    mc_files=$(find "$output_dir" -type f -name 'load-tests.max-concurrency.*.json')
    if [ -n "$mc_files" ]; then
        for i in $mc_files; do
            iteration_index=$(echo "$i" | sed -e 's,'"$output_dir"'/load-tests.max-concurrency.\([0-9-]\+\).*,\1,g')

            parked_go_routines=$(get_parked_go_routines "$iteration_index")
            parked_go_routines_columns=""
            if [ -n "$parked_go_routines" ]; then
                for g in $parked_go_routines; do
                    parked_go_routines_columns="$parked_go_routines_columns + $csv_delim_quoted + \"$g\""
                done
            else
                # No dumps for this iteration: emit empty columns, one per pod
                # (reuse the count computed above; it is invariant here).
                for _ in $(seq 1 "$tekton_results_watcher_pod_count"); do
                    parked_go_routines_columns="$parked_go_routines_columns + $csv_delim_quoted"
                done
            fi
            jq -rc "(.metadata.\"max-concurrency\".iteration | tostring) \
                + $csv_delim_quoted + (.threads | tostring) \
                + $csv_delim_quoted + (.workloadKPI | tostring) \
                + $csv_delim_quoted + (.errorsTotal | tostring) \
                + $csv_delim_quoted + (.createUserTimeAvg | tostring) \
                + $csv_delim_quoted + (.createUserTimeMax | tostring) \
                + $csv_delim_quoted + (.createApplicationsTimeAvg | tostring) \
                + $csv_delim_quoted + (.createApplicationsTimeMax | tostring) \
                + $csv_delim_quoted + (.createCDQsTimeAvg | tostring) \
                + $csv_delim_quoted + (.createCDQsTimeMax | tostring) \
                + $csv_delim_quoted + (.createComponentsTimeAvg | tostring) \
                + $csv_delim_quoted + (.createComponentsTimeMax | tostring) \
                + $csv_delim_quoted + (.runPipelineSucceededTimeAvg | tostring) \
                + $csv_delim_quoted + (.runPipelineSucceededTimeMax | tostring) \
                + $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeAvg | tostring) \
                + $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeMax | tostring) \
                + $csv_delim_quoted + (.deploymentSucceededTimeAvg | tostring) \
                + $csv_delim_quoted + (.deploymentSucceededTimeMax | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_cpu_usage_seconds_total_rate.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_disk_throughput_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_memory_usage_rss_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_pods_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_nodes_worker_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_running_pods_on_workers_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.storage_count_attachable_volumes_in_use.mean | tostring) \
                + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.min | tostring) \
                + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.max | tostring) \
                + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.range | tostring) \
                ${parked_go_routines_columns} \
                + $csv_delim_quoted + (.measurements.scheduler_pending_pods_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.token_pool_rate_primary.mean | tostring) \
                + $csv_delim_quoted + (.measurements.token_pool_rate_secondary.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_pipelines_controller_running_pipelineruns_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_tekton_pipelines_controller_workqueue_depth.mean | tostring) \
                + $csv_delim_quoted + (.measurements.pipelinerun_duration_scheduled_seconds.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_pipelines_controller_running_taskruns_throttled_by_node.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_pipelines_controller_running_taskruns_throttled_by_quota.mean | tostring) \
                + $csv_delim_quoted + (.measurements.etcd_request_duration_seconds_average.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_network_bytes_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_network_receive_bytes_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_network_transmit_bytes_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.node_disk_io_time_seconds_total.mean | tostring)" \
                "$i" >>"$max_concurrency_csv"
        done
    else
        echo "WARNING: No file matching '$output_dir/load-tests.max-concurrency.*.json' found!"
    fi
}

# Print the parked-goroutine count for each tekton-results-watcher pod in
# iteration $1, space-separated, by counting runtime.gopark frames in the
# corresponding goroutine dumps.
get_parked_go_routines() {
    goroutines_pprof=$(find "$output_dir" -name "tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.$1.pprof")
    count=0
    for i in $goroutines_pprof; do
        if [ $count -gt 0 ]; then
            printf ' '
        fi
        printf '%s' "$(go tool pprof -text "$i" 2>/dev/null | grep 'runtime.gopark$' | sed -e 's,[ ]*\([0-9]\+\) .*,\1,g')"
        count=$((count + 1))
    done
}

# Dump all PipelineRuns in the cluster into a CSV of lifecycle timestamps and
# derived phase durations, sorted by succeeded duration (column 13, descending).
collect_timestamp_csvs() {
    echo "Collecting PipelineRun timestamps..."
    pipelinerun_timestamps=$ARTIFACT_DIR/pipelineruns.tekton.dev_timestamps.csv
    echo "PipelineRun${csv_delim}Namespace${csv_delim}Succeeded${csv_delim}Reason${csv_delim}Message${csv_delim}Created${csv_delim}Started${csv_delim}FinallyStarted${csv_delim}Completed${csv_delim}Created->Started${csv_delim}Started->FinallyStarted${csv_delim}FinallyStarted->Completed${csv_delim}SucceededDuration${csv_delim}FailedDuration" >"$pipelinerun_timestamps"
    # Condition message may itself contain the delimiter, so split/join it away.
    jq_cmd=".items[] | (.metadata.name) \
        + $csv_delim_quoted + (.metadata.namespace) \
        + $csv_delim_quoted + (.status.conditions[0].status) \
        + $csv_delim_quoted + (.status.conditions[0].reason) \
        + $csv_delim_quoted + (.status.conditions[0].message|split($csv_delim_quoted)|join(\"_\")) \
        + $csv_delim_quoted + (.metadata.creationTimestamp) \
        + $csv_delim_quoted + (.status.startTime) \
        + $csv_delim_quoted + (.status.finallyStartTime) \
        + $csv_delim_quoted + (.status.completionTime) \
        + $csv_delim_quoted + (if .status.startTime != null and .metadata.creationTimestamp != null then ((.status.startTime | strptime($dt_format) | mktime) - (.metadata.creationTimestamp | strptime($dt_format) | mktime) | tostring) else \"\" end) \
        + $csv_delim_quoted + (if .status.finallyStartTime != null and .status.startTime != null then ((.status.finallyStartTime | strptime($dt_format) | mktime) - (.status.startTime | strptime($dt_format) | mktime) | tostring) else \"\" end) \
        + $csv_delim_quoted + (if .status.completionTime != null and .status.finallyStartTime != null then ((.status.completionTime | strptime($dt_format) | mktime) - (.status.finallyStartTime | strptime($dt_format) | mktime) | tostring) else \"\" end) \
        + $csv_delim_quoted + (if .status.conditions[0].status == \"True\" and .status.completionTime != null and .metadata.creationTimestamp != null then ((.status.completionTime | strptime($dt_format) | mktime) - (.metadata.creationTimestamp | strptime($dt_format) | mktime) | tostring) else \"\" end) \
        + $csv_delim_quoted + (if .status.conditions[0].status == \"False\" and .status.completionTime != null and .metadata.creationTimestamp != null then ((.status.completionTime | strptime($dt_format) | mktime) - (.metadata.creationTimestamp | strptime($dt_format) | mktime) | tostring) else \"\" end)"
    oc get pipelineruns.tekton.dev -A -o json | jq "$jq_cmd" | sed -e "s/\n//g" -e "s/^\"//g" -e "s/\"$//g" -e "s/Z;/;/g" | sort -t ";" -k 13 -r -n >>"$pipelinerun_timestamps"
}

# Each collector is best-effort: a failure in one must not block the others.
echo "Collecting max concurrency results..."
collect_artifacts || true
collect_timestamp_csvs || true
collect_monitoring_data || true
collect_scalability_data || true
collect_tekton_profiling_data || true
popd