github.com/redhat-appstudio/e2e-tests@v0.0.0-20240520140907-9709f6f59323/tests/load-tests/ci-scripts/max-concurrency/collect-results.sh (about)

     1  #!/bin/bash
     2  
     3  set -o nounset
     4  set -o errexit
     5  set -o pipefail
     6  
     7  # shellcheck disable=SC1090
     8  source "/usr/local/ci-secrets/redhat-appstudio-load-test/load-test-scenario.${1:-concurrent}"
     9  
    10  pushd "${2:-.}"
    11  
    12  output_dir="${OUTPUT_DIR:-./tests/load-tests}"
    13  
    14  csv_delim=";"
    15  csv_delim_quoted="\"$csv_delim\""
    16  dt_format='"%Y-%m-%dT%H:%M:%SZ"'
    17  
    18  collect_artifacts() {
    19      echo "Collecting load test artifacts.."
    20      mkdir -p "${ARTIFACT_DIR}/logs"
    21      find "$output_dir" -type f -name 'load-tests.max-concurrency.*.log' -exec cp -vf {} "${ARTIFACT_DIR}/logs" \;
    22      find "$output_dir" -type f -name 'load-tests.max-concurrency.json' -exec cp -vf {} "${ARTIFACT_DIR}" \;
    23      mkdir -p "${ARTIFACT_DIR}/pprof"
    24      find "$output_dir" -type f -name '*.pprof' -exec cp -vf {} "${ARTIFACT_DIR}/pprof" \;
    25  }
    26  
    27  collect_monitoring_data() {
    28      echo "Collecting monitoring data..."
    29      echo "Setting up tool to collect monitoring data..."
    30      python3 -m venv venv
    31      set +u
    32      # shellcheck disable=SC1091
    33      source venv/bin/activate
    34      set -u
    35      python3 -m pip install -U pip
    36      python3 -m pip install -e "git+https://github.com/redhat-performance/opl.git#egg=opl-rhcloud-perf-team-core&subdirectory=core"
    37  
    38      ## Monitoring data for entire test
    39      monitoring_collection_data=$(find "$output_dir" -type f -name 'load-tests.max-concurrency.json')
    40      monitoring_collection_log="$ARTIFACT_DIR/monitoring-collection.log"
    41      monitoring_collection_dir=$ARTIFACT_DIR/monitoring-collection-raw-data-dir
    42      mkdir -p "$monitoring_collection_dir"
    43      echo "Collecting monitoring data for entire test"
    44      mstart=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get startTimestamp)" --iso-8601=seconds)
    45      mend=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get endTimestamp)" --iso-8601=seconds)
    46      mhost=$(oc -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
    47      status_data.py \
    48          --status-data-file "$monitoring_collection_data" \
    49          --additional ./tests/load-tests/ci-scripts/max-concurrency/cluster_read_config.yaml \
    50          --monitoring-start "$mstart" \
    51          --monitoring-end "$mend" \
    52          --monitoring-raw-data-dir "$monitoring_collection_dir" \
    53          --prometheus-host "https://$mhost" \
    54          --prometheus-port 443 \
    55          --prometheus-token "$(oc whoami -t)" \
    56          -d &>"$monitoring_collection_log"
    57      cp -f "$monitoring_collection_data" "$ARTIFACT_DIR"
    58  
    59      ## Monitoring data per iteration
    60      for monitoring_collection_data in $(find "$output_dir" -type f -name 'load-tests.max-concurrency.*.json'); do
    61          iteration_index=$(echo "$monitoring_collection_data" | sed -e 's,.*/load-tests.max-concurrency.\([0-9]\+-[0-9]\+\).json,\1,')
    62          monitoring_collection_log="$ARTIFACT_DIR/monitoring-collection.$iteration_index.log"
    63          echo "Collecting monitoring data for step $iteration_index..."
    64          mstart=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get timestamp)" --iso-8601=seconds)
    65          mend=$(date --utc --date "$(status_data.py --status-data-file "$monitoring_collection_data" --get endTimestamp)" --iso-8601=seconds)
    66          mhost=$(oc -n openshift-monitoring get route -l app.kubernetes.io/name=thanos-query -o json | jq --raw-output '.items[0].spec.host')
    67          status_data.py \
    68              --status-data-file "$monitoring_collection_data" \
    69              --additional ./tests/load-tests/cluster_read_config.yaml \
    70              --monitoring-start "$mstart" \
    71              --monitoring-end "$mend" \
    72              --prometheus-host "https://$mhost" \
    73              --prometheus-port 443 \
    74              --prometheus-token "$(oc whoami -t)" \
    75              -d &>"$monitoring_collection_log"
    76          cp -f "$monitoring_collection_data" "$ARTIFACT_DIR"
    77      done
    78      set +u
    79      deactivate
    80      set -u
    81  }
    82  
    83  collect_tekton_profiling_data() {
    84      echo "Collecting Tekton prifiling data..."
    85      if [ "${TEKTON_PERF_ENABLE_CPU_PROFILING:-}" == "true" ] || [ "${TEKTON_PERF_ENABLE_MEMORY_PROFILING:-}" == "true" ]; then
    86          echo "Collecting profiling data from Tekton"
    87          for pprof_profile in $(find "$output_dir" -name "*.pprof"); do
    88              if [ -s "$pprof_profile" ]; then
    89                  file=$(basename "$pprof_profile")
    90                  go tool pprof -text "$pprof_profile" >"$ARTIFACT_DIR/pprof/$file.txt" || true
    91                  go tool pprof -svg -output="$ARTIFACT_DIR/pprof/$file.svg" "$pprof_profile" || true
    92              fi
    93          done
    94      fi
    95  }
    96  
    97  get_tekton_results_watcher_pod_count() {
    98      find "$output_dir" -type f -name 'tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.0001-*.pprof' | wc -l
    99  }
   100  
collect_scalability_data() {
    # Build $ARTIFACT_DIR/max-concurrency.csv: one header line plus one row
    # per iteration status JSON (load-tests.max-concurrency.*.json).
    # NOTE: the header columns below and the jq expression further down must
    # stay in lockstep, column for column — edit them together.
    echo "Collecting scalability data..."

    # One ParkedGoRoutinesPodN header column per watcher pod; `seq -w`
    # zero-pads the index so column names sort naturally.
    tekton_results_watcher_pod_count=$(get_tekton_results_watcher_pod_count)
    tekton_results_watcher_pod_headers=""
    for i in $(seq -w 1 "$tekton_results_watcher_pod_count"); do
        tekton_results_watcher_pod_headers="${tekton_results_watcher_pod_headers}${csv_delim}ParkedGoRoutinesPod$i"
    done

    max_concurrency_csv=$ARTIFACT_DIR/max-concurrency.csv
    # CSV header row (single echo; backslash-newlines keep it one string).
    echo "Iteration\
${csv_delim}Threads\
${csv_delim}WorkloadKPI\
${csv_delim}Errors\
${csv_delim}UserAvgTime\
${csv_delim}UserMaxTime\
${csv_delim}ApplicationAvgTime\
${csv_delim}ApplicationMaxTime\
${csv_delim}CDQAvgTime\
${csv_delim}CDQMaxTime\
${csv_delim}ComponentsAvgTime\
${csv_delim}ComponentsMaxTime\
${csv_delim}PipelineRunAvgTime\
${csv_delim}PipelineRunMaxTime\
${csv_delim}IntegrationTestsRunPipelineSucceededTimeAvg\
${csv_delim}IntegrationTestsRunPipelineSucceededTimeMax\
${csv_delim}DeploymentSucceededTimeAvg\
${csv_delim}DeploymentSucceededTimeMax\
${csv_delim}ClusterCPUUsageAvg\
${csv_delim}ClusterDiskUsageAvg\
${csv_delim}ClusterMemoryUsageAvg\
${csv_delim}ClusterPodCountAvg\
${csv_delim}ClusterNodesWorkerCountAvg\
${csv_delim}ClusterRunningPodsOnWorkersCountAvg\
${csv_delim}ClusterPVCInUseAvg\
${csv_delim}TektonResultsWatcherMemoryMin\
${csv_delim}TektonResultsWatcherMemoryMax\
${csv_delim}TektonResultsWatcherMemoryRange\
${tekton_results_watcher_pod_headers}\
${csv_delim}SchedulerPendingPodsCountAvg\
${csv_delim}TokenPoolRatePrimaryAvg\
${csv_delim}TokenPoolRateSecondaryAvg\
${csv_delim}ClusterPipelineRunCountAvg\
${csv_delim}ClusterPipelineWorkqueueDepthAvg\
${csv_delim}ClusterPipelineScheduleFirstPodAvg\
${csv_delim}ClusterTaskRunThrottledByNodeResourcesAvg\
${csv_delim}ClusterTaskRunThrottledByDefinedQuotaAvg\
${csv_delim}EtcdRequestDurationSecondsAvg\
${csv_delim}ClusterNetworkBytesTotalAvg\
${csv_delim}ClusterNetworkReceiveBytesTotalAvg\
${csv_delim}ClusterNetworkTransmitBytesTotalAvg\
${csv_delim}NodeDiskIoTimeSecondsTotalAvg" \
        >"$max_concurrency_csv"
    mc_files=$(find "$output_dir" -type f -name 'load-tests.max-concurrency.*.json')
    if [ -n "$mc_files" ]; then
        for i in $mc_files; do
            # Iteration index like "3-20" comes from the file name itself.
            iteration_index=$(echo "$i" | sed -e 's,'"$output_dir"'/load-tests.max-concurrency.\([0-9-]\+\).*,\1,g')

            # Build the jq fragment for the ParkedGoRoutinesPodN columns.
            # When no goroutine data exists, pad with empty columns so the
            # row width still matches the header.
            parked_go_routines=$(get_parked_go_routines "$iteration_index")
            parked_go_routines_columns=""
            if [ -n "$parked_go_routines" ]; then
                for g in $parked_go_routines; do
                    parked_go_routines_columns="$parked_go_routines_columns + $csv_delim_quoted + \"$g\""
                done
            else
                for _ in $(seq 1 "$(get_tekton_results_watcher_pod_count)"); do
                    parked_go_routines_columns="$parked_go_routines_columns + $csv_delim_quoted"
                done
            fi
            # Emit one CSV row; field order mirrors the header above.
            # $csv_delim_quoted and $parked_go_routines_columns expand in the
            # shell before jq parses the program.
            jq -rc "(.metadata.\"max-concurrency\".iteration | tostring) \
                + $csv_delim_quoted + (.threads | tostring) \
                + $csv_delim_quoted + (.workloadKPI | tostring) \
                + $csv_delim_quoted + (.errorsTotal | tostring) \
                + $csv_delim_quoted + (.createUserTimeAvg | tostring) \
                + $csv_delim_quoted + (.createUserTimeMax | tostring) \
                + $csv_delim_quoted + (.createApplicationsTimeAvg | tostring) \
                + $csv_delim_quoted + (.createApplicationsTimeMax | tostring) \
                + $csv_delim_quoted + (.createCDQsTimeAvg | tostring) \
                + $csv_delim_quoted + (.createCDQsTimeMax | tostring) \
                + $csv_delim_quoted + (.createComponentsTimeAvg | tostring) \
                + $csv_delim_quoted + (.createComponentsTimeMax | tostring) \
                + $csv_delim_quoted + (.runPipelineSucceededTimeAvg | tostring) \
                + $csv_delim_quoted + (.runPipelineSucceededTimeMax | tostring) \
                + $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeAvg | tostring) \
                + $csv_delim_quoted + (.integrationTestsRunPipelineSucceededTimeMax | tostring) \
                + $csv_delim_quoted + (.deploymentSucceededTimeAvg | tostring) \
                + $csv_delim_quoted + (.deploymentSucceededTimeMax | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_cpu_usage_seconds_total_rate.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_disk_throughput_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_memory_usage_rss_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_pods_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_nodes_worker_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_running_pods_on_workers_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.storage_count_attachable_volumes_in_use.mean | tostring) \
                + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.min | tostring) \
                + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.max | tostring) \
                + $csv_delim_quoted + (.measurements.\"tekton-results-watcher\".\"container[watcher]\".memory.range | tostring) \
                ${parked_go_routines_columns} \
                + $csv_delim_quoted + (.measurements.scheduler_pending_pods_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.token_pool_rate_primary.mean | tostring) \
                + $csv_delim_quoted + (.measurements.token_pool_rate_secondary.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_pipelines_controller_running_pipelineruns_count.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_tekton_pipelines_controller_workqueue_depth.mean | tostring) \
                + $csv_delim_quoted + (.measurements.pipelinerun_duration_scheduled_seconds.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_pipelines_controller_running_taskruns_throttled_by_node.mean | tostring) \
                + $csv_delim_quoted + (.measurements.tekton_pipelines_controller_running_taskruns_throttled_by_quota.mean | tostring) \
                + $csv_delim_quoted + (.measurements.etcd_request_duration_seconds_average.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_network_bytes_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_network_receive_bytes_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.cluster_network_transmit_bytes_total.mean | tostring) \
                + $csv_delim_quoted + (.measurements.node_disk_io_time_seconds_total.mean | tostring)" \
                "$i" >>"$max_concurrency_csv"
        done
    else
        echo "WARNING: No file matching '$output_dir/load-tests.max-concurrency.*.json' found!"
    fi
}
   218  
   219  get_parked_go_routines() {
   220      goroutines_pprof=$(find "$output_dir" -name "tekton-results-watcher.tekton-results-watcher-*.goroutine-dump-0.$1.pprof")
   221      count=0
   222      for i in $goroutines_pprof; do
   223          if [ $count -gt 0 ]; then
   224              echo -n " "
   225          fi
   226          echo -n "$(go tool pprof -text "$i" 2>/dev/null | grep 'runtime.gopark$' | sed -e 's,[ ]*\([0-9]\+\) .*,\1,g')"
   227          count=$((count + 1))
   228      done
   229  }
   230  
collect_timestamp_csvs() {
    # Dump one CSV row per PipelineRun in the whole cluster: lifecycle
    # timestamps plus computed phase durations, sorted by SucceededDuration
    # (column 13) descending.
    echo "Collecting PipelineRun timestamps..."
    pipelinerun_timestamps=$ARTIFACT_DIR/pipelineruns.tekton.dev_timestamps.csv
    echo "PipelineRun${csv_delim}Namespace${csv_delim}Succeeded${csv_delim}Reason${csv_delim}Message${csv_delim}Created${csv_delim}Started${csv_delim}FinallyStarted${csv_delim}Completed${csv_delim}Created->Started${csv_delim}Started->FinallyStarted${csv_delim}FinallyStarted->Completed${csv_delim}SucceededDuration${csv_delim}FailedDuration" >"$pipelinerun_timestamps"
    # jq program assembled in the shell: $csv_delim_quoted and $dt_format
    # expand here, before jq parses it. The condition message has embedded
    # delimiters replaced with "_" to keep column alignment; duration fields
    # are empty strings when a bounding timestamp is missing. The last two
    # columns carry the total duration in the Succeeded / Failed column
    # depending on conditions[0].status.
    jq_cmd=".items[] | (.metadata.name) \
+ $csv_delim_quoted + (.metadata.namespace) \
+ $csv_delim_quoted + (.status.conditions[0].status) \
+ $csv_delim_quoted + (.status.conditions[0].reason) \
+ $csv_delim_quoted + (.status.conditions[0].message|split($csv_delim_quoted)|join(\"_\")) \
+ $csv_delim_quoted + (.metadata.creationTimestamp) \
+ $csv_delim_quoted + (.status.startTime) \
+ $csv_delim_quoted + (.status.finallyStartTime) \
+ $csv_delim_quoted + (.status.completionTime) \
+ $csv_delim_quoted + (if .status.startTime != null and .metadata.creationTimestamp != null then ((.status.startTime | strptime($dt_format) | mktime) - (.metadata.creationTimestamp | strptime($dt_format) | mktime) | tostring) else \"\" end) \
+ $csv_delim_quoted + (if .status.finallyStartTime != null and .status.startTime != null then ((.status.finallyStartTime | strptime($dt_format) | mktime) - (.status.startTime | strptime($dt_format) | mktime) | tostring) else \"\" end) \
+ $csv_delim_quoted + (if .status.completionTime != null and .status.finallyStartTime != null then ((.status.completionTime | strptime($dt_format) | mktime) - (.status.finallyStartTime | strptime($dt_format) | mktime) | tostring) else \"\" end) \
+ $csv_delim_quoted + (if .status.conditions[0].status == \"True\" and .status.completionTime != null and .metadata.creationTimestamp != null then ((.status.completionTime | strptime($dt_format) | mktime) - (.metadata.creationTimestamp | strptime($dt_format) | mktime) | tostring) else \"\" end) \
+ $csv_delim_quoted + (if .status.conditions[0].status == \"False\" and .status.completionTime != null and .metadata.creationTimestamp != null then ((.status.completionTime | strptime($dt_format) | mktime) - (.metadata.creationTimestamp | strptime($dt_format) | mktime) | tostring) else \"\" end)"
    # Post-process jq's raw output: strip newlines and the surrounding
    # quotes, drop the "Z" UTC suffix before each delimiter, then sort
    # numerically (reverse) on column 13.
    oc get pipelineruns.tekton.dev -A -o json | jq "$jq_cmd" | sed -e "s/\n//g" -e "s/^\"//g" -e "s/\"$//g" -e "s/Z;/;/g" | sort -t ";" -k 13 -r -n >>"$pipelinerun_timestamps"
}
   251  
   252  echo "Collecting max concurrency results..."
   253  collect_artifacts || true
   254  collect_timestamp_csvs || true
   255  collect_monitoring_data || true
   256  collect_scalability_data || true
   257  collect_tekton_profiling_data || true
   258  popd