#!/bin/bash
# Source: sigs.k8s.io/kueue@v0.6.2/test/performance/run-test.sh
#
# Runs one clusterloader run per experiment and appends the extracted
# metrics of each run to <report dir>/summary.csv.
#
# The script uses paths relative to the current directory (.env, config.yaml,
# prerequisites/, ../../config/prometheus), so run it from the directory that
# contains them.
#
# Environment (may also be set in an optional .env file, which is sourced):
#   CL2_HOME_DIR     directory holding the clusterloader binary and its pkg/
#                    manifests (default: /Users/johny/perf-tests/clusterloader2)
#   CL2_BINARY_NAME  name of the binary inside CL2_HOME_DIR
#                    (default: clusterloader)
#   USE_KUEUE        "true" to apply the Kueue prerequisites and Prometheus
#                    manifests before running (default: true)
#   EXPERIMENTS      array of experiment strings, each holding seven
#                    space-separated fields:
#                    "<small jobs> <medium jobs> <large jobs> <job running
#                    time> <test timeout> <cores> <memory>"
#                    (default: DEFAULT_EXPERIMENTS below)
#   KUBECONFIG       kubeconfig path (default: $HOME/.kube/config)
#   PROVIDER         value passed to clusterloader --provider (default: gke)

# shellcheck disable=SC1091
if test -f .env; then
  source .env
fi

CL2_HOME_DIR=${CL2_HOME_DIR:-/Users/johny/perf-tests/clusterloader2}
CL2_BINARY_NAME=${CL2_BINARY_NAME:-clusterloader}

USE_KUEUE=${USE_KUEUE:-true}

DEFAULT_EXPERIMENTS=(
  "10 2 0 2s 3m 100 100Gi"
)

# Fall back to the defaults when EXPERIMENTS is unset or empty. An explicit
# test is used because `:=` cannot assign through an `[@]` subscript, and
# copying the array keeps each default experiment as its own element.
if [[ -z "${EXPERIMENTS[*]:-}" ]]; then
  EXPERIMENTS=("${DEFAULT_EXPERIMENTS[@]}")
fi

KUBECONFIG=${KUBECONFIG:-$HOME/.kube/config}
export KUBECONFIG

PROVIDER=${PROVIDER:-gke}

# Fail fast when the clusterloader binary is missing; otherwise every
# experiment would fail and summary.csv would be filled with empty rows.
cl2_binary="$CL2_HOME_DIR/$CL2_BINARY_NAME"
if [[ ! -x "$cl2_binary" ]]; then
  printf 'clusterloader binary not found or not executable: %s\n' "$cl2_binary" >&2
  printf 'Set CL2_HOME_DIR and CL2_BINARY_NAME (for example in .env).\n' >&2
  exit 1
fi

export EXEC_DEPLOYMENT_YAML="$CL2_HOME_DIR/pkg/execservice/manifest/exec_deployment.yaml"

# Work on a private copy of the Prometheus manifests so extra manifests can be
# added to it; the copy is removed when the script exits.
cp -r "$CL2_HOME_DIR/pkg/prometheus/manifests/" tmp_manifests
trap 'rm -r tmp_manifests' EXIT
# Assign and export separately so the export does not mask a failing command.
PROMETHEUS_MANIFEST_PATH="$(pwd)/tmp_manifests"
export PROMETHEUS_MANIFEST_PATH

now=$(date +%Y-%m-%d-%H.%M.%S)

if [[ "$USE_KUEUE" == true ]]; then
  export CL2_USE_KUEUE=true
  # Kustomize places all Kubernetes object manifests (role, rolebinding and
  # servicemonitor) in the same file. Clusterloader, however, expects one
  # manifest per file; otherwise it does not create all the objects from the
  # file. The yq expression below splits the produced manifest into 3 files,
  # which are then moved to the temporary $PROMETHEUS_MANIFEST_PATH.
  kubectl kustomize ../../config/prometheus | yq -s '.kind' -o yaml
  mv Role.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-role.yaml"
  mv RoleBinding.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-role-binding.yaml"
  mv ServiceMonitor.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-service-monitor.yaml"
  kubectl apply -f prerequisites/resource-flavor.yaml
  report_dir_name="kueue_report_$now"
else
  report_dir_name="report_$now"
fi
mkdir -p "$report_dir_name"

summary_file="$report_dir_name/summary.csv"

# CSV header; the column order must match the fields written per experiment
# in the loop below.
# NOTE(review): the "Avg Pod Completion time" column is filled from
# avg_pod_running_time while the P90 column uses perc_90_pod_completion_time;
# confirm the metric names against config.yaml.
summary_columns=(
  "Test Arguments"
  "P50 Job Create to start latency (ms)"
  "P90 Job Create to start latency (ms)"
  "P50 Job Start to complete latency (ms)"
  "P90 Job Start to complete latency (ms)"
  "Max Job Throughput (max jobs/s)"
  "Total Jobs"
  "Total Pods"
  "Duration (s)"
  "Avg Pod Waiting time (s)"
  "P90 Pod Waiting time (s)"
  "Avg Pod Completion time (s)"
  "P90 Pod Completion time (s)"
)
# The subshell keeps the IFS change local to the join.
(
  IFS=,
  printf '%s\n' "${summary_columns[*]}"
) >>"$summary_file"

for item in "${EXPERIMENTS[@]}"; do
  # Split the experiment string into its seven space-separated fields.
  IFS=" " read -ra conditions <<<"$item"
  export CL2_SMALL_JOBS="${conditions[0]}"
  export CL2_MEDIUM_JOBS="${conditions[1]}"
  export CL2_LARGE_JOBS="${conditions[2]}"
  export CL2_JOB_RUNNING_TIME="${conditions[3]}"
  export CL2_TEST_TIMEOUT="${conditions[4]}"
  cores="${conditions[5]}"
  memory="${conditions[6]}"
  experiment_dir="$report_dir_name/$CL2_SMALL_JOBS-$CL2_MEDIUM_JOBS-$CL2_LARGE_JOBS-$CL2_JOB_RUNNING_TIME-$CL2_TEST_TIMEOUT-$cores-$memory"
  mkdir -p "$experiment_dir"
  echo "======================================================================================"
  echo "Running an experiment with [$CL2_SMALL_JOBS, $CL2_MEDIUM_JOBS, $CL2_LARGE_JOBS, $CL2_JOB_RUNNING_TIME, $CL2_TEST_TIMEOUT, $cores, $memory]"
  if [[ "$USE_KUEUE" == true ]]; then
    # Render the cluster queue for this experiment from the template, using
    # the experiment's cores and memory as the quota minimums.
    cp prerequisites/cluster-queue.template prerequisites/cluster-queue.yaml
    yq -i e ".spec.resources[0].flavors[0].quota.min=$cores" prerequisites/cluster-queue.yaml
    yq -i e ".spec.resources[1].flavors[0].quota.min=\"$memory\"" prerequisites/cluster-queue.yaml
    kubectl apply -f prerequisites/cluster-queue.yaml
  fi
  "$cl2_binary" \
    --testconfig=config.yaml \
    --enable-prometheus-server=true \
    --provider="$PROVIDER" \
    --v=2 --prometheus-scrape-metrics-server=true \
    --prometheus-scrape-kube-state-metrics=true \
    --report-dir="$experiment_dir"
  echo "Experiment finished. Extracting results from the report..."
  # One CSV row per experiment, in the same order as summary_columns. The
  # final jq call prints the last field followed by the row's newline.
  {
    printf '%s,' "$CL2_SMALL_JOBS $CL2_MEDIUM_JOBS $CL2_LARGE_JOBS $CL2_JOB_RUNNING_TIME $CL2_TEST_TIMEOUT $cores $memory"
    printf '%s,' "$(jq '.dataItems[] | select(.labels.Metric=="create_to_start").data.Perc50' "$experiment_dir"/JobLifecycleLatency*.json)"
    printf '%s,' "$(jq '.dataItems[] | select(.labels.Metric=="create_to_start").data.Perc90' "$experiment_dir"/JobLifecycleLatency*.json)"
    printf '%s,' "$(jq '.dataItems[] | select(.labels.Metric=="start_to_complete").data.Perc50' "$experiment_dir"/JobLifecycleLatency*.json)"
    printf '%s,' "$(jq '.dataItems[] | select(.labels.Metric=="start_to_complete").data.Perc90' "$experiment_dir"/JobLifecycleLatency*.json)"
    printf '%s,' "$(jq '.dataItems[0].data.max_job_throughput' "$experiment_dir"/GenericPrometheusQuery*.json)"
    printf '%s,' "$(jq '.dataItems[0].data.total_jobs_scheduled' "$experiment_dir"/GenericPrometheusQuery*.json)"
    printf '%s,' "$(jq '.dataItems[0].data.total_pods_scheduled' "$experiment_dir"/GenericPrometheusQuery*.json)"
    printf '%s,' "$(jq '.dataItems[0].data.job_performance' "$experiment_dir"/Timer*.json)"
    printf '%s,' "$(jq '.dataItems[0].data.avg_pod_waiting_time' "$experiment_dir"/GenericPrometheusQuery*.json)"
    printf '%s,' "$(jq '.dataItems[0].data.perc_90_pod_waiting_time' "$experiment_dir"/GenericPrometheusQuery*.json)"
    printf '%s,' "$(jq '.dataItems[0].data.avg_pod_running_time' "$experiment_dir"/GenericPrometheusQuery*.json)"
    jq '.dataItems[0].data.perc_90_pod_completion_time' "$experiment_dir"/GenericPrometheusQuery*.json
  } >>"$summary_file"
  if [[ "$USE_KUEUE" == true ]]; then
    kubectl delete -f prerequisites/cluster-queue.yaml
  fi
done

if [[ "$USE_KUEUE" == true ]]; then
  kubectl delete -f prerequisites/resource-flavor.yaml
fi