sigs.k8s.io/kueue@v0.6.2/test/performance/run-test.sh

sigs.k8s.io/kueue@v0.6.2/test/performance/run-test.sh (about)

     1  #!/bin/bash
     2  
     3  # shellcheck disable=SC1091
     4  if test -f .env; then
     5    source .env
     6  fi
     7  
     8  CL2_HOME_DIR=${CL2_HOME_DIR:=/Users/johny/perf-tests/clusterloader2} 
     9  CL2_BINARY_NAME=${CL2_BINARY_NAME:=clusterloader}
    10  
    11  USE_KUEUE=${USE_KUEUE:=true}
    12  
    13  DEFAULT_EXPERIMENTS=(
    14      "10 2 0 2s 3m 100 100Gi"
    15  )
    16  
    17  EXPERIMENTS=("${EXPERIMENTS[@]:=${DEFAULT_EXPERIMENTS[@]}}")
    18  
    19  KUBECONFIG=${KUBECONFIG:="$HOME/.kube/config"}
    20  export KUBECONFIG
    21  
    22  PROVIDER=${PROVIDER:=gke}
    23  
    24  export EXEC_DEPLOYMENT_YAML="$CL2_HOME_DIR/pkg/execservice/manifest/exec_deployment.yaml"
    25  
    26  cp -r "$CL2_HOME_DIR/pkg/prometheus/manifests/" tmp_manifests
    27  trap 'rm -r tmp_manifests' EXIT
    28  export PROMETHEUS_MANIFEST_PATH=$(pwd)/tmp_manifests
    29  
    30  now=$(date +%Y-%m-%d-%H.%M.%S)
    31  
    32  if [[ "$USE_KUEUE" == true ]]; then
    33      export CL2_USE_KUEUE=true
    34      # Kustomize places all Kubernetes object manifests (role, rolebinding and servicemonitor) in the same file, 
    35      # however, Clusterloader expects, that there is one manifest per file, otherwise it does not create all the 
    36      # objects from the file. 
    37      # The yq expression below splits produced manifest into 3 files and then moves 
    38      # to temporary $PROMETHEUS_MANIFEST_PATH
    39      kubectl kustomize ../../config/prometheus | yq -s '.kind' -o yaml
    40      mv Role.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-role.yaml"
    41      mv RoleBinding.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-role-binding.yaml"
    42      mv ServiceMonitor.yml "$PROMETHEUS_MANIFEST_PATH/prometheus-kueue-service-monitor.yaml"
    43      kubectl apply -f prerequisites/resource-flavor.yaml
    44      report_dir_name="kueue_report_$now"
    45  else 
    46      report_dir_name="report_$now"
    47  fi
    48  mkdir -p "$report_dir_name"
    49  
    50  {
    51      echo -ne "Test Arguments,"
    52      echo -ne "P50 Job Create to start latency (ms),"
    53      echo -ne "P90 Job Create to start latency (ms),"
    54      echo -ne "P50 Job Start to complete latency (ms),"
    55      echo -ne "P90 Job Start to complete latency (ms),"
    56      echo -ne "Max Job Throughput (max jobs/s),"
    57      echo -ne "Total Jobs,"
    58      echo -ne "Total Pods,"
    59      echo -ne "Duration (s),"
    60      echo -ne "Avg Pod Waiting time (s),"
    61      echo -ne "P90 Pod Waiting time (s),"
    62      echo -ne "Avg Pod Completion time (s),"
    63      echo "P90 Pod Completion time (s)"
    64  } >>"$report_dir_name/summary.csv"
    65  
    66  for item in "${EXPERIMENTS[@]}"; do
    67      IFS=" " read -ra conditions <<<"$item"
    68      export CL2_SMALL_JOBS="${conditions[0]}"
    69      export CL2_MEDIUM_JOBS="${conditions[1]}"
    70      export CL2_LARGE_JOBS="${conditions[2]}"
    71      export CL2_JOB_RUNNING_TIME="${conditions[3]}"
    72      export CL2_TEST_TIMEOUT="${conditions[4]}"
    73      cores="${conditions[5]}"
    74      memory="${conditions[6]}"
    75      experiment_dir="$report_dir_name/$CL2_SMALL_JOBS-$CL2_MEDIUM_JOBS-$CL2_LARGE_JOBS-$CL2_JOB_RUNNING_TIME-$CL2_TEST_TIMEOUT-$cores-$memory"
    76      mkdir -p "$experiment_dir"
    77      echo "======================================================================================"
    78      echo "Running an experiment with [$CL2_SMALL_JOBS, $CL2_MEDIUM_JOBS, $CL2_LARGE_JOBS, $CL2_JOB_RUNNING_TIME, $CL2_TEST_TIMEOUT, $cores, $memory]"
    79      if [[ "$USE_KUEUE" == true ]]; then
    80          cp prerequisites/cluster-queue.template prerequisites/cluster-queue.yaml        
    81          yq -i e ".spec.resources[0].flavors[0].quota.min=$cores" prerequisites/cluster-queue.yaml
    82          yq -i e ".spec.resources[1].flavors[0].quota.min=\"$memory\"" prerequisites/cluster-queue.yaml
    83          kubectl apply -f prerequisites/cluster-queue.yaml
    84      fi
    85      "$CL2_HOME_DIR/$CL2_BINARY_NAME" \
    86          --testconfig=config.yaml \
    87          --enable-prometheus-server=true \
    88          --provider="$PROVIDER" \
    89          --v=2 --prometheus-scrape-metrics-server=true \
    90          --prometheus-scrape-kube-state-metrics=true \
    91          --report-dir="$experiment_dir"
    92      echo "Experiment finished. Extracting results from the report..."
    93      {
    94          echo -ne "$CL2_SMALL_JOBS $CL2_MEDIUM_JOBS $CL2_LARGE_JOBS $CL2_JOB_RUNNING_TIME $CL2_TEST_TIMEOUT $cores $memory,"
    95          echo -ne "$(jq '.dataItems[] | select(.labels.Metric=="create_to_start").data.Perc50' "$experiment_dir"/JobLifecycleLatency*.json),"
    96          echo -ne "$(jq '.dataItems[] | select(.labels.Metric=="create_to_start").data.Perc90' "$experiment_dir"/JobLifecycleLatency*.json),"
    97          echo -ne "$(jq '.dataItems[] | select(.labels.Metric=="start_to_complete").data.Perc50' "$experiment_dir"/JobLifecycleLatency*.json),"
    98          echo -ne "$(jq '.dataItems[] | select(.labels.Metric=="start_to_complete").data.Perc90' "$experiment_dir"/JobLifecycleLatency*.json),"
    99          echo -ne "$(jq '.dataItems[0].data.max_job_throughput' "$experiment_dir"/GenericPrometheusQuery*.json),"
   100          echo -ne "$(jq '.dataItems[0].data.total_jobs_scheduled' "$experiment_dir"/GenericPrometheusQuery*.json),"
   101          echo -ne "$(jq '.dataItems[0].data.total_pods_scheduled' "$experiment_dir"/GenericPrometheusQuery*.json),"
   102          echo -ne "$(jq '.dataItems[0].data.job_performance' "$experiment_dir"/Timer*.json)",
   103          echo -ne "$(jq '.dataItems[0].data.avg_pod_waiting_time' "$experiment_dir"/GenericPrometheusQuery*.json),"
   104          echo -ne "$(jq '.dataItems[0].data.perc_90_pod_waiting_time' "$experiment_dir"/GenericPrometheusQuery*.json),"
   105          echo -ne "$(jq '.dataItems[0].data.avg_pod_running_time' "$experiment_dir"/GenericPrometheusQuery*.json),"
   106          jq '.dataItems[0].data.perc_90_pod_completion_time' "$experiment_dir"/GenericPrometheusQuery*.json
   107      } >>"$report_dir_name/summary.csv"
   108      if [[ "$USE_KUEUE" == true ]]; then
   109          kubectl delete -f prerequisites/cluster-queue.yaml
   110      fi
   111  done
   112  
   113  if [[ "$USE_KUEUE" == true ]]; then
   114      kubectl delete -f prerequisites/resource-flavor.yaml
   115  fi