# ClusterLoader2 test config "batch".
# Listing path: sigs.k8s.io/kueue@v0.6.2/test/performance/config.yaml
#
# Flow: start measurements -> short sleep -> (optionally) create one local
# queue per namespace -> create small/medium/large jobs from job.yaml ->
# wait for the jobs to finish -> stop/gather the timer -> sleep -> gather the
# Prometheus queries.

# ---------------------------------------------------------------------------
# Parameters. Each DefaultParam line names an overridable parameter followed by
# the value used in this file when that parameter is not supplied.
# ---------------------------------------------------------------------------

# Value forwarded to job.yaml as "Mode"; also used in step names.
{{$MODE := DefaultParam .MODE "Indexed"}}
# QPS of the single tuning set used by every creation phase.
{{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}}

# Total number of jobs of each size, summed over all namespaces.
{{$smallJobs := DefaultParam .CL2_SMALL_JOBS 10}}
{{$mediumJobs := DefaultParam .CL2_MEDIUM_JOBS 2}}
{{$largeJobs := DefaultParam .CL2_LARGE_JOBS 0}}

{{$namespaces := DefaultParam .CL2_NAMESPACES 1}}

# Per-namespace job counts. DivideInt is an integer division, so presumably a
# total that is not a multiple of the namespace count is rounded down -
# TODO confirm against the ClusterLoader2 template function.
{{$smallJobsPerNamespace := DivideInt $smallJobs $namespaces}}
{{$mediumJobsPerNamespace := DivideInt $mediumJobs $namespaces}}
{{$largeJobsPerNamespace := DivideInt $largeJobs $namespaces}}

# Fixed sizes handed to job.yaml as "Replicas" for each job class.
{{$smallJobSize := 5}}
{{$mediumJobSize := 20}}
{{$largeJobSize := 100}}

# Handed to job.yaml as "Sleep".
{{$jobRunningTime := DefaultParam .CL2_JOB_RUNNING_TIME "30s"}}

# Queue names: the cluster queue is referenced by local-queue.yaml and by the
# throughput query; the local queue is the basename of the created queue object.
{{$clusterQueue := "default-cluster-queue"}}
{{$localQueue := "local-queue"}}

# Used as the gather timeout and as the range of the throughput subquery.
{{$testTimeout := DefaultParam .CL2_TEST_TIMEOUT "5m"}}

# Namespace name prefix; the Prometheus queries match namespaces by this prefix.
{{$namespacePrefix := "queue-test"}}

# When true, the "Create local queue" step is included and job.yaml receives
# UseKueue=true.
{{$useKueue := DefaultParam .CL2_USE_KUEUE false}}

name: batch

namespace:
  number: {{$namespaces}}
  prefix: {{$namespacePrefix}}

tuningSets:
  - name: UniformQPS
    qpsLoad:
      qps: {{$LOAD_TEST_THROUGHPUT}}

steps:
  # Start every measurement that is gathered later in the test.
  - name: Start measurements
    measurements:
      - Identifier: Timer
        Method: Timer
        Params:
          action: start
          label: job_performance
      - Identifier: WaitForFinishedJobs
        Method: WaitForFinishedJobs
        Params:
          action: start
          labelSelector: group = test-job
      - Identifier: JobLifecycleLatency
        Method: JobLifecycleLatency
        Params:
          action: start
          labelSelector: group = test-job
      - Identifier: GenericPrometheusQuery
        Method: GenericPrometheusQuery
        Params:
          action: start
          metricName: Job (Kueue) API performance
          metricVersion: v1
          unit: s
          queries:
            # Object counts in the test namespaces.
            - name: total_jobs_scheduled
              query: count(kube_job_info{namespace=~"{{$namespacePrefix}}.*"})
            - name: total_pods_scheduled
              query: count(kube_pod_info{namespace=~"{{$namespacePrefix}}.*"})
            # Pod running time: completion time minus start time.
            - name: avg_pod_running_time
              query: (avg(kube_pod_completion_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"}))
            - name: perc_90_pod_completion_time
              query: quantile(0.90, kube_pod_completion_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"})
            # Pod waiting time: start time minus creation time.
            - name: avg_pod_waiting_time
              query: (avg(kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_created{namespace=~"{{$namespacePrefix}}.*"}))
            - name: perc_90_pod_waiting_time
              query: quantile(0.90, kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_created{namespace=~"{{$namespacePrefix}}.*"})
            # Peak of the summed 1m admission rate for the cluster queue,
            # sampled every 5s over a window of the test timeout.
            - name: max_job_throughput
              query: max_over_time(sum(rate(kueue_admitted_workloads_total{cluster_queue="{{$clusterQueue}}"}[1m]))[{{$testTimeout}}:5s])

  # Pause for 10s before any objects are created.
  - name: Sleep
    measurements:
      - Identifier: sleep
        Method: Sleep
        Params:
          duration: 10s

{{if $useKueue}}
  # Only with Kueue enabled: one local queue in every test namespace, pointing
  # at the cluster queue via the ClusterQueue template value.
  - name: Create local queue
    phases:
      - namespaceRange:
          min: 1
          max: {{$namespaces}}
        replicasPerNamespace: 1
        tuningSet: UniformQPS
        objectBundle:
          - basename: {{$localQueue}}
            objectTemplatePath: "local-queue.yaml"
            templateFillMap:
              ClusterQueue: {{$clusterQueue}}
{{end}}

  # Three phases, one per job size. They differ only in basename, replica
  # count per namespace and the Replicas value given to job.yaml.
  # NOTE(review): the "-0" suffix on LocalQueue presumably matches the name
  # generated for the single local-queue replica (basename plus index) -
  # confirm against ClusterLoader2 object naming.
  - name: Create {{$MODE}} jobs
    phases:
      - namespaceRange:
          min: 1
          max: {{$namespaces}}
        replicasPerNamespace: {{$smallJobsPerNamespace}}
        tuningSet: UniformQPS
        objectBundle:
          - basename: small
            objectTemplatePath: "job.yaml"
            templateFillMap:
              UseKueue: {{$useKueue}}
              Replicas: {{$smallJobSize}}
              Mode: {{$MODE}}
              Sleep: {{$jobRunningTime}}
              LocalQueue: "{{$localQueue}}-0"
      - namespaceRange:
          min: 1
          max: {{$namespaces}}
        replicasPerNamespace: {{$mediumJobsPerNamespace}}
        tuningSet: UniformQPS
        objectBundle:
          - basename: medium
            objectTemplatePath: "job.yaml"
            templateFillMap:
              UseKueue: {{$useKueue}}
              Replicas: {{$mediumJobSize}}
              Mode: {{$MODE}}
              Sleep: {{$jobRunningTime}}
              LocalQueue: "{{$localQueue}}-0"
      - namespaceRange:
          min: 1
          max: {{$namespaces}}
        replicasPerNamespace: {{$largeJobsPerNamespace}}
        tuningSet: UniformQPS
        objectBundle:
          - basename: large
            objectTemplatePath: "job.yaml"
            templateFillMap:
              UseKueue: {{$useKueue}}
              Replicas: {{$largeJobSize}}
              Mode: {{$MODE}}
              Sleep: {{$jobRunningTime}}
              LocalQueue: "{{$localQueue}}-0"

  # Gather the two job measurements started above, each bounded by the test
  # timeout.
  - name: Wait for {{$MODE}} jobs to finish
    measurements:
      - Identifier: JobLifecycleLatency
        Method: JobLifecycleLatency
        Params:
          action: gather
          timeout: {{$testTimeout}}
      - Identifier: WaitForFinishedJobs
        Method: WaitForFinishedJobs
        Params:
          action: gather
          timeout: {{$testTimeout}}

  # Close the "job_performance" timer label opened in the first step.
  - name: Stop Timer
    measurements:
      - Identifier: Timer
        Method: Timer
        Params:
          action: stop
          label: job_performance

  - name: Gather Timer
    measurements:
      - Identifier: Timer
        Method: Timer
        Params:
          action: gather

  # Pause for 30s before the Prometheus queries are gathered.
  - name: Sleep
    measurements:
      - Identifier: sleep
        Method: Sleep
        Params:
          duration: 30s

  - name: Gather Prometheus measurements
    measurements:
      - Identifier: GenericPrometheusQuery
        Method: GenericPrometheusQuery
        Params:
          action: gather
          enableViolations: true