sigs.k8s.io/kueue@v0.6.2/test/performance/config.yaml (about)

     1  {{$MODE := DefaultParam .CL2_MODE (DefaultParam .MODE "Indexed")}}
        # MODE (line above) is handed to job.yaml as "Mode". CL2_MODE follows the
        # CL2_ prefix used by every other parameter here; the legacy MODE key is
        # still honoured when CL2_MODE is unset, and "Indexed" is the last resort.
        # NOTE(review): clusterloader2 is understood to expose only CL2_-prefixed
        # environment variables to templates - confirm against its documentation.
        # QPS of the UniformQPS tuning set that paces every creation phase.
     2  {{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}}
     3  
        # Total number of jobs per size class, summed over all namespaces.
     4  {{$smallJobs := DefaultParam .CL2_SMALL_JOBS 10}}
     5  {{$mediumJobs := DefaultParam .CL2_MEDIUM_JOBS 2}}
     6  {{$largeJobs := DefaultParam .CL2_LARGE_JOBS 0}}
     7  
        # Number of test namespaces the jobs are spread over.
     8  {{$namespaces := DefaultParam .CL2_NAMESPACES 1}}
     9  
        # Per-namespace share of each size class.
        # NOTE(review): DivideInt presumably truncates, so totals that are not a
        # multiple of the namespace count would create fewer jobs - confirm.
    10  {{$smallJobsPerNamespace := DivideInt $smallJobs $namespaces}}
    11  {{$mediumJobsPerNamespace := DivideInt $mediumJobs $namespaces}}
    12  {{$largeJobsPerNamespace := DivideInt $largeJobs $namespaces}}
    13  
        # Fixed per-class sizes, passed to job.yaml as "Replicas".
    14  {{$smallJobSize := 5}}
    15  {{$mediumJobSize := 20}}
    16  {{$largeJobSize := 100}}
    17  
        # Passed to job.yaml as "Sleep".
    18  {{$jobRunningTime := DefaultParam .CL2_JOB_RUNNING_TIME "30s"}}
    19  
        # clusterQueue fills "ClusterQueue" in local-queue.yaml and is the
        # cluster_queue label matched by the max_job_throughput query.
        # localQueue is the basename of the local queue objects; the jobs refer to
        # it with a "-0" suffix.
    20  {{$clusterQueue := "default-cluster-queue"}}
    21  {{$localQueue := "local-queue"}}
    22  
        # Gather timeout of both job measurements and the window of the
        # max_job_throughput subquery.
    23  {{$testTimeout := DefaultParam .CL2_TEST_TIMEOUT "5m"}}
    24  
        # Namespace name prefix; also the namespace regex prefix in the Prometheus
        # queries.
    25  {{$namespacePrefix := "queue-test"}}
    26  
        # Enables the "Create local queue" step and is passed to job.yaml as
        # "UseKueue".
    27  {{$useKueue := DefaultParam .CL2_USE_KUEUE false}}
    28  
    29  name: batch
    30  
        # Namespaces used by the phases below: how many there are and the prefix of
        # their names, both taken from the template parameters.
    31  namespace:
    32    number: {{$namespaces}}
    33    prefix: {{$namespacePrefix}}
    34  
        # The only tuning set in this file; every phase below selects it by name.
    35  tuningSets:
    36  - name: UniformQPS
    37    qpsLoad:
          # Request rate taken from the LOAD_TEST_THROUGHPUT parameter.
    38      qps: {{$LOAD_TEST_THROUGHPUT}}
    39  
    40  steps:
        # First step: starts every measurement before any object is created.
    41  - name: Start measurements
    42    measurements:
          # Timer labelled job_performance; stopped and gathered in later steps.
    43    - Identifier: Timer
    44      Method: Timer
    45      Params:
    46        action: start
    47        label: job_performance
          # Both job measurements select objects labelled group = test-job and are
          # gathered in the "Wait for ... jobs to finish" step.
    48    - Identifier: WaitForFinishedJobs
    49      Method: WaitForFinishedJobs
    50      Params:
    51        action: start
    52        labelSelector: group = test-job
    53    - Identifier: JobLifecycleLatency
    54      Method: JobLifecycleLatency
    55      Params:
    56        action: start
    57        labelSelector: group = test-job
          # Registers the Prometheus queries; results are gathered in the last step.
    58    - Identifier: GenericPrometheusQuery
    59      Method: GenericPrometheusQuery
    60      Params:
    61        action: start
    62        metricName: Job (Kueue) API performance
    63        metricVersion: v1
              # NOTE(review): this single unit is attached to all queries below,
              # including the count and rate ones - confirm that is intended.
    64        unit: s
              # Pod and job queries only match namespaces whose name starts with the
              # test namespace prefix.
    65        queries:
                # Number of kube_job_info series.
    66          - name: total_jobs_scheduled
    67            query: count(kube_job_info{namespace=~"{{$namespacePrefix}}.*"})
                # Number of kube_pod_info series.
    68          - name: total_pods_scheduled
    69            query: count(kube_pod_info{namespace=~"{{$namespacePrefix}}.*"})
                # Running time = completion time - start time (average, then 0.90 quantile).
    70          - name: avg_pod_running_time
    71            query: (avg(kube_pod_completion_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"}))
    72          - name: perc_90_pod_completion_time
    73            query: quantile(0.90, kube_pod_completion_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"})
                # Waiting time = start time - creation time (average, then 0.90 quantile).
    74          - name: avg_pod_waiting_time
    75            query: (avg(kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_created{namespace=~"{{$namespacePrefix}}.*"}))
    76          - name: perc_90_pod_waiting_time
    77            query: quantile(0.90, kube_pod_start_time{namespace=~"{{$namespacePrefix}}.*"} - kube_pod_created{namespace=~"{{$namespacePrefix}}.*"})
                # Peak of the summed 1m admission rate for the cluster queue, sampled
                # every 5s over a window as long as the test timeout.
    78          - name: max_job_throughput
    79            query: max_over_time(sum(rate(kueue_admitted_workloads_total{cluster_queue="{{$clusterQueue}}"}[1m]))[{{$testTimeout}}:5s])
    80  - name: Sleep
          # Fixed 10s pause between starting the measurements and creating objects.
          # NOTE(review): presumably lets the measurements settle - confirm.
    81    measurements:
    82    - Identifier: sleep
    83      Method: Sleep
    84      Params:
    85        duration: 10s
    86  {{if $useKueue}}
        # Rendered only when useKueue is true: creates one local queue object per
        # namespace from local-queue.yaml, pointing it at the cluster queue.
    87  - name: Create local queue
    88    phases:
    89    - namespaceRange:
    90        min: 1
    91        max: {{$namespaces}}
            # The job phases reference this object as "<basename>-0".
            # NOTE(review): relies on replicas being named <basename>-<index> starting
            # at 0 - confirm against clusterloader2's object naming.
    92      replicasPerNamespace: 1
    93      tuningSet: UniformQPS
    94      objectBundle:
    95      - basename: {{$localQueue}}
    96        objectTemplatePath: "local-queue.yaml"
    97        templateFillMap:
    98          ClusterQueue: {{$clusterQueue}}
    99  {{end}}
   100  - name: Create {{$MODE}} jobs
          # One phase per size class (small, medium, large). Each phase covers all
          # namespaces, uses the UniformQPS tuning set and renders job.yaml with the
          # same fill map; only the replica count per namespace and the "Replicas"
          # value differ between the phases.
   101    phases:
          # Small jobs.
   102    - namespaceRange:
   103        min: 1
   104        max: {{$namespaces}}
   105      replicasPerNamespace: {{$smallJobsPerNamespace}}
   106      tuningSet: UniformQPS
   107      objectBundle:
   108      - basename: small
   109        objectTemplatePath: "job.yaml"
   110        templateFillMap:
   111          UseKueue: {{$useKueue}}
   112          Replicas: {{$smallJobSize}}
   113          Mode: {{$MODE}}
   114          Sleep: {{$jobRunningTime}}
                # Name of the local queue object created in "Create local queue".
   115          LocalQueue: "{{$localQueue}}-0"
          # Medium jobs.
   116    - namespaceRange:
   117        min: 1
   118        max: {{$namespaces}}
   119      replicasPerNamespace: {{$mediumJobsPerNamespace}}
   120      tuningSet: UniformQPS
   121      objectBundle:
   122      - basename: medium
   123        objectTemplatePath: "job.yaml"
   124        templateFillMap:
   125          UseKueue: {{$useKueue}}
   126          Replicas: {{$mediumJobSize}}
   127          Mode: {{$MODE}}
   128          Sleep: {{$jobRunningTime}}
   129          LocalQueue: "{{$localQueue}}-0"
          # Large jobs; the default large-job total is 0.
   130    - namespaceRange:
   131        min: 1
   132        max: {{$namespaces}}
   133      replicasPerNamespace: {{$largeJobsPerNamespace}}
   134      tuningSet: UniformQPS
   135      objectBundle:
   136      - basename: large
   137        objectTemplatePath: "job.yaml"
   138        templateFillMap:
   139          UseKueue: {{$useKueue}}
   140          Replicas: {{$largeJobSize}}
   141          Mode: {{$MODE}}
   142          Sleep: {{$jobRunningTime}}
   143          LocalQueue: "{{$localQueue}}-0"
   144  - name: Wait for {{$MODE}} jobs to finish
          # Gathers the two job measurements started in "Start measurements"; both
          # gathers are given the test timeout.
   145    measurements:
   146    - Identifier: JobLifecycleLatency
   147      Method: JobLifecycleLatency
   148      Params:
   149        action: gather
   150        timeout: {{$testTimeout}}
   151    - Identifier: WaitForFinishedJobs
   152      Method: WaitForFinishedJobs
   153      Params:
   154        action: gather
   155        timeout: {{$testTimeout}}
   156  - name: Stop Timer
          # Stops the job_performance timer that was started in "Start measurements".
   157    measurements:
   158    - Identifier: Timer
   159      Method: Timer
   160      Params:
   161        action: stop
   162        label: job_performance
   163  - name: Gather Timer
          # Gathers the Timer measurement after it has been stopped.
   164    measurements:
   165    - Identifier: Timer
   166      Method: Timer
   167      Params:
   168        action: gather
   169  - name: Sleep
          # Fixed 30s pause before the Prometheus results are gathered.
          # NOTE(review): presumably gives Prometheus time to scrape the final
          # state - confirm.
   170    measurements:
   171    - Identifier: sleep
   172      Method: Sleep
   173      Params:
   174        duration: 30s
   175  - name: Gather Prometheus measurements
          # Last step: gathers the GenericPrometheusQuery measurement whose queries
          # were registered in "Start measurements".
   176    measurements:
   177    - Identifier: GenericPrometheusQuery
   178      Method: GenericPrometheusQuery
   179      Params:
   180        action: gather
              # NOTE(review): no query in this file declares a threshold, so there may
              # be nothing for violation checking to evaluate - confirm.
   181        enableViolations: true