k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/testing/load/config.yaml

k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/testing/load/config.yaml (about)

     1  # ASSUMPTIONS:
     2  # - Underlying cluster should have 100+ nodes.
     3  # - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
     4  # - The number of created SVCs is half the number of created Deployments.
     5  # - Only half of Deployments will be assigned 1-1 to existing SVCs.
     6  
     7  #Constants
     8  # Cater for the case where the number of nodes is less than nodes per namespace. See https://github.com/kubernetes/perf-tests/issues/887
     9  {{$NODES_PER_NAMESPACE := MinInt .Nodes (DefaultParam .NODES_PER_NAMESPACE 100)}}
    10  # See https://github.com/kubernetes/perf-tests/pull/1667#issuecomment-769642266
    11  {{$IS_SMALL_CLUSTER := lt .Nodes 100}}
    12  {{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
    13  {{$LOAD_TEST_THROUGHPUT := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 10}}
    14  {{$DELETE_TEST_THROUGHPUT := DefaultParam .CL2_DELETE_TEST_THROUGHPUT $LOAD_TEST_THROUGHPUT}}
    15  {{$RATE_LIMIT_POD_CREATION := DefaultParam .CL2_RATE_LIMIT_POD_CREATION true}}
    16  {{$BIG_GROUP_SIZE := DefaultParam .BIG_GROUP_SIZE 250}}
    17  {{$MEDIUM_GROUP_SIZE := DefaultParam .MEDIUM_GROUP_SIZE 30}}
    18  {{$SMALL_GROUP_SIZE := DefaultParam .SMALL_GROUP_SIZE 5}}
    19  {{$SMALL_STATEFUL_SETS_PER_NAMESPACE := DefaultParam .SMALL_STATEFUL_SETS_PER_NAMESPACE 1}}
    20  {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE := DefaultParam .MEDIUM_STATEFUL_SETS_PER_NAMESPACE 1}}
    21  {{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
    22  {{$ENABLE_API_AVAILABILITY_MEASUREMENT := DefaultParam .CL2_ENABLE_API_AVAILABILITY_MEASUREMENT false}}
    23  {{$ENABLE_NETWORK_POLICY_ENFORCEMENT_LATENCY_TEST := DefaultParam .CL2_ENABLE_NETWORK_POLICY_ENFORCEMENT_LATENCY_TEST false}}
    24  {{$RANDOM_SCALE_FACTOR := 0.5}}
    25  # Variables (derived from the constants above)
    26  {{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}} # namespaces = Nodes / NODES_PER_NAMESPACE
    27  {{$totalPods := MultiplyInt $namespaces $NODES_PER_NAMESPACE $PODS_PER_NODE}} # totalPods = namespaces * NODES_PER_NAMESPACE * PODS_PER_NODE
    28  {{$podsPerNamespace := DivideInt $totalPods $namespaces}} # podsPerNamespace = totalPods / namespaces
    29  {{$saturationTime := DivideInt $totalPods $LOAD_TEST_THROUGHPUT}} # totalPods / LOAD_TEST_THROUGHPUT; used with an "s" suffix in the tuning sets below
    30  {{$deletionTime := DivideInt $totalPods $DELETE_TEST_THROUGHPUT}} # totalPods / DELETE_TEST_THROUGHPUT; used with an "s" suffix in the tuning sets below
    31  # bigDeployments - 1/4 of namespace pods should be in big Deployments.
    32  {{$bigDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $BIG_GROUP_SIZE)}}
    33  # mediumDeployments - 1/4 of namespace pods should be in medium Deployments.
    34  {{$mediumDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 4 $MEDIUM_GROUP_SIZE)}}
    35  # smallDeployments - 1/2 of namespace pods should be in small Deployments.
    36  {{$smallDeploymentsPerNamespace := DivideInt $podsPerNamespace (MultiplyInt 2 $SMALL_GROUP_SIZE)}}
    37  
    38  # Stateful sets are enabled. Reduce the number of small and medium deployments per namespace
    39  # See https://github.com/kubernetes/perf-tests/issues/1036#issuecomment-607631768
    40  # Clamp at 0 so the result is never negative after subtraction.
    41  {{$smallDeploymentsPerNamespace := MaxInt 0 (SubtractInt $smallDeploymentsPerNamespace $SMALL_STATEFUL_SETS_PER_NAMESPACE)}}
    42  {{$mediumDeploymentsPerNamespace := MaxInt 0 (SubtractInt $mediumDeploymentsPerNamespace $MEDIUM_STATEFUL_SETS_PER_NAMESPACE)}}
    43  
    44  # Jobs are enabled. Reduce the number of small, medium, big deployments per namespace.
    45  # Clamp at 0 so the result is never negative after subtraction.
    46  {{$smallDeploymentsPerNamespace := MaxInt 0 (SubtractInt $smallDeploymentsPerNamespace 1)}}
    47  {{$mediumDeploymentsPerNamespace := MaxInt 0 (SubtractInt $mediumDeploymentsPerNamespace 1)}}
    48  {{$bigDeploymentsPerNamespace := MaxInt 0 (SubtractInt $bigDeploymentsPerNamespace 1)}}
    49  
    50  # Disable big jobs on small clusters.
    51  {{$bigJobsPerNamespace := IfThenElse $IS_SMALL_CLUSTER 0 1}}
    52  
    53  # The minimal number of pods to be used to measure various things like
    54  # pod-startup-latency or scheduler-throughput. The purpose of it is to avoid
    55  # problems in small clusters where we wouldn't have enough samples (pods) to
    56  # measure things accurately.
    57  {{$MIN_PODS_IN_SMALL_CLUSTERS := 500}}
    58  
    59  # BEGIN scheduler-throughput section
    60  # TODO( https://github.com/kubernetes/perf-tests/issues/1027): Lower the number of "min-pods" once we fix the scheduler throughput measurement.
    61  {{$totalSchedulerThroughputPods := MaxInt (MultiplyInt 2 $MIN_PODS_IN_SMALL_CLUSTERS) .Nodes}}
    62  # Determines the number of pods per deployment. Should be a divisor of $totalSchedulerThroughputPods.
    63  {{$schedulerThroughputPodsPerDeployment := DefaultParam .CL2_SCHEDULER_THROUGHPUT_PODS_PER_DEPLOYMENT $totalSchedulerThroughputPods}}
    64  {{$schedulerThroughputNamespaces := DivideInt $totalSchedulerThroughputPods $schedulerThroughputPodsPerDeployment}}
    65  
    66  # Set schedulerThroughputNamespaces to 1 on small clusters otherwise it will result
    67  # in an unnecessary number of namespaces.
    68  {{$schedulerThroughputNamespaces := IfThenElse $IS_SMALL_CLUSTER 1 $schedulerThroughputNamespaces}}
    69  # END scheduler-throughput section
    70  
    71  # Command to be executed
    72  {{$EXEC_COMMAND := DefaultParam .CL2_EXEC_COMMAND nil}}
    73  {{$EXIT_AFTER_EXEC := DefaultParam .CL2_EXIT_AFTER_EXEC false}}
    74  {{$EXEC_TIMEOUT := DefaultParam .CL2_EXEC_TIMEOUT "3600s"}}
    75  {{$SLEEP_AFTER_EXEC_DURATION := DefaultParam .CL2_SLEEP_AFTER_EXEC_DURATION "0s"}}
    76  
    77  {{$latencyPodImage := DefaultParam .CL2_LATENCY_POD_IMAGE "registry.k8s.io/pause:3.9"}}
    78  {{$defaultQps := DefaultParam .CL2_DEFAULT_QPS (IfThenElse (le .Nodes 500) 10 100)}}
    79  
    80  {{$ADDITIONAL_MEASUREMENT_MODULES := DefaultParam .CL2_ADDITIONAL_MEASUREMENT_MODULES (DefaultParam .CL2_ADDITIONAL_MEASURMENT_MODULES nil)}} # List of extra measurement module paths; the misspelled CL2_ADDITIONAL_MEASURMENT_MODULES override is still honored for backward compatibility.
    81  {{$ADDITIONAL_PHASES_MODULES := DefaultParam .CL2_ADDITIONAL_PHASES_MODULES nil}}
    82  
    83  name: load
    84  namespace:
    85    number: {{AddInt $namespaces $schedulerThroughputNamespaces}}
    86  tuningSets:
    87  - name: Sequence
    88    parallelismLimitedLoad:
    89      parallelismLimit: 1
    90  # TODO(https://github.com/kubernetes/perf-tests/issues/1024): This TuningSet is used only for pod-startup-latency, get rid of it
    91  # Uniform5qps: for each running phase, use 5 qps.
    92  - name: Uniform5qps
    93    qpsLoad:
    94      qps: 5
    95  # default is a tuningset that is meant to be used when we don't have any specific requirements on pace of operations.
    96  - name: default
    97    globalQPSLoad:
    98      qps: {{$defaultQps}}
    99      burst: 1
   100  - name: RandomizedSaturationTimeLimited
   101    RandomizedTimeLimitedLoad:
   102      timeLimit: {{$saturationTime}}s
   103  - name: RandomizedScalingTimeLimited
   104    RandomizedTimeLimitedLoad:
   105      # The expected number of created/deleted pods is totalPods/4 when scaling,
   106      # as each RS changes its size from X to a uniform random value in [X/2, 3X/2].
   107      # To keep the same pods/s rate as the saturation phase (10 by default), we divide saturationTime by 4.
   108      timeLimit: {{DivideInt $saturationTime 4}}s
   109  - name: RandomizedDeletionTimeLimited
   110    RandomizedTimeLimitedLoad:
   111      timeLimit: {{$deletionTime}}s
   112  {{if $ENABLE_CHAOSMONKEY}}
   113  chaosMonkey:
   114    nodeFailure:
   115      failureRate: 0.01
   116      interval: 5m
   117      jitterFactor: 2.0
   118      simulatedDowntime: 10m
   119  {{end}}
   120  steps:
   121  - module:
   122      path: /modules/measurements.yaml
   123      params:
   124        action: start
   125  
   126  {{if $ADDITIONAL_MEASUREMENT_MODULES}}
   127  {{range $ADDITIONAL_MEASUREMENT_MODULES}}
   128  - module:
   129      path: {{.}}
   130      params:
   131        action: start
   132  {{end}}
   133  {{end}}
   134  
   135  {{if $ENABLE_NETWORK_POLICY_ENFORCEMENT_LATENCY_TEST}}
   136  - module:
   137      path: modules/network-policy/net-policy-enforcement-latency.yaml
   138      params:
   139        setup: true
   140        run: true
   141        testType: "pod-creation"
   142  {{end}}
   143  
   144  - module:
   145      path: modules/services.yaml
   146      params:
   147        actionName: "Creating"
   148        namespaces: {{$namespaces}}
   149        smallServicesPerNamespace: {{DivideInt (AddInt $smallDeploymentsPerNamespace 1) 2}}
   150        mediumServicesPerNamespace: {{DivideInt (AddInt $mediumDeploymentsPerNamespace 1) 2}}
   151        bigServicesPerNamespace: {{DivideInt (AddInt $bigDeploymentsPerNamespace 1) 2}}
   152  
   153  - name: Creating PriorityClass for DaemonSets
   154    phases:
   155    - replicasPerNamespace: 1
   156      tuningSet: Sequence
   157      objectBundle:
   158        - basename: daemonset-priorityclass
   159          objectTemplatePath: daemonset-priorityclass.yaml
   160  
   161  # Moved from reconcile-objects.yaml to mitigate https://github.com/kubernetes/kubernetes/issues/96635.
   162  # TODO(https://github.com/kubernetes/perf-tests/issues/1823): Merge back to reconcile-objects.yaml once the k/k bug is fixed.
   163  - module:
   164      path: /modules/configmaps-secrets.yaml
   165      params:
   166        actionName: create
   167        tuningSet: default
   168        namespaces: {{$namespaces}}
   169        bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
   170        mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
   171        smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
   172  
   173  - module:
   174      path: /modules/reconcile-objects.yaml
   175      params:
   176        actionName: "create"
   177        namespaces: {{$namespaces}}
   178        {{if $RATE_LIMIT_POD_CREATION}}
   179        tuningSet: RandomizedSaturationTimeLimited
   180        operationTimeout: 15m
   181        {{else}}
   182        tuningSet: default
   183        operationTimeout: {{AddInt $saturationTime 900}}s
   184        {{end}}
   185        testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
   186        # We rely on the fact that daemonset is using the same image as the 'pod-startup-latency' module.
   187        # The goal is to cache the image to all nodes before we start any latency pod,
   188        # so that when we measure pod startup latency, the image is already present on all nodes.
   189        # This way, the pod startup latency we measure excludes (or limits the impact of) image pulling,
   190        # which matches our SLO definition: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md.
   191        daemonSetImage: {{$latencyPodImage}}
   192        daemonSetEnv: "before update"
   193        daemonSetReplicas: 1
   194        bigDeploymentSize: {{$BIG_GROUP_SIZE}}
   195        bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
   196        mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
   197        mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
   198        smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
   199        smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
   200        smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
   201        smallStatefulSetsPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
   202        mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
   203        mediumStatefulSetsPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
   204        bigJobSize: {{$BIG_GROUP_SIZE}}
   205        bigJobsPerNamespace: {{$bigJobsPerNamespace}}
   206        mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
   207        mediumJobsPerNamespace: 1
   208        smallJobSize: {{$SMALL_GROUP_SIZE}}
   209        smallJobsPerNamespace: 1
   210  
   211  {{if $ADDITIONAL_PHASES_MODULES}}
   212  {{range $ADDITIONAL_PHASES_MODULES}}
   213  - module:
   214      path: {{.}}
   215      params:
   216        action: "create"
   217  {{end}}
   218  {{end}}
   219  
   220  {{if not $IS_SMALL_CLUSTER}}
   221  # BEGIN scheduler throughput
   222  - module:
   223      path: modules/scheduler-throughput.yaml
   224      params:
   225        action: create
   226        namespaces: {{$namespaces}}
   227        replicasPerNamespace: 1
   228        schedulerThroughputNamespaces: {{$schedulerThroughputNamespaces}}
   229        schedulerThroughputPodsPerDeployment: {{$schedulerThroughputPodsPerDeployment}}
   230  {{end}}
   231  
   232  - module:
   233      path: modules/dns-k8s-hostnames.yaml
   234  
   235  {{if $EXEC_COMMAND}}
   236  
   237  {{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
   238  - name: Pausing APIAvailability measurement
   239    measurements:
   240    - Identifier: APIAvailability
   241      Method: APIAvailability
   242      Params:
   243        action: pause
   244  {{end}}
   245  
   246  - name: Exec command
   247    measurements:
   248    - Identifier: ExecCommand
   249      Method: Exec
   250      Params:
   251        timeout: {{$EXEC_TIMEOUT}}
   252        command:
   253        {{range $EXEC_COMMAND}}
   254        - {{.}}
   255        {{end}}
   256  
   257  {{if $ENABLE_API_AVAILABILITY_MEASUREMENT}}
   258  - name: Unpausing APIAvailability measurement
   259    measurements:
   260    - Identifier: APIAvailability
   261      Method: APIAvailability
   262      Params:
   263        action: unpause
   264  {{end}}
   265  
   266  - name: Sleep
   267    measurements:
   268    - Identifier: WaitAfterExec
   269      Method: Sleep
   270      Params:
   271        duration: {{$SLEEP_AFTER_EXEC_DURATION}}
   272  {{end}}
   273  
   274  {{if not $EXIT_AFTER_EXEC}}
   275  
   276  {{if not $IS_SMALL_CLUSTER}}
   277  - module:
   278      path: modules/scheduler-throughput.yaml
   279      params:
   280        action: delete
   281        namespaces: {{$namespaces}}
   282        replicasPerNamespace: 0
   283        schedulerThroughputNamespaces: {{$schedulerThroughputNamespaces}}
   284        schedulerThroughputPodsPerDeployment: {{$schedulerThroughputPodsPerDeployment}}
   285  # END scheduler throughput
   286  {{end}}
   287  
   288  {{if not $IS_SMALL_CLUSTER}}
   289  # TODO(kubernetes/perf-tests/issues/1024): We shouldn't have a dedicated module for measuring pod-startup-latency.
   290  - module:
   291      path: modules/pod-startup-latency.yaml
   292      params:
   293        namespaces: {{$namespaces}}
   294        minPodsInSmallCluster: {{$MIN_PODS_IN_SMALL_CLUSTERS}}
   295        image: {{$latencyPodImage}}
   296  {{end}}
   297  
   298  {{if $ENABLE_NETWORK_POLICY_ENFORCEMENT_LATENCY_TEST}}
   299  - module:
   300      path: modules/network-policy/net-policy-metrics.yaml
   301      params:
   302        action: gather
   303        usePolicyCreationMetrics: false
   304  
   305  - module:
   306      path: modules/network-policy/net-policy-enforcement-latency.yaml
   307      params:
   308        complete: true
   309        testType: "pod-creation"
   310  
   311  - module:
   312      path: modules/network-policy/net-policy-enforcement-latency.yaml
   313      params:
   314        run: true
   315        testType: "policy-creation"
   316  {{end}}
   317  
   318  - module:
   319      path: /modules/reconcile-objects.yaml
   320      params:
   321        actionName: "scale and update"
   322        namespaces: {{$namespaces}}
   323        {{if $RATE_LIMIT_POD_CREATION}}
   324        tuningSet: RandomizedScalingTimeLimited
   325        operationTimeout: 15m
   326        {{else}}
   327        tuningSet: default
   328        operationTimeout: {{AddInt (DivideInt $saturationTime 4) 900}}s
   329        {{end}}
   330        randomScaleFactor: {{$RANDOM_SCALE_FACTOR}}
   331        testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
   332        daemonSetImage: {{$latencyPodImage}}
   333        daemonSetEnv: "after update"
   334        daemonSetReplicas: 1
   335        bigDeploymentSize: {{$BIG_GROUP_SIZE}}
   336        bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}}
   337        mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
   338        mediumDeploymentsPerNamespace: {{$mediumDeploymentsPerNamespace}}
   339        smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
   340        smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}}
   341        smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
   342        smallStatefulSetsPerNamespace: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
   343        mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
   344        mediumStatefulSetsPerNamespace: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
   345        bigJobSize: {{$BIG_GROUP_SIZE}}
   346        bigJobsPerNamespace: {{$bigJobsPerNamespace}}
   347        mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
   348        mediumJobsPerNamespace: 1
   349        smallJobSize: {{$SMALL_GROUP_SIZE}}
   350        smallJobsPerNamespace: 1
   351  
   352  {{if $ADDITIONAL_PHASES_MODULES}}
   353  {{range $ADDITIONAL_PHASES_MODULES}}
   354  - module:
   355      path: {{.}}
   356      params:
   357        action: "scale and update"
   358  {{end}}
   359  {{end}}
   360  
   361  - module:
   362      path: /modules/reconcile-objects.yaml
   363      params:
   364        actionName: "delete"
   365        namespaces: {{$namespaces}}
   366        {{if $RATE_LIMIT_POD_CREATION}}
   367        tuningSet: RandomizedDeletionTimeLimited
   368        operationTimeout: 15m
   369        {{else}}
   370        tuningSet: default
   371        operationTimeout: {{AddInt $deletionTime 900}}s
   372        {{end}}
   373        testMaxReplicaFactor: {{$RANDOM_SCALE_FACTOR}}
   374        daemonSetReplicas: 0
   375        bigDeploymentSize: {{$BIG_GROUP_SIZE}}
   376        bigDeploymentsPerNamespace: 0
   377        mediumDeploymentSize: {{$MEDIUM_GROUP_SIZE}}
   378        mediumDeploymentsPerNamespace: 0
   379        smallDeploymentSize: {{$SMALL_GROUP_SIZE}}
   380        smallDeploymentsPerNamespace: 0
   381        smallStatefulSetSize: {{$SMALL_GROUP_SIZE}}
   382        smallStatefulSetsPerNamespace: 0
   383        mediumStatefulSetSize: {{$MEDIUM_GROUP_SIZE}}
   384        mediumStatefulSetsPerNamespace: 0
   385        bigJobSize: {{$BIG_GROUP_SIZE}}
   386        bigJobsPerNamespace: 0
   387        mediumJobSize: {{$MEDIUM_GROUP_SIZE}}
   388        mediumJobsPerNamespace: 0
   389        smallJobSize: {{$SMALL_GROUP_SIZE}}
   390        smallJobsPerNamespace: 0
   391        pvSmallStatefulSetSize: {{$SMALL_STATEFUL_SETS_PER_NAMESPACE}}
   392        pvMediumStatefulSetSize: {{$MEDIUM_STATEFUL_SETS_PER_NAMESPACE}}
   393  
   394  {{if $ADDITIONAL_PHASES_MODULES}}
   395  {{range $ADDITIONAL_PHASES_MODULES}}
   396  - module:
   397      path: {{.}}
   398      params:
   399        action: "delete"
   400  {{end}}
   401  {{end}}
   402  
   403  - module:
   404      path: /modules/configmaps-secrets.yaml
   405      params:
   406        actionName: delete
   407        tuningSet: default
   408        namespaces: {{$namespaces}}
   409        bigDeploymentsPerNamespace: 0
   410        mediumDeploymentsPerNamespace: 0
   411        smallDeploymentsPerNamespace: 0
   412  
   413  - name: Deleting PriorityClass for DaemonSets
   414    phases:
   415      - replicasPerNamespace: 0
   416        tuningSet: Sequence
   417        objectBundle:
   418          - basename: daemonset-priorityclass
   419            objectTemplatePath: daemonset-priorityclass.yaml
   420  
   421  - module:
   422      path: modules/services.yaml
   423      params:
   424        actionName: "Deleting"
   425        namespaces: {{$namespaces}}
   426        smallServicesPerNamespace: 0
   427        mediumServicesPerNamespace: 0
   428        bigServicesPerNamespace: 0
   429  {{end}} # not EXIT_AFTER_EXEC
   430  
   431  - module:
   432      path: /modules/measurements.yaml
   433      params:
   434        action: gather
   435  
   436  {{if $ADDITIONAL_MEASUREMENT_MODULES}}
   437  {{range $ADDITIONAL_MEASUREMENT_MODULES}}
   438  - module:
   439      path: {{.}}
   440      params:
   441        action: gather
   442  {{end}}
   443  {{end}}
   444  
   445  {{if $ENABLE_NETWORK_POLICY_ENFORCEMENT_LATENCY_TEST}}
   446  - module:
   447      path: modules/network-policy/net-policy-enforcement-latency.yaml
   448      params:
   449        complete: true
   450        testType: "policy-creation"
   451  {{end}}