k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/testing/density/high-density-config.yaml

k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/testing/density/high-density-config.yaml (about)

     1  # TODO(https://github.com/kubernetes/perf-tests/issues/1007): Make it possible to run high density as part of the load test.
     2  # ASSUMPTIONS:
     3  # - Underlying cluster should have 100+ nodes.
     4  # - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
     5  
     6  # Constants
     7  {{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}}  # passed to TestMetrics as resourceConstraints
     8  # Cater for the case where the number of nodes is less than nodes per namespace. See https://github.com/kubernetes/perf-tests/issues/887
     9  {{$NODES_PER_NAMESPACE := MinInt .Nodes (DefaultParam .NODES_PER_NAMESPACE 100)}}
    10  {{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}  # multiplied by NODES_PER_NAMESPACE to size each saturation deployment
    11  {{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}}  # divisor used to derive the saturation timeout
    12  {{$SCHEDULER_THROUGHPUT_THRESHOLD := DefaultParam .CL2_SCHEDULER_THROUGHPUT_THRESHOLD 0}}  # threshold given to the SchedulingThroughput gather
    13  # LATENCY_POD_MEMORY and LATENCY_POD_CPU are calculated for 1-core 4GB node.
    14  # Increasing allocation of both memory and cpu by 10%
    15  # decreases the value of priority function in scheduler by one point.
    16  # This results in decreased probability of choosing the same node again.
    17  {{$LATENCY_POD_CPU := DefaultParam .LATENCY_POD_CPU 100}}  # rendered with an "m" suffix as CpuRequest
    18  {{$LATENCY_POD_MEMORY := DefaultParam .LATENCY_POD_MEMORY 350}}  # rendered with an "M" suffix as MemoryRequest
    19  {{$MIN_LATENCY_PODS := 500}}  # lower bound (together with .Nodes) on the latency deployment count before it is split across namespaces
    20  {{$MIN_SATURATION_PODS_TIMEOUT := 180}}  # added to the computed saturation timeout; rendered with an "s" suffix
    21  {{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}  # gates the chaosMonkey section
    22  {{$ENABLE_SYSTEM_POD_METRICS := DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
    23  {{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}}  # when true, the non-simple APIResponsivenessPrometheus gather is skipped
    24  {{$ENABLE_RESTART_COUNT_CHECK := DefaultParam .ENABLE_RESTART_COUNT_CHECK false}}
    25  {{$RESTART_COUNT_THRESHOLD_OVERRIDES := DefaultParam .RESTART_COUNT_THRESHOLD_OVERRIDES ""}}
    26  # Variables
    27  {{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}  # namespace count; the header assumes .Nodes is divisible by NODES_PER_NAMESPACE
    28  {{$podsPerNamespace := MultiplyInt $PODS_PER_NODE $NODES_PER_NAMESPACE}}  # Replicas of the single saturation deployment in each namespace
    29  {{$totalPods := MultiplyInt $podsPerNamespace $namespaces}}
    30  {{$latencyReplicas := DivideInt (MaxInt $MIN_LATENCY_PODS .Nodes) $namespaces}}  # latency deployments per namespace (used as replicasPerNamespace)
    31  {{$totalLatencyPods := MultiplyInt $namespaces $latencyReplicas}}  # NOTE(review): not referenced anywhere else in this file
    32  {{$saturationDeploymentTimeout := DivideFloat $totalPods $DENSITY_TEST_THROUGHPUT | AddInt $MIN_SATURATION_PODS_TIMEOUT}}  # the piped quotient is AddInt's last argument: minimum timeout + totalPods / throughput
    33  # saturationDeploymentHardTimeout must be at least 20m to make sure that ~10m node
    34  # failure won't fail the test. See https://github.com/kubernetes/kubernetes/issues/73461#issuecomment-467338711
    35  {{$saturationDeploymentHardTimeout := MaxInt $saturationDeploymentTimeout 1200}}  # 1200 is the 20m floor; the value is rendered with an "s" suffix
    36  
    37  {{$saturationDeploymentSpec := DefaultParam .SATURATION_DEPLOYMENT_SPEC "deployment.yaml"}}  # objectTemplatePath used for saturation-deployment
    38  {{$latencyDeploymentSpec := DefaultParam .LATENCY_DEPLOYMENT_SPEC "deployment.yaml"}}  # objectTemplatePath used for latency-deployment
    39  
    40  name: density
    41  namespace:
    42    number: {{$namespaces}}  # .Nodes divided by NODES_PER_NAMESPACE
    43  tuningSets:
    44  - name: Uniform5qps  # the only tuning set; every phase in this file references it
    45    qpsLoad:
    46      qps: 5
    47  {{if $ENABLE_CHAOSMONKEY}}
    48  chaosMonkey:  # emitted only when ENABLE_CHAOSMONKEY is true (default false)
    49    nodeFailure:
    50      failureRate: 0.01
    51      interval: 1m
    52      jitterFactor: 10.0
    53      simulatedDowntime: 10m  # the ~10m node failure that the 20m hard-timeout floor is meant to absorb
    54  {{end}}
    55  steps:
    56  - name: Starting measurements  # starts the measurements that the final "Collecting measurements" step gathers
    57    measurements:
    58    - Identifier: APIResponsivenessPrometheus  # started unconditionally; gathered only when USE_SIMPLE_LATENCY_QUERY is false
    59      Method: APIResponsivenessPrometheus
    60      Params:
    61        action: start
    62    - Identifier: APIResponsivenessPrometheusSimple  # same Method under a second Identifier; gathered with useSimpleLatencyQuery: true
    63      Method: APIResponsivenessPrometheus
    64      Params:
    65        action: start
    66    # TODO(oxddr): figure out how many probers to run in function of cluster
    67    - Identifier: InClusterNetworkLatency
    68      Method: InClusterNetworkLatency
    69      Params:
    70        action: start
    71        replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}  # 2 + .Nodes / 100
    72    - Identifier: DnsLookupLatency
    73      Method: DnsLookupLatency
    74      Params:
    75        action: start
    76        replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}  # same sizing as InClusterNetworkLatency
    77    - Identifier: TestMetrics
    78      Method: TestMetrics
    79      Params:
    80        action: start
    81        resourceConstraints: {{$DENSITY_RESOURCE_CONSTRAINTS_FILE}}  # renders empty unless DENSITY_RESOURCE_CONSTRAINTS_FILE is set
    82        systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
    83        restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
    84        enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}
    85  
    86  - name: Starting saturation pod measurements  # all three measurements select objects by group = saturation
    87    measurements:
    88    - Identifier: SaturationPodStartupLatency
    89      Method: PodStartupLatency
    90      Params:
    91        action: start
    92        labelSelector: group = saturation  # presumably matches the Group value filled into the deployment template; verify against the template
    93        threshold: {{$saturationDeploymentTimeout}}s  # computed saturation timeout, in seconds
    94    - Identifier: WaitForRunningSaturationDeployments
    95      Method: WaitForControlledPodsRunning
    96      Params:
    97        action: start
    98        apiVersion: apps/v1
    99        kind: Deployment
   100        labelSelector: group = saturation
   101        operationTimeout: {{$saturationDeploymentHardTimeout}}s  # at least 1200s, per the hard-timeout comment in the variables section
   102    - Identifier: SchedulingThroughput
   103      Method: SchedulingThroughput
   104      Params:
   105        action: start
   106        labelSelector: group = saturation
   107  
   108  - name: Creating saturation pods  # one saturation deployment per namespace
   109    phases:
   110    - namespaceRange:
   111        min: 1
   112        max: {{$namespaces}}
   113      replicasPerNamespace: 1  # a single deployment object per namespace; the pod count comes from Replicas below
   114      tuningSet: Uniform5qps
   115      objectBundle:
   116      - basename: saturation-deployment
   117        objectTemplatePath: {{$saturationDeploymentSpec}}
   118        templateFillMap:
   119          Replicas: {{$podsPerNamespace}}  # PODS_PER_NODE * NODES_PER_NAMESPACE
   120          Group: saturation
   121          CpuRequest: 1m
   122          MemoryRequest: 10M
   123  
   124  - name: Collecting saturation pod measurements  # this named step gathers only the wait-for-running measurement
   125    measurements:
   126    - Identifier: WaitForRunningSaturationDeployments
   127      Method: WaitForControlledPodsRunning
   128      Params:
   129        action: gather
   130  - measurements:  # unnamed step; NOTE(review): presumably kept separate so it runs only after the wait above has finished - confirm
   131    - Identifier: SaturationPodStartupLatency
   132      Method: PodStartupLatency
   133      Params:
   134        action: gather
   135  - measurements:  # unnamed step
   136    - Identifier: SchedulingThroughput
   137      Method: SchedulingThroughput
   138      Params:
   139        action: gather
   140        threshold: {{$SCHEDULER_THROUGHPUT_THRESHOLD}}  # defaults to 0 via CL2_SCHEDULER_THROUGHPUT_THRESHOLD
   141  
   142  - name: Starting latency pod measurements  # both measurements select objects by group = latency
   143    measurements:
   144    - Identifier: PodStartupLatency
   145      Method: PodStartupLatency
   146      Params:
   147        action: start
   148        labelSelector: group = latency  # no threshold param here, unlike SaturationPodStartupLatency
   149    - Identifier: WaitForRunningLatencyDeployments
   150      Method: WaitForControlledPodsRunning
   151      Params:
   152        action: start
   153        apiVersion: apps/v1
   154        kind: Deployment
   155        labelSelector: group = latency
   156        operationTimeout: 15m  # fixed, not derived from cluster size
   157  
   158  - name: Creating latency pods  # many single-replica deployments per namespace
   159    phases:
   160    - namespaceRange:
   161        min: 1
   162        max: {{$namespaces}}
   163      replicasPerNamespace: {{$latencyReplicas}}  # max(MIN_LATENCY_PODS, .Nodes) divided by the namespace count
   164      tuningSet: Uniform5qps
   165      objectBundle:
   166      - basename: latency-deployment
   167        objectTemplatePath: {{$latencyDeploymentSpec}}
   168        templateFillMap:
   169          Replicas: 1
   170          Group: latency
   171          CpuRequest: {{$LATENCY_POD_CPU}}m  # default 100m
   172          MemoryRequest: {{$LATENCY_POD_MEMORY}}M  # default 350M
   173  
   174  - name: Waiting for latency pods to be running
   175    measurements:
   176    - Identifier: WaitForRunningLatencyDeployments  # started in "Starting latency pod measurements"
   177      Method: WaitForControlledPodsRunning
   178      Params:
   179        action: gather
   180  
   181  - name: Deleting latency pods
   182    phases:
   183    - namespaceRange:
   184        min: 1
   185        max: {{$namespaces}}
   186      replicasPerNamespace: 0  # zero instances of latency-deployment per namespace, i.e. the deletion this step is named for
   187      tuningSet: Uniform5qps
   188      objectBundle:
   189      - basename: latency-deployment  # same basename and template path as the creating phase; no templateFillMap is given here
   190        objectTemplatePath: {{$latencyDeploymentSpec}}
   191  
   192  - name: Waiting for latency pods to be deleted
   193    measurements:
   194    - Identifier: WaitForRunningLatencyDeployments  # second gather on this Identifier, issued after the deleting phase
   195      Method: WaitForControlledPodsRunning
   196      Params:
   197        action: gather
   198  
   199  - name: Collecting pod startup latency
   200    measurements:
   201    - Identifier: PodStartupLatency  # started with labelSelector group = latency
   202      Method: PodStartupLatency
   203      Params:
   204        action: gather
   205  
   206  - name: Deleting saturation pods
   207    phases:
   208    - namespaceRange:
   209        min: 1
   210        max: {{$namespaces}}
   211      replicasPerNamespace: 0  # zero instances of saturation-deployment per namespace, i.e. the deletion this step is named for
   212      tuningSet: Uniform5qps
   213      objectBundle:
   214      - basename: saturation-deployment  # same basename and template path as the creating phase; no templateFillMap is given here
   215        objectTemplatePath: {{$saturationDeploymentSpec}}
   216  
   217  - name: Waiting for saturation pods to be deleted
   218    measurements:
   219    - Identifier: WaitForRunningSaturationDeployments  # second gather on this Identifier, issued after the deleting phase
   220      Method: WaitForControlledPodsRunning
   221      Params:
   222        action: gather
   223  
   224  - name: Collecting measurements  # gathers the measurements started in "Starting measurements"
   225    measurements:
   226    - Identifier: APIResponsivenessPrometheusSimple  # always gathered
   227      Method: APIResponsivenessPrometheus
   228      Params:
   229        action: gather
   230        enableViolations: true
   231        useSimpleLatencyQuery: true
   232        summaryName: APIResponsivenessPrometheus_simple
   233    {{if not $USE_SIMPLE_LATENCY_QUERY}}
   234    - Identifier: APIResponsivenessPrometheus  # omitted when USE_SIMPLE_LATENCY_QUERY is true, although its start action is unconditional
   235      Method: APIResponsivenessPrometheus
   236      Params:
   237        action: gather
   238    {{end}}
   239    - Identifier: InClusterNetworkLatency
   240      Method: InClusterNetworkLatency
   241      Params:
   242        action: gather
   243    - Identifier: DnsLookupLatency
   244      Method: DnsLookupLatency
   245      Params:
   246        action: gather
   247    - Identifier: TestMetrics
   248      Method: TestMetrics
   249      Params:
   250        action: gather
   251        systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}  # same three params as the start action; resourceConstraints is passed only at start
   252        restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
   253        enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}