k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/testing/density/config.yaml

k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/testing/density/config.yaml (about)

     1  # ASSUMPTIONS:
     2  # - Underlying cluster should have 100+ nodes.
     3  # - Number of nodes should be divisible by NODES_PER_NAMESPACE (default 100).
     4  
     5  # Constants
        # Each constant is read from a test parameter with DefaultParam; the second argument is the
        # fallback value. Some parameters are looked up under a CL2_-prefixed name that differs from
        # the local variable name.
     6  {{$DENSITY_RESOURCE_CONSTRAINTS_FILE := DefaultParam .DENSITY_RESOURCE_CONSTRAINTS_FILE ""}}
     7  # Cater for the case where the number of nodes is less than nodes per namespace by capping the value at .Nodes. See https://github.com/kubernetes/perf-tests/issues/887
     8  {{$NODES_PER_NAMESPACE := MinInt .Nodes (DefaultParam .NODES_PER_NAMESPACE 100)}}
     9  {{$PODS_PER_NODE := DefaultParam .PODS_PER_NODE 30}}
        # DENSITY_TEST_THROUGHPUT is only used below to size the saturation deployment timeout.
    10  {{$DENSITY_TEST_THROUGHPUT := DefaultParam .DENSITY_TEST_THROUGHPUT 20}}
        # Passed as the threshold of the SchedulingThroughput measurement when it is gathered.
    11  {{$SCHEDULER_THROUGHPUT_THRESHOLD := DefaultParam .CL2_SCHEDULER_THROUGHPUT_THRESHOLD 0}}
    12  # LATENCY_POD_MEMORY and LATENCY_POD_CPU are calculated for a 1-core 4GB node.
    13  # Increasing the allocation of both memory and cpu by 10%
    14  # decreases the value of the priority function in the scheduler by one point.
    15  # This results in a decreased probability of choosing the same node again.
        # The values are rendered with an "m" (CPU) and "M" (memory) suffix in the latency pod fill map.
    16  {{$LATENCY_POD_CPU := DefaultParam .LATENCY_POD_CPU 100}}
    17  {{$LATENCY_POD_MEMORY := DefaultParam .LATENCY_POD_MEMORY 350}}
    18  {{$MIN_LATENCY_PODS := DefaultParam .MIN_LATENCY_PODS 500}}
        # Fixed literal (not overridable through a parameter); added to the computed timeout, which is
        # later rendered with an "s" suffix.
    19  {{$MIN_SATURATION_PODS_TIMEOUT := 180}}
        # Feature switches and tuning knobs forwarded to the chaosMonkey section and to measurements.
    20  {{$ENABLE_CHAOSMONKEY := DefaultParam .ENABLE_CHAOSMONKEY false}}
    21  {{$ENABLE_SYSTEM_POD_METRICS:= DefaultParam .ENABLE_SYSTEM_POD_METRICS true}}
    22  {{$ENABLE_CLUSTER_OOMS_TRACKER := DefaultParam .CL2_ENABLE_CLUSTER_OOMS_TRACKER true}}
    23  {{$CLUSTER_OOMS_IGNORED_PROCESSES := DefaultParam .CL2_CLUSTER_OOMS_IGNORED_PROCESSES ""}}
    24  {{$USE_SIMPLE_LATENCY_QUERY := DefaultParam .USE_SIMPLE_LATENCY_QUERY false}}
    25  {{$ENABLE_RESTART_COUNT_CHECK := DefaultParam .ENABLE_RESTART_COUNT_CHECK true}}
    26  {{$RESTART_COUNT_THRESHOLD_OVERRIDES:= DefaultParam .RESTART_COUNT_THRESHOLD_OVERRIDES ""}}
    27  {{$ALLOWED_SLOW_API_CALLS := DefaultParam .CL2_ALLOWED_SLOW_API_CALLS 0}}
    28  {{$ENABLE_VIOLATIONS_FOR_SCHEDULING_THROUGHPUT := DefaultParam .CL2_ENABLE_VIOLATIONS_FOR_SCHEDULING_THROUGHPUT true}}
    29  # Variables
        # Values derived from the constants above and from the cluster node count (.Nodes).
        # namespaces: one namespace per NODES_PER_NAMESPACE nodes (integer division).
    30  {{$namespaces := DivideInt .Nodes $NODES_PER_NAMESPACE}}
        # podsPerNamespace is the replica count of the single saturation deployment in each namespace.
    31  {{$podsPerNamespace := MultiplyInt $PODS_PER_NODE $NODES_PER_NAMESPACE}}
    32  {{$totalPods := MultiplyInt $podsPerNamespace $namespaces}}
        # latencyReplicas: number of latency deployments per namespace, i.e. max(MIN_LATENCY_PODS, .Nodes)
        # spread over the namespaces.
    33  {{$latencyReplicas := DivideInt (MaxInt $MIN_LATENCY_PODS .Nodes) $namespaces}}
        # NOTE(review): totalLatencyPods is not referenced anywhere else in this file as shown.
    34  {{$totalLatencyPods := MultiplyInt $namespaces $latencyReplicas}}
        # Soft timeout: totalPods / DENSITY_TEST_THROUGHPUT, piped into AddInt together with
        # MIN_SATURATION_PODS_TIMEOUT. It is rendered with an "s" suffix where it is used.
    35  {{$saturationDeploymentTimeout := DivideFloat $totalPods $DENSITY_TEST_THROUGHPUT | AddInt $MIN_SATURATION_PODS_TIMEOUT}}
    36  # saturationDeploymentHardTimeout must be at least 20m (1200s) to make sure that a ~10m node
    37  # failure won't fail the test. See https://github.com/kubernetes/kubernetes/issues/73461#issuecomment-467338711
    38  {{$saturationDeploymentHardTimeout := MaxInt $saturationDeploymentTimeout 1200}}
    39  
        # Object template paths for the saturation and latency deployments.
        # Both default to the same file, "deployment.yaml", and can be overridden independently.
    40  {{$saturationDeploymentSpec := DefaultParam .SATURATION_DEPLOYMENT_SPEC "deployment.yaml"}}
    41  {{$latencyDeploymentSpec := DefaultParam .LATENCY_DEPLOYMENT_SPEC "deployment.yaml"}}
    42  
    43  # Parameter shared by the probe-based measurements: it is passed as checkProbesReadyTimeout
        # to both InClusterNetworkLatency and DnsLookupLatency when they are started.
    44  {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT := DefaultParam .CL2_PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT "15m"}}
    45  
        # Top-level test definition: test name, namespace count, load shape and optional chaos settings.
    46  name: density
    47  namespace:
          # Computed above as .Nodes divided by NODES_PER_NAMESPACE.
    48    number: {{$namespaces}}
        # Uniform5qps is the only tuning set defined here; every phase below references it.
    49  tuningSets:
    50  - name: Uniform5qps
    51    qpsLoad:
    52      qps: 5
        # The chaosMonkey section is emitted only when ENABLE_CHAOSMONKEY is true (default: false).
    53  {{if $ENABLE_CHAOSMONKEY}}
    54  chaosMonkey:
    55    nodeFailure:
    56      failureRate: 0.01
    57      interval: 1m
    58      jitterFactor: 10.0
            # NOTE(review): presumably the ~10m node failure that saturationDeploymentHardTimeout
            # is sized to tolerate - confirm.
    59      simulatedDowntime: 10m
    60  {{end}}
        # Steps are listed in execution order: start measurements, create/wait/measure saturation pods,
        # create/wait/delete latency pods, delete saturation pods, then gather the remaining measurements.
    61  steps:
    62  - name: Starting measurements
    63    measurements:
          # The APIResponsivenessPrometheus method is started under two identifiers; the
          # "Collecting measurements" step gathers each of them with different parameters.
    64    - Identifier: APIResponsivenessPrometheus
    65      Method: APIResponsivenessPrometheus
    66      Params:
    67        action: start
    68    - Identifier: APIResponsivenessPrometheusSimple
    69      Method: APIResponsivenessPrometheus
    70      Params:
    71        action: start
    72    # TODO(oxddr): figure out how many probers to run as a function of cluster size
    73    - Identifier: InClusterNetworkLatency
    74      Method: InClusterNetworkLatency
    75      Params:
    76        action: start
    77        checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}}
              # Two replicas plus one more for every 100 nodes (integer division).
    78        replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
    79    - Identifier: DnsLookupLatency
    80      Method: DnsLookupLatency
    81      Params:
    82        action: start
    83        checkProbesReadyTimeout: {{$PROBE_MEASUREMENTS_CHECK_PROBES_READY_TIMEOUT}}
              # Same sizing rule as InClusterNetworkLatency.
    84        replicasPerProbe: {{AddInt 2 (DivideInt .Nodes 100)}}
    85    - Identifier: TestMetrics
    86      Method: TestMetrics
    87      Params:
    88        action: start
              # String parameters whose default is "" are rendered through YamlQuote, as
              # restartCountThresholdOverrides already is, so that an empty value or one containing
              # YAML special characters is still emitted as a string rather than a bare scalar.
    89        resourceConstraints: {{YamlQuote $DENSITY_RESOURCE_CONSTRAINTS_FILE 4}}
    90        systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
    91        clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}}
    92        clusterOOMsIgnoredProcesses: {{YamlQuote $CLUSTER_OOMS_IGNORED_PROCESSES 4}}
    93        restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
    94        enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}
    95  
        # Starts the three measurements that observe the saturation deployments. All of them select
        # objects with "group = saturation", the Group value given to the saturation deployment template.
    96  - name: Starting saturation pod measurements
    97    measurements:
    98    - Identifier: SaturationPodStartupLatency
    99      Method: PodStartupLatency
   100      Params:
   101        action: start
   102        labelSelector: group = saturation
              # Soft timeout computed above, rendered in seconds.
   103        threshold: {{$saturationDeploymentTimeout}}s
   104    - Identifier: WaitForRunningSaturationDeployments
   105      Method: WaitForControlledPodsRunning
   106      Params:
   107        action: start
   108        apiVersion: apps/v1
   109        kind: Deployment
   110        labelSelector: group = saturation
              # Hard timeout: the soft timeout raised to at least 1200 seconds.
   111        operationTimeout: {{$saturationDeploymentHardTimeout}}s
   112    - Identifier: SchedulingThroughput
   113      Method: SchedulingThroughput
   114      Params:
   115        action: start
   116        labelSelector: group = saturation
   117  
        # Creates one saturation deployment in every namespace, each with podsPerNamespace replicas.
   118  - name: Creating saturation pods
   119    phases:
   120    - namespaceRange:
   121        min: 1
   122        max: {{$namespaces}}
   123      replicasPerNamespace: 1
   124      tuningSet: Uniform5qps
   125      objectBundle:
   126      - basename: saturation-deployment
   127        objectTemplatePath: {{$saturationDeploymentSpec}}
   128        templateFillMap:
   129          Replicas: {{$podsPerNamespace}}
                # Group matches the "group = saturation" label selector used by the measurements.
   130          Group: saturation
                # Fixed, small resource requests for saturation pods (not parameterized).
   131          CpuRequest: 1m
   132          MemoryRequest: 10M
   133  
        # Gathers the WaitForControlledPodsRunning measurement started earlier under the same identifier.
   134  - name: Waiting for saturation pods to be running
   135    measurements:
   136    - Identifier: WaitForRunningSaturationDeployments
   137      Method: WaitForControlledPodsRunning
   138      Params:
   139        action: gather
   140  
        # Gathers the saturation startup-latency and scheduling-throughput measurements started earlier.
   141  - name: Collecting saturation pod measurements
   142    measurements:
   143    - Identifier: SaturationPodStartupLatency
   144      Method: PodStartupLatency
   145      Params:
   146        action: gather
   147    - Identifier: SchedulingThroughput
   148      Method: SchedulingThroughput
   149      Params:
   150        action: gather
              # Both values come from CL2_-prefixed parameters (defaults: true and 0).
   151        enableViolations: {{$ENABLE_VIOLATIONS_FOR_SCHEDULING_THROUGHPUT}}
   152        threshold: {{$SCHEDULER_THROUGHPUT_THRESHOLD}}
   153  
        # Starts the measurements that observe the latency deployments, selected with "group = latency".
        # Unlike the saturation counterpart, PodStartupLatency is started here without a threshold param.
   154  - name: Starting latency pod measurements
   155    measurements:
   156    - Identifier: PodStartupLatency
   157      Method: PodStartupLatency
   158      Params:
   159        action: start
   160        labelSelector: group = latency
   161    - Identifier: WaitForRunningLatencyDeployments
   162      Method: WaitForControlledPodsRunning
   163      Params:
   164        action: start
   165        apiVersion: apps/v1
   166        kind: Deployment
   167        labelSelector: group = latency
              # Fixed literal timeout; not derived from the cluster size.
   168        operationTimeout: 15m
   169  
        # Creates latencyReplicas single-replica deployments in every namespace.
   170  - name: Creating latency pods
   171    phases:
   172    - namespaceRange:
   173        min: 1
   174        max: {{$namespaces}}
   175      replicasPerNamespace: {{$latencyReplicas}}
   176      tuningSet: Uniform5qps
   177      objectBundle:
   178      - basename: latency-deployment
   179        objectTemplatePath: {{$latencyDeploymentSpec}}
   180        templateFillMap:
   181          Replicas: 1
                # Group matches the "group = latency" label selector used by the measurements.
   182          Group: latency
                # Requests come from LATENCY_POD_CPU / LATENCY_POD_MEMORY with "m" / "M" suffixes appended.
   183          CpuRequest: {{$LATENCY_POD_CPU}}m
   184          MemoryRequest: {{$LATENCY_POD_MEMORY}}M
   185  
        # Gathers the WaitForControlledPodsRunning measurement started for the latency deployments.
   186  - name: Waiting for latency pods to be running
   187    measurements:
   188    - Identifier: WaitForRunningLatencyDeployments
   189      Method: WaitForControlledPodsRunning
   190      Params:
   191        action: gather
   192  
        # Same namespace range, basename and template as "Creating latency pods", but with
        # replicasPerNamespace set to 0; per the step name this removes the latency deployments.
   193  - name: Deleting latency pods
   194    phases:
   195    - namespaceRange:
   196        min: 1
   197        max: {{$namespaces}}
   198      replicasPerNamespace: 0
   199      tuningSet: Uniform5qps
   200      objectBundle:
   201      - basename: latency-deployment
   202        objectTemplatePath: {{$latencyDeploymentSpec}}
   203  
        # Gathers WaitForRunningLatencyDeployments a second time, now after the deletion phase.
        # NOTE(review): presumably the measurement waits for the controlled pods to reach the new
        # (zero) desired state - confirm against the WaitForControlledPodsRunning implementation.
   204  - name: Waiting for latency pods to be deleted
   205    measurements:
   206    - Identifier: WaitForRunningLatencyDeployments
   207      Method: WaitForControlledPodsRunning
   208      Params:
   209        action: gather
   210  
        # Gathers the PodStartupLatency measurement that was started for the latency pods.
   211  - name: Collecting pod startup latency
   212    measurements:
   213    - Identifier: PodStartupLatency
   214      Method: PodStartupLatency
   215      Params:
   216        action: gather
   217  
        # Same namespace range, basename and template as "Creating saturation pods", but with
        # replicasPerNamespace set to 0; per the step name this removes the saturation deployments.
   218  - name: Deleting saturation pods
   219    phases:
   220    - namespaceRange:
   221        min: 1
   222        max: {{$namespaces}}
   223      replicasPerNamespace: 0
   224      tuningSet: Uniform5qps
   225      objectBundle:
   226      - basename: saturation-deployment
   227        objectTemplatePath: {{$saturationDeploymentSpec}}
   228  
        # Gathers WaitForRunningSaturationDeployments a second time, now after the deletion phase.
        # NOTE(review): presumably the measurement waits for the controlled pods to reach the new
        # (zero) desired state - confirm against the WaitForControlledPodsRunning implementation.
   229  - name: Waiting for saturation pods to be deleted
   230    measurements:
   231    - Identifier: WaitForRunningSaturationDeployments
   232      Method: WaitForControlledPodsRunning
   233      Params:
   234        action: gather
   235  
        # Gathers the cluster-wide measurements that were started in "Starting measurements".
   236  - name: Collecting measurements
   237    measurements:
          # The "simple" variant is always gathered, with violations enabled and its own summary name.
   238    - Identifier: APIResponsivenessPrometheusSimple
   239      Method: APIResponsivenessPrometheus
   240      Params:
   241        action: gather
   242        enableViolations: true
   243        useSimpleLatencyQuery: true
   244        summaryName: APIResponsivenessPrometheus_simple
   245        allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}}
          # The non-simple variant is gathered only when USE_SIMPLE_LATENCY_QUERY is false (the default);
          # it is started unconditionally, and no enableViolations param is set for it here.
   246    {{if not $USE_SIMPLE_LATENCY_QUERY}}
   247    - Identifier: APIResponsivenessPrometheus
   248      Method: APIResponsivenessPrometheus
   249      Params:
   250        action: gather
   251        allowedSlowCalls: {{$ALLOWED_SLOW_API_CALLS}}
   252    {{end}}
   253    - Identifier: InClusterNetworkLatency
   254      Method: InClusterNetworkLatency
   255      Params:
   256        action: gather
   257    - Identifier: DnsLookupLatency
   258      Method: DnsLookupLatency
   259      Params:
   260        action: gather
          # NOTE(review): resourceConstraints and clusterOOMsIgnoredProcesses are passed to TestMetrics
          # on start but not on gather - confirm that they are only needed at start.
   261    - Identifier: TestMetrics
   262      Method: TestMetrics
   263      Params:
   264        action: gather
   265        systemPodMetricsEnabled: {{$ENABLE_SYSTEM_POD_METRICS}}
   266        clusterOOMsTrackerEnabled: {{$ENABLE_CLUSTER_OOMS_TRACKER}}
   267        restartCountThresholdOverrides: {{YamlQuote $RESTART_COUNT_THRESHOLD_OVERRIDES 4}}
   268        enableRestartCountCheck: {{$ENABLE_RESTART_COUNT_CHECK}}