k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/config/jobs/kubernetes/sig-scalability/sig-scalability-presets.yaml

k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/config/jobs/kubernetes/sig-scalability/sig-scalability-presets.yaml (about)

     1  presets:
     2  ###### Kubemark envs
     3  ### Common env variables for all kubemark-related suites.
     4  - labels:
     5      preset-e2e-kubemark-common: "true"
     6    env:
     7    - name: KUBE_GCS_UPDATE_LATEST
     8      value: "n"
     9    - name: KUBE_FASTBUILD
    10      value: "true"
    11    - name: KUBE_GCE_ENABLE_IP_ALIASES
    12      value: "true"
    13    - name: CREATE_CUSTOM_NETWORK
    14      value: "true"
    15    - name: ENABLE_HOLLOW_NODE_LOGS
    16      value: "true"
    17    # Turn on profiling for various components.
    18    - name: ETCD_TEST_ARGS
    19      value: "--enable-pprof"
    20    - name: APISERVER_TEST_ARGS
    21      value: "--profiling --contention-profiling"
    22    # Size in bytes of an additional annotation added to Node objects in a
    23    # kubemark cluster. The annotation is added to make Node object sizes
    24    # similar to those of regular cluster nodes.
    25    - name: KUBEMARK_NODE_OBJECT_SIZE_BYTES
    26      value: "15000"
    27    # Increase throughput in Kubemark master components and turn on profiling.
    28    - name: KUBEMARK_CONTROLLER_MANAGER_TEST_ARGS
    29      value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
    30    - name: KUBEMARK_SCHEDULER_TEST_ARGS
    31      value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
    32    # Reduce log verbosity.
    33    - name: TEST_CLUSTER_LOG_LEVEL
    34      value: "--v=2"
    35    - name: API_SERVER_TEST_LOG_LEVEL
    36      value: "--v=3"
    37    # Increase controller-manager's resync period to simulate production.
    38    - name: TEST_CLUSTER_RESYNC_PERIOD
    39      value: "--min-resync-period=12h"
    40    # Reduce etcd compaction frequency to match production.
    41    - name: KUBEMARK_ETCD_COMPACTION_INTERVAL_SEC
    42      value: "150"
    43    # Allow one node to not be ready after cluster creation.
    44    - name: ALLOWED_NOTREADY_NODES
    45      value: "1"
    46    - name: ENABLE_PROMETHEUS_SERVER
    47      value: "true"
    48    - name: KUBE_MASTER_NODE_LABELS
    49      value: "node.kubernetes.io/node-exporter-ready=true"
    50    # Keep all logrotated files (not just the 5 latest, which is the default).
    51    - name: LOGROTATE_FILES_MAX_COUNT
    52      value: "1000"
    53    - name: LOGROTATE_MAX_SIZE
    54      value: "5G"
    55    # Ensure good enough architecture for master machines.
    56    - name: MASTER_MIN_CPU_ARCHITECTURE
    57      value: "Intel Ice Lake"
    58    # Increase delete collection parallelism.
    59    - name: TEST_CLUSTER_DELETE_COLLECTION_WORKERS
    60      value: "--delete-collection-workers=16"
    61    # Dump full systemd journal on master and nodes.
    62    - name: LOG_DUMP_SYSTEMD_JOURNAL
    63      value: "true"
    64    # Timeout for the log dumping over SSH. Relevant only if fallback to SSH log dumping takes place,
    65    # e.g. when the logexporter daemonset fails for some reason.
    66    # We deliberately cap it at 1h to avoid spending too much time (e.g. over 5h for a 5k node cluster)
    67    # on dumping logs that in most cases we won't need anyway.
    68    - name: LOG_DUMP_SSH_TIMEOUT_SECONDS
    69      value: "3600"
    70    # Use private clusters for scalability tests - https://github.com/kubernetes/kubernetes/issues/76374
    71    - name: KUBE_GCE_PRIVATE_CLUSTER
    72      value: "true"
    73    # We create approx. 70 hollow nodes per VM. Allow ~4 connections from each of them.
    74    - name: KUBE_GCE_PRIVATE_CLUSTER_PORTS_PER_VM
    75      value: "300"
    76    - name: PROMETHEUS_SCRAPE_ETCD
    77      value: "true"
    78    # Disable kubernetes-dashboard
    79    - name: KUBE_ENABLE_CLUSTER_UI
    80      value: "false"
    81    # Enable assertions on scheduler throughput in density test.
    82    # Setting the threshold to 90 should allow us to catch regressions like
    83    # https://github.com/kubernetes/kubernetes/pull/85030 while not making the tests flaky.
    84    - name: CL2_SCHEDULER_THROUGHPUT_THRESHOLD
    85      value: "90"
    86    - name: CL2_ALLOWED_SLOW_API_CALLS
    87      value: "1"
    88    # Disable PVs until these are fixed in Kubemark:
    89    # https://github.com/kubernetes/perf-tests/issues/803
    90    - name: CL2_ENABLE_PVS
    91      value: "false"
    92    # Switch to using log-dump.sh script included in the kubekins-e2e image
    93    # instead of relying on the deprecated one from k/k repository.
    94    - name: USE_TEST_INFRA_LOG_DUMPING
    95      value: "true"
    96    # If log dumping of nodes is enabled and logexporter creation fails or less than 50 %
    97    # of the nodes got logexported successfully, then report a failure.
    98    - name: LOG_DUMP_EXPECTED_SUCCESS_PERCENTAGE
    99      value: "50"
   100    - name: PERF_TESTS_PRINT_COMMIT_HISTORY
   101      value: "true"
   102    - name: DUMP_TO_GCS_ONLY
   103      value: "true"
   104    # Disable konnectivity in kubemark as it doesn't work (see https://github.com/kubernetes/perf-tests/issues/1828)
   105    # TODO(https://github.com/kubernetes/perf-tests/issues/1828): Use konnectivity in kubemark.
   106    - name: KUBE_ENABLE_KONNECTIVITY_SERVICE
   107      value: "false"
   108    - name: DEPLOY_GCI_DRIVER
   109      value: "true"
   110    - name: PROMETHEUS_STORAGE_CLASS_PROVISIONER
   111      value: "pd.csi.storage.gke.io"
   112  
   113  ### kubemark-gce-scale
   114  - labels:
   115      preset-e2e-kubemark-gce-scale: "true"
   116    env:
   117    # Kubernetes env variables.
   118    # TODO: Remove this after kube-proxy improvements.
   119    - name: USE_REAL_PROXIER
   120      value: "false"
   121    - name: HOLLOW_PROXY_TEST_ARGS
   122      value: "--use-real-proxier=false"
   123    - name: HEAPSTER_KUBELET_TEST_ARGS  # passes a monitoring NoSchedule taint via --register-with-taints
   124      value: "--register-with-taints=monitoring=:NoSchedule"
   125    # A Heapster node is needed in order for Prometheus pods to fit in the cluster.
   126    - name: HEAPSTER_MACHINE_TYPE
   127      value: "e2-standard-8"
   128  
   129  
   130  ###### Scalability Envs
   131  ### Common env variables for all scalability-related suites.
   132  - labels:
   133      preset-e2e-scalability-common: "true"
   134    env:
   135    # Override GCE defaults.
   136    - name: NODE_SIZE
   137      value: "e2-medium"
   138    - name: NODE_DISK_SIZE
   139      value: "50GB"
   140    - name: REGISTER_MASTER
   141      value: "true"
   142    - name: LOGROTATE_MAX_SIZE
   143      value: "5G"
   144    # Use IP-aliases for scalability tests.
   145    - name: KUBE_GCE_ENABLE_IP_ALIASES
   146      value: "true"
   147    - name: CREATE_CUSTOM_NETWORK
   148      value: "true"
   149    # Ensure good enough architecture for master machines.
   150    - name: MASTER_MIN_CPU_ARCHITECTURE
   151      value: "Intel Ice Lake"
   152    - name: MASTER_SIZE
   153      value: "n2-standard-32"
   154    # Turn on profiling for various components and
   155    # increase throughput in master components.
   156    - name: ETCD_EXTRA_ARGS
   157      value: "--enable-pprof"
   158    - name: APISERVER_TEST_ARGS
   159      value: "--profiling --contention-profiling"
   160    - name: CONTROLLER_MANAGER_TEST_ARGS
   161      value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
   162    - name: KUBELET_TEST_ARGS
   163      value: "--enable-debugging-handlers --kube-api-qps=100 --kube-api-burst=100"
   164    - name: NODE_KUBELET_TEST_ARGS
   165      # e2-medium machines report a capacity of 2 CPUs.
   166      # We adjust the allocatable to match reality and additionally tweak it
   167      # to achieve roughly a 1:4 allocatable CPU to memory ratio.
   168      value: "--kube-reserved=cpu=1050m"
   169    - name: KUBEPROXY_TEST_ARGS
   170      # TODO(#74011): Remove metrics-bind-address if the default is set.
   171      # FeatureGate added in #110268 v1.26
   172      value: "--profiling --metrics-bind-address=0.0.0.0"
   173    - name: SCHEDULER_TEST_ARGS
   174      value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
   175    # Reduce log verbosity.
   176    - name: TEST_CLUSTER_LOG_LEVEL
   177      value: "--v=2"
   178    - name: API_SERVER_TEST_LOG_LEVEL
   179      value: "--v=3"
   180    # Increase resync period to simulate production.
   181    - name: TEST_CLUSTER_RESYNC_PERIOD
   182      value: "--min-resync-period=12h"
   183    # Reduce etcd compaction frequency to match production.
   184    - name: ETCD_COMPACTION_INTERVAL_SEC
   185      value: "150"
   186    # Increase delete collection parallelism.
   187    - name: TEST_CLUSTER_DELETE_COLLECTION_WORKERS
   188      value: "--delete-collection-workers=16"
   189    # Dump full systemd journal on master and nodes.
   190    - name: LOG_DUMP_SYSTEMD_JOURNAL
   191      value: "true"
   192    # Dump clusterloader prober's log files.
   193    - name: LOG_DUMP_EXTRA_FILES
   194      value: "cl2-*"
   195    # Timeout for the log dumping over SSH. Relevant only if fallback to SSH log dumping takes place,
   196    # e.g. when the logexporter daemonset fails for some reason.
   197    # We deliberately cap it at 1h to avoid spending too much time (e.g. over 5h for a 5k node cluster)
   198    # on dumping logs that in most cases we won't need anyway.
   199    - name: LOG_DUMP_SSH_TIMEOUT_SECONDS
   200      value: "3600"
   201    # Keep all logrotated files (not just the 5 latest, which is the default).
   202    - name: LOGROTATE_FILES_MAX_COUNT
   203      value: "1000"
   204    - name: ENABLE_PROMETHEUS_SERVER
   205      value: "true"
   206    - name: KUBE_MASTER_NODE_LABELS
   207      value: "node.kubernetes.io/node-exporter-ready=true"
   208    # Use private clusters for scalability tests - https://github.com/kubernetes/kubernetes/issues/76374
   209    - name: KUBE_GCE_PRIVATE_CLUSTER
   210      value: "true"
   211    - name: PROMETHEUS_SCRAPE_ETCD
   212      value: "true"
   213    # Disable kubernetes-dashboard
   214    - name: KUBE_ENABLE_CLUSTER_UI
   215      value: "false"
   216    # Enable assertions on scheduler throughput in density test.
   217    # Setting the threshold to 90 should allow us to catch regressions like
   218    # https://github.com/kubernetes/kubernetes/pull/85030 while not making the tests flaky.
   219    - name: CL2_SCHEDULER_THROUGHPUT_THRESHOLD
   220      value: "90"
   221    - name: CL2_ALLOWED_SLOW_API_CALLS
   222      value: "1"
   223    # Override of the default list of whitelisted resources during addons reconciliation
   224    # performed by kube-addon-manager. This is the same as the default list in the script
   225    # k/k/cluster/addons/addon-manager/kube-addons.sh but without core/v1/Pod resource.
   226    # As explained in https://github.com/kubernetes/kubernetes/pull/91018, this results
   227    # in a Kubernetes cluster performance improvement.
   228    - name: KUBECTL_PRUNE_WHITELIST_OVERRIDE
   229      value: >-
   230        core/v1/ConfigMap
   231        core/v1/Endpoints
   232        core/v1/Namespace
   233        core/v1/PersistentVolumeClaim
   234        core/v1/PersistentVolume
   235        core/v1/ReplicationController
   236        core/v1/Secret
   237        core/v1/Service
   238        batch/v1/Job
   239        batch/v1/CronJob
   240        apps/v1/DaemonSet
   241        apps/v1/Deployment
   242        apps/v1/ReplicaSet
   243        apps/v1/StatefulSet
   244        networking.k8s.io/v1/Ingress
   245    # Switch to using log-dump.sh script included in the kubekins-e2e image
   246    # instead of relying on the deprecated one from k/k repository.
   247    - name: USE_TEST_INFRA_LOG_DUMPING
   248      value: "true"
   249    # If log dumping of nodes is enabled and logexporter creation fails or less than 50 %
   250    # of the nodes got logexported successfully, then report a failure.
   251    - name: LOG_DUMP_EXPECTED_SUCCESS_PERCENTAGE
   252      value: "50"
   253    - name: PERF_TESTS_PRINT_COMMIT_HISTORY
   254      value: "true"
   255    - name: LOG_DUMP_SAVE_SERVICES
   256      value: "containerd"
   257    - name: DUMP_TO_GCS_ONLY
   258      value: "true"
   259    - name: DEPLOY_GCI_DRIVER
   260      value: "true"
   261    - name: PROMETHEUS_STORAGE_CLASS_PROVISIONER
   262      value: "pd.csi.storage.gke.io"
   263    - name: KUBE_APISERVER_GODEBUG
   264      value: "gctrace=1"
   265    - name: CL2_ENABLE_QUOTAS_USAGE_MEASUREMENT
   266      value: "true"
   267    - name: KUBE_GCE_PRIVATE_CLUSTER_PORTS_PER_VM
   268      value: "256"
   269  
   270  ###### Scalability Envs
   271  ### Common env variables for node scalability-related suites.
   272  - labels:
   273      preset-e2e-scalability-node: "true"
   274    env:
   275    # Override GCE defaults.
   276    - name: MASTER_SIZE
   277      value: "n1-standard-4"
   278    - name: NODE_SIZE
   279      value: "e2-standard-8"
   280    - name: NODE_DISK_SIZE
   281      value: "100GB"
   282    - name: REGISTER_MASTER
   283      value: "true"
   284    - name: LOGROTATE_MAX_SIZE
   285      value: "5G"
   286    # Use IP-aliases for scalability tests.
   287    - name: KUBE_GCE_ENABLE_IP_ALIASES
   288      value: "true"
   289    - name: CREATE_CUSTOM_NETWORK
   290      value: "true"
   291    # Ensure good enough architecture for master machines.
   292    - name: MASTER_MIN_CPU_ARCHITECTURE
   293      value: "Intel Skylake"
   294    # Turn on profiling for various components and
   295    # increase throughput in master components and Kubelet.
   296    - name: ETCD_EXTRA_ARGS
   297      value: "--enable-pprof"
   298    - name: CONTROLLER_MANAGER_TEST_ARGS
   299      value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
   300    # Bump max pods per node in Kubelet, because there are more than 10
   301    # system pods in a 1-node cluster.
   302    - name: MAX_PODS_PER_NODE
   303      value: "128"
   304    - name: KUBELET_TEST_ARGS
   305      value: "--enable-debugging-handlers --kube-api-qps=100 --kube-api-burst=100"
   306    - name: KUBEPROXY_TEST_ARGS
   307      value: "--profiling"
   308    - name: SCHEDULER_TEST_ARGS
   309      value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
   310    # Reduce log verbosity.
   311    - name: TEST_CLUSTER_LOG_LEVEL
   312      value: "--v=2"
   313    - name: API_SERVER_TEST_LOG_LEVEL
   314      value: "--v=3"
   315    # Increase resync period to simulate production.
   316    - name: TEST_CLUSTER_RESYNC_PERIOD
   317      value: "--min-resync-period=12h"
   318    # Increase delete collection parallelism.
   319    - name: TEST_CLUSTER_DELETE_COLLECTION_WORKERS
   320      value: "--delete-collection-workers=16"
   321    - name: DUMP_TO_GCS_ONLY
   322      value: "true"
   323  
   324  - labels:
   325      preset-e2e-scalability-presubmits: "true"
   326    env:
   327    - name: PROMETHEUS_SCRAPE_MASTER_KUBELETS
   328      value: "true"  # quoted so the value stays a string under YAML implicit typing
   329  
   330  - labels:
   331      preset-e2e-scalability-periodics: "true"
   332    env:
   333    - name: PROMETHEUS_SCRAPE_MASTER_KUBELETS
   334      value: "true"  # quoted so the value stays a string under YAML implicit typing
   335  
   336  - labels:
   337      preset-e2e-scalability-periodics-master: "true"
   338    env: