# Source: k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/config/jobs/kubernetes/sig-scalability/sig-scalability-presets.yaml
#
# NOTE: every env `value` below is written as a quoted string on purpose.
# Bare scalars such as true, false, 1 or 3600 are parsed by YAML as booleans
# and integers, not strings; quoting keeps the value type independent of the
# loader and keeps the presets consistent with each other.
presets:
###### Kubemark envs
### Common env variables for all kubemark-related suites.
- labels:
    preset-e2e-kubemark-common: "true"
  env:
  - name: KUBE_GCS_UPDATE_LATEST
    value: "n"
  - name: KUBE_FASTBUILD
    value: "true"
  - name: KUBE_GCE_ENABLE_IP_ALIASES
    value: "true"
  - name: CREATE_CUSTOM_NETWORK
    value: "true"
  - name: ENABLE_HOLLOW_NODE_LOGS
    value: "true"
  # Turn on profiling for various components.
  - name: ETCD_TEST_ARGS
    value: "--enable-pprof"
  - name: APISERVER_TEST_ARGS
    value: "--profiling --contention-profiling"
  # Size in bytes of an additional annotation put on node objects in a
  # kubemark cluster. The annotation is added to make node object sizes
  # similar to those of regular cluster nodes.
  - name: KUBEMARK_NODE_OBJECT_SIZE_BYTES
    value: "15000"
  # Increase throughput in Kubemark master components and turn on profiling.
  - name: KUBEMARK_CONTROLLER_MANAGER_TEST_ARGS
    value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
  - name: KUBEMARK_SCHEDULER_TEST_ARGS
    value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
  # Reduce logs verbosity
  - name: TEST_CLUSTER_LOG_LEVEL
    value: "--v=2"
  - name: API_SERVER_TEST_LOG_LEVEL
    value: "--v=3"
  # Increase controller-manager's resync period to simulate production.
  - name: TEST_CLUSTER_RESYNC_PERIOD
    value: "--min-resync-period=12h"
  # Reduce etcd compaction frequency to match production.
  - name: KUBEMARK_ETCD_COMPACTION_INTERVAL_SEC
    value: "150"
  # Allow one node to not be ready after cluster creation.
  - name: ALLOWED_NOTREADY_NODES
    value: "1"
  - name: ENABLE_PROMETHEUS_SERVER
    value: "true"
  - name: KUBE_MASTER_NODE_LABELS
    value: "node.kubernetes.io/node-exporter-ready=true"
  # Keep all logrotated files (not just 5 latest which is a default)
  - name: LOGROTATE_FILES_MAX_COUNT
    value: "1000"
  - name: LOGROTATE_MAX_SIZE
    value: "5G"
  # Ensure good enough architecture for master machines.
  - name: MASTER_MIN_CPU_ARCHITECTURE
    value: "Intel Ice Lake"
  # Increase delete collection parallelism.
  - name: TEST_CLUSTER_DELETE_COLLECTION_WORKERS
    value: "--delete-collection-workers=16"
  # Dump full systemd journal on master and nodes.
  - name: LOG_DUMP_SYSTEMD_JOURNAL
    value: "true"
  # Timeout for the log dumping over SSH. Relevant only if fallback to SSH log dumping takes place
  # e.g. when logexporter daemonset fails for some reason.
  # We deliberately cap it at 1h to avoid spending too much time (e.g. over 5h for 5k node cluster)
  # on dumping logs that in most cases we won't need anyway.
  - name: LOG_DUMP_SSH_TIMEOUT_SECONDS
    value: "3600"
  # Use private clusters for scalability tests - https://github.com/kubernetes/kubernetes/issues/76374
  - name: KUBE_GCE_PRIVATE_CLUSTER
    value: "true"
  # We create approx. 70 hollow nodes per VM. Allow ~4 connections from each of them.
  - name: KUBE_GCE_PRIVATE_CLUSTER_PORTS_PER_VM
    value: "300"
  - name: PROMETHEUS_SCRAPE_ETCD
    value: "true"
  # Disable kubernetes-dashboard
  - name: KUBE_ENABLE_CLUSTER_UI
    value: "false"
  # Enable assertions on scheduler throughput in density test.
  # Setting the threshold to 90 should allow us to catch regressions like
  # https://github.com/kubernetes/kubernetes/pull/85030 while not making the tests flaky.
  - name: CL2_SCHEDULER_THROUGHPUT_THRESHOLD
    value: "90"
  - name: CL2_ALLOWED_SLOW_API_CALLS
    value: "1"
  # Disable PVs until these are fixed in Kubemark:
  # https://github.com/kubernetes/perf-tests/issues/803
  - name: CL2_ENABLE_PVS
    value: "false"
  # Switch to using log-dump.sh script included in the kubekins-e2e image
  # instead of relying on the deprecated one from k/k repository.
  - name: USE_TEST_INFRA_LOG_DUMPING
    value: "true"
  # If log dumping of nodes is enabled and logexporter creation fails or less than 50 %
  # of the nodes got logexported successfully, then report a failure.
  - name: LOG_DUMP_EXPECTED_SUCCESS_PERCENTAGE
    value: "50"
  - name: PERF_TESTS_PRINT_COMMIT_HISTORY
    value: "true"
  - name: DUMP_TO_GCS_ONLY
    value: "true"
  # Disable konnectivity in kubemark as it doesn't work (see https://github.com/kubernetes/perf-tests/issues/1828)
  # TODO(https://github.com/kubernetes/perf-tests/issues/1828): Use konnectivity in kubemark.
  - name: KUBE_ENABLE_KONNECTIVITY_SERVICE
    value: "false"
  - name: DEPLOY_GCI_DRIVER
    value: "true"
  - name: PROMETHEUS_STORAGE_CLASS_PROVISIONER
    value: "pd.csi.storage.gke.io"

### kubemark-gce-scale
- labels:
    preset-e2e-kubemark-gce-scale: "true"
  env:
  # kubernetes env
  # TODO: Remove this after kube-proxy improvements.
  - name: USE_REAL_PROXIER
    value: "false"
  - name: HOLLOW_PROXY_TEST_ARGS
    value: "--use-real-proxier=false"
  - name: HEAPSTER_KUBELET_TEST_ARGS
    value: "--register-with-taints=monitoring=:NoSchedule"
  # Heapster node is needed in order for Prometheus pods to fit in a cluster.
  - name: HEAPSTER_MACHINE_TYPE
    value: "e2-standard-8"


###### Scalability Envs
### Common env variables for all scalability-related suites.
- labels:
    preset-e2e-scalability-common: "true"
  env:
  # Override GCE defaults.
  - name: NODE_SIZE
    value: "e2-medium"
  - name: NODE_DISK_SIZE
    value: "50GB"
  - name: REGISTER_MASTER
    value: "true"
  - name: LOGROTATE_MAX_SIZE
    value: "5G"
  # Use IP-aliases for scalability tests.
  - name: KUBE_GCE_ENABLE_IP_ALIASES
    value: "true"
  - name: CREATE_CUSTOM_NETWORK
    value: "true"
  # Ensure good enough architecture for master machines.
  - name: MASTER_MIN_CPU_ARCHITECTURE
    value: "Intel Ice Lake"
  - name: MASTER_SIZE
    value: "n2-standard-32"
  # Turn on profiling for various components and
  # increase throughput in master components.
  - name: ETCD_EXTRA_ARGS
    value: "--enable-pprof"
  - name: APISERVER_TEST_ARGS
    value: "--profiling --contention-profiling"
  - name: CONTROLLER_MANAGER_TEST_ARGS
    value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
  - name: KUBELET_TEST_ARGS
    value: "--enable-debugging-handlers --kube-api-qps=100 --kube-api-burst=100"
  - name: NODE_KUBELET_TEST_ARGS
    # e2-medium machines report the capacity of 2cpu.
    # We adjust the allocatable to match reality and additionally tweak it
    # to achieve roughly 1:4 allocatable cpu to memory ratio.
    value: "--kube-reserved=cpu=1050m"
  - name: KUBEPROXY_TEST_ARGS
    # TODO(#74011): Remove metrics-bind-address if the default is set.
    # FeatureGate added in #110268 v1.26
    value: "--profiling --metrics-bind-address=0.0.0.0"
  - name: SCHEDULER_TEST_ARGS
    value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
  # Reduce logs verbosity.
  - name: TEST_CLUSTER_LOG_LEVEL
    value: "--v=2"
  - name: API_SERVER_TEST_LOG_LEVEL
    value: "--v=3"
  # Increase resync period to simulate production.
  - name: TEST_CLUSTER_RESYNC_PERIOD
    value: "--min-resync-period=12h"
  # Reduce etcd compaction frequency to match production.
  - name: ETCD_COMPACTION_INTERVAL_SEC
    value: "150"
  # Increase delete collection parallelism.
  - name: TEST_CLUSTER_DELETE_COLLECTION_WORKERS
    value: "--delete-collection-workers=16"
  # Dump full systemd journal on master and nodes.
  - name: LOG_DUMP_SYSTEMD_JOURNAL
    value: "true"
  # Dump clusterloader prober's log files
  - name: LOG_DUMP_EXTRA_FILES
    value: "cl2-*"
  # Timeout for the log dumping over SSH. Relevant only if fallback to SSH log dumping takes place
  # e.g. when logexporter daemonset fails for some reason.
  # We deliberately cap it at 1h to avoid spending too much time (e.g. over 5h for 5k node cluster)
  # on dumping logs that in most cases we won't need anyway.
  - name: LOG_DUMP_SSH_TIMEOUT_SECONDS
    value: "3600"
  # Keep all logrotated files (not just 5 latest which is a default)
  - name: LOGROTATE_FILES_MAX_COUNT
    value: "1000"
  - name: ENABLE_PROMETHEUS_SERVER
    value: "true"
  - name: KUBE_MASTER_NODE_LABELS
    value: "node.kubernetes.io/node-exporter-ready=true"
  # Use private clusters for scalability tests - https://github.com/kubernetes/kubernetes/issues/76374
  - name: KUBE_GCE_PRIVATE_CLUSTER
    value: "true"
  - name: PROMETHEUS_SCRAPE_ETCD
    value: "true"
  # Disable kubernetes-dashboard
  - name: KUBE_ENABLE_CLUSTER_UI
    value: "false"
  # Enable assertions on scheduler throughput in density test.
  # Setting the threshold to 90 should allow us to catch regressions like
  # https://github.com/kubernetes/kubernetes/pull/85030 while not making the tests flaky.
  - name: CL2_SCHEDULER_THROUGHPUT_THRESHOLD
    value: "90"
  - name: CL2_ALLOWED_SLOW_API_CALLS
    value: "1"
  # Override of the default list of whitelisted resources during addons reconciliation
  # performed by kube-addon-manager. This is the same as the default list in the script
  # k/k/cluster/addons/addon-manager/kube-addons.sh but without core/v1/Pod resource.
  # As explained in https://github.com/kubernetes/kubernetes/pull/91018, this results
  # in a Kubernetes cluster performance bump up.
  # The folded scalar (>-) joins the entries below into one space-separated
  # string with no trailing newline.
  - name: KUBECTL_PRUNE_WHITELIST_OVERRIDE
    value: >-
      core/v1/ConfigMap
      core/v1/Endpoints
      core/v1/Namespace
      core/v1/PersistentVolumeClaim
      core/v1/PersistentVolume
      core/v1/ReplicationController
      core/v1/Secret
      core/v1/Service
      batch/v1/Job
      batch/v1/CronJob
      apps/v1/DaemonSet
      apps/v1/Deployment
      apps/v1/ReplicaSet
      apps/v1/StatefulSet
      networking.k8s.io/v1/Ingress
  # Switch to using log-dump.sh script included in the kubekins-e2e image
  # instead of relying on the deprecated one from k/k repository.
  - name: USE_TEST_INFRA_LOG_DUMPING
    value: "true"
  # If log dumping of nodes is enabled and logexporter creation fails or less than 50 %
  # of the nodes got logexported successfully, then report a failure.
  - name: LOG_DUMP_EXPECTED_SUCCESS_PERCENTAGE
    value: "50"
  - name: PERF_TESTS_PRINT_COMMIT_HISTORY
    value: "true"
  - name: LOG_DUMP_SAVE_SERVICES
    value: "containerd"
  - name: DUMP_TO_GCS_ONLY
    value: "true"
  - name: DEPLOY_GCI_DRIVER
    value: "true"
  - name: PROMETHEUS_STORAGE_CLASS_PROVISIONER
    value: "pd.csi.storage.gke.io"
  - name: KUBE_APISERVER_GODEBUG
    value: "gctrace=1"
  - name: CL2_ENABLE_QUOTAS_USAGE_MEASUREMENT
    value: "true"
  - name: KUBE_GCE_PRIVATE_CLUSTER_PORTS_PER_VM
    value: "256"

###### Scalability Envs
### Common env variables for node scalability-related suites.
- labels:
    preset-e2e-scalability-node: "true"
  env:
  # Override GCE defaults.
  - name: MASTER_SIZE
    value: "n1-standard-4"
  - name: NODE_SIZE
    value: "e2-standard-8"
  - name: NODE_DISK_SIZE
    value: "100GB"
  - name: REGISTER_MASTER
    value: "true"
  - name: LOGROTATE_MAX_SIZE
    value: "5G"
  # Use IP-aliases for scalability tests.
  - name: KUBE_GCE_ENABLE_IP_ALIASES
    value: "true"
  - name: CREATE_CUSTOM_NETWORK
    value: "true"
  # Ensure good enough architecture for master machines.
  - name: MASTER_MIN_CPU_ARCHITECTURE
    value: "Intel Skylake"
  # Turn on profiling for various components and
  # increase throughput in master components and Kubelet.
  - name: ETCD_EXTRA_ARGS
    value: "--enable-pprof"
  - name: CONTROLLER_MANAGER_TEST_ARGS
    value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
  # Bump max pods per node in Kubelet, because there are more than 10
  # system pods in 1-node cluster.
  - name: MAX_PODS_PER_NODE
    value: "128"
  - name: KUBELET_TEST_ARGS
    value: "--enable-debugging-handlers --kube-api-qps=100 --kube-api-burst=100"
  - name: KUBEPROXY_TEST_ARGS
    value: "--profiling"
  - name: SCHEDULER_TEST_ARGS
    value: "--profiling --contention-profiling --kube-api-qps=100 --kube-api-burst=100"
  # Reduce logs verbosity.
  - name: TEST_CLUSTER_LOG_LEVEL
    value: "--v=2"
  - name: API_SERVER_TEST_LOG_LEVEL
    value: "--v=3"
  # Increase resync period to simulate production.
  - name: TEST_CLUSTER_RESYNC_PERIOD
    value: "--min-resync-period=12h"
  # Increase delete collection parallelism.
  - name: TEST_CLUSTER_DELETE_COLLECTION_WORKERS
    value: "--delete-collection-workers=16"
  - name: DUMP_TO_GCS_ONLY
    value: "true"

- labels:
    preset-e2e-scalability-presubmits: "true"
  env:
  - name: PROMETHEUS_SCRAPE_MASTER_KUBELETS
    value: "true"

- labels:
    preset-e2e-scalability-periodics: "true"
  env:
  - name: PROMETHEUS_SCRAPE_MASTER_KUBELETS
    value: "true"

- labels:
    preset-e2e-scalability-periodics-master: "true"
  env: