k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/cmd/clusterloader.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "fmt" 21 "io/ioutil" 22 "net/http" 23 _ "net/http/pprof" 24 "os" 25 "path" 26 "time" 27 28 "gopkg.in/yaml.v2" 29 corev1 "k8s.io/api/core/v1" 30 "k8s.io/client-go/kubernetes" 31 "k8s.io/klog/v2" 32 "k8s.io/kubernetes/pkg/cluster/ports" 33 "k8s.io/perf-tests/clusterloader2/api" 34 "k8s.io/perf-tests/clusterloader2/pkg/config" 35 "k8s.io/perf-tests/clusterloader2/pkg/errors" 36 "k8s.io/perf-tests/clusterloader2/pkg/execservice" 37 "k8s.io/perf-tests/clusterloader2/pkg/flags" 38 "k8s.io/perf-tests/clusterloader2/pkg/framework" 39 "k8s.io/perf-tests/clusterloader2/pkg/imagepreload" 40 "k8s.io/perf-tests/clusterloader2/pkg/metadata" 41 "k8s.io/perf-tests/clusterloader2/pkg/modifier" 42 "k8s.io/perf-tests/clusterloader2/pkg/prometheus" 43 "k8s.io/perf-tests/clusterloader2/pkg/provider" 44 "k8s.io/perf-tests/clusterloader2/pkg/test" 45 "k8s.io/perf-tests/clusterloader2/pkg/util" 46 47 _ "k8s.io/perf-tests/clusterloader2/pkg/measurement/common" 48 _ "k8s.io/perf-tests/clusterloader2/pkg/measurement/common/bundle" 49 _ "k8s.io/perf-tests/clusterloader2/pkg/measurement/common/dns" 50 _ "k8s.io/perf-tests/clusterloader2/pkg/measurement/common/network" 51 _ "k8s.io/perf-tests/clusterloader2/pkg/measurement/common/network-policy" 52 _ "k8s.io/perf-tests/clusterloader2/pkg/measurement/common/probes" 53 _ "k8s.io/perf-tests/clusterloader2/pkg/measurement/common/slos" 54 ) 55 56 const ( 57 dashLine = "--------------------------------------------------------------------------------" 58 nodesPerClients = 100 59 ) 60 61 var ( 62 clusterLoaderConfig config.ClusterLoaderConfig 63 providerInitOptions provider.InitOptions 64 testConfigPaths []string 65 testSuiteConfigPath string 66 port int 67 dryRun bool 68 ) 69 70 func initClusterFlags() { 71 flags.StringEnvVar(&clusterLoaderConfig.ClusterConfig.KubeConfigPath, "kubeconfig", "KUBECONFIG", "", "Path to the kubeconfig file (if not empty, --run-from-cluster must be false)") 72 flags.BoolEnvVar(&clusterLoaderConfig.ClusterConfig.RunFromCluster, "run-from-cluster", "RUN_FROM_CLUSTER", false, "Whether to use in-cluster client-config to create a client, --kubeconfig must be unset") 73 flags.IntEnvVar(&clusterLoaderConfig.ClusterConfig.Nodes, "nodes", "NUM_NODES", 0, "number of nodes") 74 flags.IntEnvVar(&clusterLoaderConfig.ClusterConfig.KubeletPort, "kubelet-port", "KUBELET_PORT", ports.KubeletPort, "Port of the kubelet to use") 75 flags.IntEnvVar(&clusterLoaderConfig.ClusterConfig.K8SClientsNumber, "k8s-clients-number", "K8S_CLIENTS_NUMBER", 0, fmt.Sprintf("(Optional) Number of k8s clients to use. If 0, will create 1 client per %d nodes", nodesPerClients)) 76 flags.StringEnvVar(&clusterLoaderConfig.ClusterConfig.EtcdCertificatePath, "etcd-certificate", "ETCD_CERTIFICATE", "/etc/srv/kubernetes/pki/etcd-apiserver-server.crt", "Path to the etcd certificate on the master machine") 77 flags.StringEnvVar(&clusterLoaderConfig.ClusterConfig.EtcdKeyPath, "etcd-key", "ETCD_KEY", "/etc/srv/kubernetes/pki/etcd-apiserver-server.key", "Path to the etcd key on the master machine") 78 flags.IntEnvVar(&clusterLoaderConfig.ClusterConfig.EtcdInsecurePort, "etcd-insecure-port", "ETCD_INSECURE_PORT", 2382, "Inscure http port") 79 flags.BoolEnvVar(&clusterLoaderConfig.ClusterConfig.DeleteStaleNamespaces, "delete-stale-namespaces", "DELETE_STALE_NAMESPACES", false, "DEPRECATED: Whether to delete all stale namespaces before the test execution.") 80 err := flags.MarkDeprecated("delete-stale-namespaces", "specify deleteStaleNamespaces in testconfig file instead.") 81 if err != nil { 82 klog.Fatalf("unable to mark flag delete-stale-namespaces deprecated %v", err) 83 } 84 // TODO(#1696): Clean up after removing automanagedNamespaces 85 flags.BoolEnvVar(&clusterLoaderConfig.ClusterConfig.DeleteAutomanagedNamespaces, "delete-automanaged-namespaces", "DELETE_AUTOMANAGED_NAMESPACES", true, "DEPRECATED: Whether to delete all automanaged namespaces after the test execution.") 86 err = flags.MarkDeprecated("delete-automanaged-namespaces", "specify deleteAutomanagedNamespaces in testconfig file instead.") 87 if err != nil { 88 klog.Fatalf("unable to mark flag delete-automanaged-namespaces deprecated %v", err) 89 } 90 flags.StringEnvVar(&clusterLoaderConfig.ClusterConfig.MasterName, "mastername", "MASTER_NAME", "", "Name of the masternode") 91 // TODO(#595): Change the name of the MASTER_IP and MASTER_INTERNAL_IP flags and vars to plural 92 flags.StringSliceEnvVar(&clusterLoaderConfig.ClusterConfig.MasterIPs, "masterip", "MASTER_IP", nil /*defaultValue*/, "Hostname/IP of the master node, supports multiple values when separated by commas") 93 flags.StringSliceEnvVar(&clusterLoaderConfig.ClusterConfig.MasterInternalIPs, "master-internal-ip", "MASTER_INTERNAL_IP", nil /*defaultValue*/, "Cluster internal/private IP of the master vm, supports multiple values when separated by commas") 94 flags.BoolEnvVar(&clusterLoaderConfig.ClusterConfig.APIServerPprofByClientEnabled, "apiserver-pprof-by-client-enabled", "APISERVER_PPROF_BY_CLIENT_ENABLED", true, "Whether apiserver pprof endpoint can be accessed by Kubernetes client.") 95 flags.BoolVar(&clusterLoaderConfig.ClusterConfig.SkipClusterVerification, "skip-cluster-verification", false, "Whether to skip the cluster verification, which expects at least one schedulable node in the cluster") 96 97 flags.StringEnvVar(&providerInitOptions.ProviderName, "provider", "PROVIDER", "", "Cluster provider name") 98 flags.StringSliceEnvVar(&providerInitOptions.ProviderConfigs, "provider-configs", "PROVIDER_CONFIGS", nil, "Cluster provider configurations") 99 flags.StringEnvVar(&providerInitOptions.KubemarkRootKubeConfigPath, "kubemark-root-kubeconfig", "KUBEMARK_ROOT_KUBECONFIG", "", 100 "DEPRECATED: Please use provider-config=\"ROOT_KUBECONFIG=<value>\". Path the to kubemark root kubeconfig file, i.e. kubeconfig of the cluster where kubemark cluster is run. Ignored if provider != kubemark") 101 } 102 103 func validateClusterFlags() *errors.ErrorList { 104 errList := errors.NewErrorList() 105 106 // if '--run-from-cluster=true', create in-cluster config and validate kubeconfig is unset 107 // if '--run-from-cluster=false', use kubeconfig (and validate it is set) 108 switch clusterLoaderConfig.ClusterConfig.RunFromCluster { 109 case true: 110 if clusterLoaderConfig.ClusterConfig.KubeConfigPath != "" { 111 errList.Append(fmt.Errorf("unexpected kubeconfig path specified %q when --run-from-cluster is set", clusterLoaderConfig.ClusterConfig.KubeConfigPath)) 112 } 113 case false: 114 if clusterLoaderConfig.ClusterConfig.KubeConfigPath == "" { 115 errList.Append(fmt.Errorf("no kubeconfig path specified when --run-from-cluster is unset")) 116 } 117 } 118 if clusterLoaderConfig.PrometheusConfig.EnableServer { 119 if !clusterLoaderConfig.ClusterConfig.Provider.Features().SupportEnablePrometheusServer { 120 errList.Append(fmt.Errorf("cannot enable prometheus server for provider %s", clusterLoaderConfig.ClusterConfig.Provider.Name())) 121 } 122 } 123 return errList 124 } 125 126 func initFlags() { 127 flags.StringVar(&clusterLoaderConfig.ReportDir, "report-dir", "", "Path to the directory where the reports should be saved. Default is empty, which cause reports being written to standard output.") 128 // TODO(https://github.com/kubernetes/perf-tests/issues/641): Remove testconfig and testoverrides flags when test suite is fully supported. 129 flags.StringArrayVar(&testConfigPaths, "testconfig", []string{}, "Paths to the test config files") 130 flags.StringArrayVar(&clusterLoaderConfig.OverridePaths, "testoverrides", []string{}, "Paths to the config overrides file. The latter overrides take precedence over changes in former files.") 131 flags.StringVar(&testSuiteConfigPath, "testsuite", "", "Path to the test suite config file") 132 flags.IntVar(&port, "port", 8000, "Port to be used by http server with pprof.") 133 flags.BoolVar(&dryRun, "dry-run", false, "Whether to skip running test and only compile test config") 134 initClusterFlags() 135 execservice.InitFlags(&clusterLoaderConfig.ExecServiceConfig) 136 modifier.InitFlags(&clusterLoaderConfig.ModifierConfig) 137 prometheus.InitFlags(&clusterLoaderConfig.PrometheusConfig) 138 } 139 140 func validateFlags() *errors.ErrorList { 141 errList := errors.NewErrorList() 142 if len(testConfigPaths) == 0 && testSuiteConfigPath == "" { 143 errList.Append(fmt.Errorf("no test config path or test suite path specified")) 144 } 145 if len(testConfigPaths) > 0 && testSuiteConfigPath != "" { 146 errList.Append(fmt.Errorf("test config path and test suite path cannot be provided at the same time")) 147 } 148 errList.Concat(validateClusterFlags()) 149 errList.Concat(prometheus.ValidatePrometheusFlags(&clusterLoaderConfig.PrometheusConfig)) 150 return errList 151 } 152 153 func completeConfig(m *framework.MultiClientSet) error { 154 if clusterLoaderConfig.ClusterConfig.Nodes == 0 { 155 nodes, err := util.GetSchedulableUntainedNodesNumber(m.GetClient()) 156 if err != nil { 157 if clusterLoaderConfig.ClusterConfig.Provider.Name() == provider.KCPName { 158 return fmt.Errorf("getting number of nodes error: %v, please create nodes.core CRD", err) 159 } 160 return fmt.Errorf("getting number of nodes error: %v", err) 161 } 162 clusterLoaderConfig.ClusterConfig.Nodes = nodes 163 klog.V(0).Infof("ClusterConfig.Nodes set to %v", nodes) 164 } 165 if clusterLoaderConfig.ClusterConfig.MasterName == "" { 166 masterName, err := util.GetMasterName(m.GetClient()) 167 if err == nil { 168 clusterLoaderConfig.ClusterConfig.MasterName = masterName 169 klog.V(0).Infof("ClusterConfig.MasterName set to %v", masterName) 170 } else { 171 klog.Errorf("Getting master name error: %v", err) 172 } 173 } 174 if len(clusterLoaderConfig.ClusterConfig.MasterIPs) == 0 { 175 masterIPs, err := util.GetMasterIPs(m.GetClient(), corev1.NodeExternalIP) 176 if err == nil { 177 clusterLoaderConfig.ClusterConfig.MasterIPs = masterIPs 178 klog.V(0).Infof("ClusterConfig.MasterIP set to %v", masterIPs) 179 } else { 180 klog.Errorf("Getting master external ip error: %v", err) 181 } 182 } 183 if len(clusterLoaderConfig.ClusterConfig.MasterInternalIPs) == 0 { 184 masterIPs, err := util.GetMasterIPs(m.GetClient(), corev1.NodeInternalIP) 185 if err == nil { 186 clusterLoaderConfig.ClusterConfig.MasterInternalIPs = masterIPs 187 klog.V(0).Infof("ClusterConfig.MasterInternalIP set to %v", masterIPs) 188 } else { 189 klog.Errorf("Getting master internal ip error: %v", err) 190 } 191 } 192 193 if !clusterLoaderConfig.ClusterConfig.Provider.Features().SupportAccessAPIServerPprofEndpoint { 194 clusterLoaderConfig.ClusterConfig.APIServerPprofByClientEnabled = false 195 } 196 if clusterLoaderConfig.ClusterConfig.K8SClientsNumber == 0 { 197 clusterLoaderConfig.ClusterConfig.K8SClientsNumber = getClientsNumber(clusterLoaderConfig.ClusterConfig.Nodes) 198 } 199 return nil 200 } 201 202 func verifyCluster(c kubernetes.Interface) error { 203 if clusterLoaderConfig.ClusterConfig.Provider.Name() == provider.KCPName { 204 return nil 205 } 206 numSchedulableNodes, err := util.GetSchedulableUntainedNodesNumber(c) 207 if err != nil { 208 return err 209 } 210 if numSchedulableNodes == 0 { 211 return fmt.Errorf("no schedulable nodes in the cluster") 212 } 213 return nil 214 } 215 216 func getClientsNumber(nodesNumber int) int { 217 if clusterLoaderConfig.ClusterConfig.Provider.Name() == provider.KCPName { 218 return 1 219 } 220 return (nodesNumber + nodesPerClients - 1) / nodesPerClients 221 } 222 223 func createReportDir() error { 224 if clusterLoaderConfig.ReportDir != "" { 225 if _, err := os.Stat(clusterLoaderConfig.ReportDir); err != nil { 226 if !os.IsNotExist(err) { 227 return err 228 } 229 if err = os.MkdirAll(clusterLoaderConfig.ReportDir, 0755); err != nil { 230 return fmt.Errorf("report directory creation error: %v", err) 231 } 232 } 233 } 234 return nil 235 } 236 237 func printTestStart(name string) { 238 klog.V(0).Infof(dashLine) 239 klog.V(0).Infof("Running %v", name) 240 klog.V(0).Infof(dashLine) 241 } 242 243 func printTestResult(name, status, errors string) { 244 logf := klog.V(0).Infof 245 if errors != "" { 246 logf = klog.Errorf 247 } 248 logf(dashLine) 249 logf("Test Finished") 250 logf(" Test: %v", name) 251 logf(" Status: %v", status) 252 if errors != "" { 253 logf(" Errors: %v", errors) 254 } 255 logf(dashLine) 256 } 257 258 func main() { 259 defer klog.Flush() 260 initFlags() 261 if err := flags.Parse(); err != nil { 262 klog.Exitf("Flag parse failed: %v", err) 263 } 264 265 // Start http server with pprof. 266 go func() { 267 klog.Infof("Listening on %d", port) 268 err := http.ListenAndServe(fmt.Sprintf("localhost:%d", port), nil) 269 klog.Errorf("http server unexpectedly ended: %v", err) 270 }() 271 272 provider, err := provider.NewProvider(&providerInitOptions) 273 if err != nil { 274 klog.Exitf("Error init provider: %v", err) 275 } 276 clusterLoaderConfig.ClusterConfig.Provider = provider 277 278 if errList := validateFlags(); !errList.IsEmpty() { 279 klog.Exitf("Parsing flags error: %v", errList.String()) 280 } 281 282 mclient, err := framework.NewMultiClientSet(clusterLoaderConfig.ClusterConfig.KubeConfigPath, 1) 283 if err != nil { 284 klog.Exitf("Client creation error: %v", err) 285 } 286 287 if err = completeConfig(mclient); err != nil { 288 klog.Exitf("Config completing error: %v", err) 289 } 290 291 klog.V(0).Infof("Using config: %+v", clusterLoaderConfig) 292 293 if err = createReportDir(); err != nil { 294 klog.Exitf("Cannot create report directory: %v", err) 295 } 296 297 if err = util.LogClusterNodes(mclient.GetClient()); err != nil { 298 klog.Errorf("Nodes info logging error: %v", err) 299 } 300 301 if !clusterLoaderConfig.ClusterConfig.SkipClusterVerification { 302 if err = verifyCluster(mclient.GetClient()); err != nil { 303 klog.Exitf("Cluster verification error: %v", err) 304 } 305 } 306 307 f, err := framework.NewFramework( 308 &clusterLoaderConfig.ClusterConfig, 309 clusterLoaderConfig.ClusterConfig.K8SClientsNumber, 310 ) 311 if err != nil { 312 klog.Exitf("Framework creation error: %v", err) 313 } 314 315 var prometheusController *prometheus.Controller 316 var prometheusFramework *framework.Framework 317 var testReporter test.Reporter 318 319 if !dryRun { 320 if clusterLoaderConfig.PrometheusConfig.EnableServer { 321 if prometheusController, err = prometheus.NewController(&clusterLoaderConfig); err != nil { 322 klog.Exitf("Error while creating Prometheus Controller: %v", err) 323 } 324 prometheusFramework = prometheusController.GetFramework() 325 if err := prometheusController.SetUpPrometheusStack(); err != nil { 326 klog.Exitf("Error while setting up prometheus stack: %v", err) 327 } 328 if clusterLoaderConfig.PrometheusConfig.TearDownServer { 329 prometheusController.EnableTearDownPrometheusStackOnInterrupt() 330 } 331 } 332 if clusterLoaderConfig.ExecServiceConfig.Enable { 333 if err := execservice.SetUpExecService(f, clusterLoaderConfig.ExecServiceConfig); err != nil { 334 klog.Exitf("Error while setting up exec service: %v", err) 335 } 336 } 337 if err := imagepreload.Setup(&clusterLoaderConfig, f); err != nil { 338 klog.Exitf("Error while preloading images: %v", err) 339 } 340 341 if err := metadata.Dump(f, path.Join(clusterLoaderConfig.ReportDir, "cl2-metadata.json")); err != nil { 342 klog.Errorf("Error while dumping metadata: %v", err) 343 } 344 testReporter = test.CreateSimpleReporter(path.Join(clusterLoaderConfig.ReportDir, "junit.xml"), "ClusterLoaderV2") 345 testReporter.BeginTestSuite() 346 } 347 348 var testScenarios []api.TestScenario 349 if testSuiteConfigPath != "" { 350 testSuite, err := config.LoadTestSuite(testSuiteConfigPath) 351 if err != nil { 352 klog.Exitf("Error while reading test suite: %v", err) 353 } 354 testScenarios = []api.TestScenario(testSuite) 355 } else { 356 for i := range testConfigPaths { 357 testScenario := api.TestScenario{ 358 ConfigPath: testConfigPaths[i], 359 OverridePaths: []string{}, 360 } 361 testScenarios = append(testScenarios, testScenario) 362 } 363 } 364 365 var contexts []test.Context 366 for i := range testScenarios { 367 ctx, errList := test.CreateTestContext(f, prometheusFramework, &clusterLoaderConfig, testReporter, &testScenarios[i]) 368 if !errList.IsEmpty() { 369 klog.Exitf("Test context creation failed: %s", errList.String()) 370 } 371 testConfig, errList := test.CompileTestConfig(ctx) 372 // Dump test config before checking errors - it can still be useful for debugging. 373 if testConfig != nil { 374 if err := dumpTestConfig(ctx, testConfig); err != nil { 375 klog.Errorf("Error while dumping test config: %v", err) 376 } 377 } 378 if !errList.IsEmpty() { 379 klog.Exitf("Test compilation failed: %s", errList.String()) 380 } 381 ctx.SetTestConfig(testConfig) 382 contexts = append(contexts, ctx) 383 } 384 385 if dryRun { 386 // Dry run always exits with error so if it's ever enabled in CI, the test will fail. 387 klog.Exitf("Dry run mode enabled, exiting after dumping test config in %s.", path.Join(clusterLoaderConfig.ReportDir)) 388 } 389 390 for i := range contexts { 391 runSingleTest(contexts[i]) 392 } 393 394 testReporter.EndTestSuite() 395 396 if err := prometheusController.MakePrometheusSnapshotIfEnabled(); err != nil { 397 klog.Errorf("Error while making prometheus snapshot: %v", err) 398 } 399 400 if clusterLoaderConfig.PrometheusConfig.EnableServer && clusterLoaderConfig.PrometheusConfig.TearDownServer { 401 if err := prometheusController.TearDownPrometheusStack(); err != nil { 402 klog.Errorf("Error while tearing down prometheus stack: %v", err) 403 } 404 } 405 if clusterLoaderConfig.ExecServiceConfig.Enable { 406 if err := execservice.TearDownExecService(f); err != nil { 407 klog.Errorf("Error while tearing down exec service: %v", err) 408 } 409 } 410 if failedTestItems := testReporter.GetNumberOfFailedTestItems(); failedTestItems > 0 { 411 klog.Exitf("%d tests have failed!", failedTestItems) 412 } 413 } 414 415 func runSingleTest(ctx test.Context) { 416 testID := getTestID(ctx.GetTestScenario()) 417 testStart := time.Now() 418 printTestStart(testID) 419 errList := test.RunTest(ctx) 420 if !errList.IsEmpty() { 421 printTestResult(testID, "Fail", errList.String()) 422 } else { 423 printTestResult(testID, "Success", "") 424 } 425 testConfigPath := ctx.GetTestScenario().ConfigPath 426 ctx.GetTestReporter().ReportTestFinish(time.Since(testStart), testConfigPath, errList) 427 } 428 429 func getTestID(ts *api.TestScenario) string { 430 if ts.Identifier != "" { 431 return fmt.Sprintf("%s(%s)", ts.Identifier, ts.ConfigPath) 432 } 433 return ts.ConfigPath 434 } 435 436 func dumpTestConfig(ctx test.Context, config *api.Config) error { 437 b, err := yaml.Marshal(config) 438 if err != nil { 439 return fmt.Errorf("marshaling config error: %w", err) 440 } 441 filePath := path.Join(ctx.GetClusterLoaderConfig().ReportDir, "generatedConfig_"+config.Name+".yaml") 442 if err := ioutil.WriteFile(filePath, b, 0644); err != nil { 443 return fmt.Errorf("saving file error: %w", err) 444 } 445 klog.Infof("Test config successfully dumped to: %s", filePath) 446 return nil 447 }