k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/kubetest/main.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "context" 21 "encoding/json" 22 "errors" 23 "flag" 24 "fmt" 25 "log" 26 "os" 27 "os/exec" 28 "os/signal" 29 "path/filepath" 30 "regexp" 31 "strconv" 32 "strings" 33 "time" 34 35 "github.com/spf13/pflag" 36 "k8s.io/test-infra/kubetest/boskos/client" 37 38 "k8s.io/test-infra/kubetest/conformance" 39 "k8s.io/test-infra/kubetest/kind" 40 "k8s.io/test-infra/kubetest/process" 41 "k8s.io/test-infra/kubetest/util" 42 ) 43 44 // Hardcoded in ginkgo-e2e.sh 45 const defaultGinkgoParallel = 25 46 47 var ( 48 artifacts = filepath.Join(os.Getenv("WORKSPACE"), "_artifacts") 49 boskos, _ = client.NewClient(os.Getenv("JOB_NAME"), "http://boskos.test-pods.svc.cluster.local.", "", "") 50 control = process.NewControl(timeout, interrupt, terminate, verbose) 51 gitTag = "" // initializing default zero value. ldflags will populate this during build time. 52 interrupt = time.NewTimer(time.Duration(0)) // interrupt testing at this time. 53 terminate = time.NewTimer(time.Duration(0)) // terminate testing at this time. 54 timeout = time.Duration(0) 55 verbose = false 56 ) 57 58 type options struct { 59 build buildStrategy 60 boskosWaitDuration time.Duration 61 charts bool 62 checkLeaks bool 63 checkSkew bool 64 cluster string 65 clusterIPRange string 66 deployment string 67 down bool 68 dump string 69 dumpPreTestLogs string 70 extract extractStrategies 71 extractCIBucket string 72 extractReleaseBucket string 73 extractSource bool 74 flushMemAfterBuild bool 75 focusRegex string 76 gcpCloudSdk string 77 gcpMasterImage string 78 gcpMasterSize string 79 gcpNetwork string 80 gcpNodeImage string 81 gcpImageFamily string 82 gcpImageProject string 83 gcpNodes string 84 gcpNodeSize string 85 gcpProject string 86 gcpProjectType string 87 gcpServiceAccount string 88 // gcpSSHProxyInstanceName is the name of the vm instance which ip address will be used to set the 89 // KUBE_SSH_BASTION env. If set, it will result in proxying ssh connections in tests through the 90 // "bastion". It's useful for clusters with nodes without public ssh access, e.g. nodes without 91 // public ip addresses. Works only for gcp providers (gce, gke). 92 gcpSSHProxyInstanceName string 93 gcpRegion string 94 gcpZone string 95 ginkgoParallel ginkgoParallelValue 96 kubecfg string 97 kubemark bool 98 kubemarkMasterSize string 99 kubemarkNodes string // TODO(fejta): switch to int after migration 100 logexporterGCSPath string 101 metadataSources string 102 noAllowDup bool 103 nodeArgs string 104 nodeTestArgs string 105 nodeTests bool 106 preTestCmd string 107 postTestCmd string 108 provider string 109 publish string 110 runtimeConfig string 111 save string 112 skew bool 113 skipDumpClusterLogs bool 114 skipRegex string 115 soak bool 116 soakDuration time.Duration 117 stage stageStrategy 118 storageTestDriverPath string 119 test bool 120 testArgs string 121 testCmd string 122 testCmdName string 123 testCmdArgs []string 124 up bool 125 upgradeArgs string 126 version bool 127 } 128 129 func defineFlags() *options { 130 o := options{} 131 flag.Var(&o.build, "build", "Rebuild k8s binaries, optionally forcing (release|quick|bazel) strategy") 132 flag.DurationVar(&o.boskosWaitDuration, "boskos-wait-duration", 5*time.Minute, "Defines how long it waits until quit getting Boskos resoure, default 5 minutes") 133 flag.BoolVar(&o.charts, "charts", false, "If true, run charts tests") 134 flag.BoolVar(&o.checkSkew, "check-version-skew", true, "Verify client and server versions match") 135 flag.BoolVar(&o.checkLeaks, "check-leaked-resources", false, "Ensure project ends with the same resources") 136 flag.StringVar(&o.cluster, "cluster", "", "Cluster name. Must be set for --deployment=gke (TODO: other deployments).") 137 flag.StringVar(&o.clusterIPRange, "cluster-ip-range", "", "Specifies CLUSTER_IP_RANGE value during --up and --test (only relevant for --deployment=bash). Auto-calculated if empty.") 138 flag.StringVar(&o.deployment, "deployment", "bash", "Choices: none/bash/conformance/gke/kind/kops/node/local") 139 flag.BoolVar(&o.down, "down", false, "If true, tear down the cluster before exiting.") 140 flag.StringVar(&o.dump, "dump", "", "If set, dump bring-up and cluster logs to this location on test or cluster-up failure") 141 flag.StringVar(&o.dumpPreTestLogs, "dump-pre-test-logs", "", "If set, dump cluster logs to this location before running tests") 142 flag.Var(&o.extract, "extract", "Extract k8s binaries from the specified release location") 143 flag.StringVar(&o.extractCIBucket, "extract-ci-bucket", "k8s-release-dev", "Extract k8s CI binaries from the specified GCS bucket") 144 flag.StringVar(&o.extractReleaseBucket, "extract-release-bucket", "kubernetes-release", "Extract k8s release binaries from the specified GCS bucket") 145 flag.BoolVar(&o.extractSource, "extract-source", false, "Extract k8s src together with other tarballs") 146 flag.BoolVar(&o.flushMemAfterBuild, "flush-mem-after-build", false, "If true, try to flush container memory after building") 147 flag.Var(&o.ginkgoParallel, "ginkgo-parallel", fmt.Sprintf("Run Ginkgo tests in parallel, default %d runners. Use --ginkgo-parallel=N to specify an exact count.", defaultGinkgoParallel)) 148 flag.StringVar(&o.gcpCloudSdk, "gcp-cloud-sdk", "", "Install/upgrade google-cloud-sdk to the gs:// path if set") 149 flag.StringVar(&o.gcpProject, "gcp-project", "", "For use with gcloud commands") 150 flag.StringVar(&o.gcpProjectType, "gcp-project-type", "", "Explicitly indicate which project type to select from boskos") 151 flag.StringVar(&o.gcpServiceAccount, "gcp-service-account", "", "Service account to activate before using gcloud") 152 flag.StringVar(&o.gcpZone, "gcp-zone", "", "For use with gcloud commands") 153 flag.StringVar(&o.gcpRegion, "gcp-region", "", "For use with gcloud commands") 154 flag.StringVar(&o.gcpNetwork, "gcp-network", "", "Cluster network. Must be set for --deployment=gke (TODO: other deployments).") 155 flag.StringVar(&o.gcpMasterImage, "gcp-master-image", "", "Master image type (cos|debian on GCE, n/a on GKE)") 156 flag.StringVar(&o.gcpMasterSize, "gcp-master-size", "", "(--provider=gce only) Size of master to create (e.g n1-standard-1). Auto-calculated if left empty.") 157 flag.StringVar(&o.gcpNodeImage, "gcp-node-image", "", "Node image type (cos|container_vm on GKE, cos|debian on GCE)") 158 flag.StringVar(&o.gcpImageFamily, "image-family", "", "Node image family from which to use the latest image, required when --gcp-node-image=CUSTOM") 159 flag.StringVar(&o.gcpImageProject, "image-project", "", "Project containing node image family, required when --gcp-node-image=CUSTOM") 160 flag.StringVar(&o.gcpNodes, "gcp-nodes", "", "(--provider=gce only) Number of nodes to create.") 161 flag.StringVar(&o.gcpNodeSize, "gcp-node-size", "", "(--provider=gce only) Size of nodes to create (e.g n1-standard-1).") 162 flag.StringVar(&o.gcpSSHProxyInstanceName, "gcp-ssh-proxy-instance-name", "", "(--provider=gce|gke only) If set, will result in proxing the ssh connections via the provided instance name while running tests") 163 flag.StringVar(&o.kubecfg, "kubeconfig", "", "The location of a kubeconfig file.") 164 flag.StringVar(&o.focusRegex, "ginkgo-focus", "", "The ginkgo regex to focus. Currently only respected for (dind).") 165 flag.StringVar(&o.skipRegex, "ginkgo-skip", "", "The ginkgo regex to skip. Currently only respected for (dind).") 166 flag.BoolVar(&o.kubemark, "kubemark", false, "If true, run kubemark tests.") 167 flag.StringVar(&o.kubemarkMasterSize, "kubemark-master-size", "", "Kubemark master size (only relevant if --kubemark=true). Auto-calculated based on '--kubemark-nodes' if left empty.") 168 flag.StringVar(&o.kubemarkNodes, "kubemark-nodes", "5", "Number of kubemark nodes to start (only relevant if --kubemark=true).") 169 flag.StringVar(&o.logexporterGCSPath, "logexporter-gcs-path", "", "Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty") 170 flag.StringVar(&o.metadataSources, "metadata-sources", "images.json", "Comma-separated list of files inside ./artifacts to merge into metadata.json") 171 flag.StringVar(&o.nodeArgs, "node-args", "", "Args for node e2e tests.") 172 flag.StringVar(&o.nodeTestArgs, "node-test-args", "", "Test args specifically for node e2e tests.") 173 flag.BoolVar(&o.noAllowDup, "no-allow-dup", false, "if set --allow-dup will not be passed to push-build and --stage will error if the build already exists on the gcs path") 174 flag.BoolVar(&o.nodeTests, "node-tests", false, "If true, run node-e2e tests.") 175 flag.StringVar(&o.preTestCmd, "pre-test-cmd", "", "If set, run the provided command before running any tests.") 176 flag.StringVar(&o.postTestCmd, "post-test-cmd", "", "If set, run the provided command after running all the tests.") 177 flag.StringVar(&o.provider, "provider", "", "Kubernetes provider such as gce, gke, aws, etc") 178 flag.StringVar(&o.publish, "publish", "", "Publish version to the specified gs:// path on success") 179 flag.StringVar(&o.runtimeConfig, "runtime-config", "", "If set, API versions can be turned on or off while bringing up the API server.") 180 flag.StringVar(&o.stage.dockerRegistry, "registry", "", "Push images to the specified docker registry (e.g. gcr.io/a-test-project)") 181 flag.StringVar(&o.save, "save", "", "Save credentials to gs:// path on --up if set (or load from there if not --up)") 182 flag.BoolVar(&o.skew, "skew", false, "If true, run tests in another version at ../kubernetes/kubernetes_skew") 183 flag.BoolVar(&o.skipDumpClusterLogs, "skip-dump-cluster-logs", false, "If true, skip the cluster log dumping") 184 flag.BoolVar(&o.soak, "soak", false, "If true, job runs in soak mode") 185 flag.DurationVar(&o.soakDuration, "soak-duration", 7*24*time.Hour, "Maximum age of a soak cluster before it gets recycled") 186 flag.Var(&o.stage, "stage", "Upload binaries to gs://bucket/devel/job-suffix if set") 187 flag.StringVar(&o.stage.versionSuffix, "stage-suffix", "", "Append suffix to staged version when set") 188 flag.StringVar(&o.storageTestDriverPath, "storage-testdriver-repo-path", "", "Relative path for external e2e test driver config in the csi driver repo") 189 flag.BoolVar(&o.test, "test", false, "Run Ginkgo tests.") 190 flag.StringVar(&o.testArgs, "test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.") 191 flag.StringVar(&o.testCmd, "test-cmd", "", "command to run against the cluster instead of Ginkgo e2e tests") 192 flag.StringVar(&o.testCmdName, "test-cmd-name", "", "name to log the test command as in xml results") 193 flag.DurationVar(&timeout, "timeout", time.Duration(0), "Terminate testing after the timeout duration (s/m/h)") 194 flag.BoolVar(&o.up, "up", false, "If true, start the e2e cluster. If cluster is already up, recreate it.") 195 flag.StringVar(&o.upgradeArgs, "upgrade_args", "", "If set, run upgrade tests before other tests") 196 flag.BoolVar(&o.version, "version", false, "Command to print version") 197 198 // The "-v" flag was also used by glog, which is used by k8s.io/client-go. Duplicate flags cause panics. 199 // 1. Even if we could convince glog to change, they have too many consumers to ever do so. 200 // 2. The glog lib parses flags during init. It is impossible to dynamically rewrite the args before they're parsed by glog. 201 // 3. The glog lib takes an int value, so "-v false" is an error. 202 // 4. It's possible, but unlikely, we could convince k8s.io/client-go to use a logging shim, because a library shouldn't force a logging implementation. This would take a major version release for the lib. 203 // 204 // The most reasonable solution is to accept that we shouldn't have made a single-letter global, and rename all references to this variable. 205 flag.BoolVar(&verbose, "verbose-commands", true, "If true, print all command output.") 206 207 // go flag does not support StringArrayVar 208 pflag.StringArrayVar(&o.testCmdArgs, "test-cmd-args", []string{}, "args for test-cmd") 209 return &o 210 } 211 212 var suite util.TestSuite = util.TestSuite{Name: "kubetest"} 213 214 func validWorkingDirectory() error { 215 cwd, err := os.Getwd() 216 if err != nil { 217 return fmt.Errorf("could not get pwd: %w", err) 218 } 219 acwd, err := filepath.Abs(cwd) 220 if err != nil { 221 return fmt.Errorf("failed to convert %s to an absolute path: %w", cwd, err) 222 } 223 // This also matches "kubernetes_skew" for upgrades. 224 if !strings.Contains(filepath.Base(acwd), "kubernetes") { 225 return fmt.Errorf("must run from kubernetes directory root. current: %s", acwd) 226 } 227 return nil 228 } 229 230 type deployer interface { 231 Up() error 232 IsUp() error 233 DumpClusterLogs(localPath, gcsPath string) error 234 TestSetup() error 235 Down() error 236 GetClusterCreated(gcpProject string) (time.Time, error) 237 KubectlCommand() (*exec.Cmd, error) 238 } 239 240 // publisher is implemented by deployers that want to publish status on success 241 type publisher interface { 242 // Publish is called when the tests were successful; the deployer should publish a success file 243 Publish() error 244 } 245 246 func getDeployer(o *options) (deployer, error) { 247 switch o.deployment { 248 case "bash": 249 return newBash(&o.clusterIPRange, o.gcpProject, o.gcpZone, o.gcpSSHProxyInstanceName, o.provider), nil 250 case "conformance": 251 return conformance.NewDeployer(o.kubecfg) 252 case "gke": 253 return newGKE(o.provider, o.gcpProject, o.gcpZone, o.gcpRegion, o.gcpNetwork, o.gcpNodeImage, o.gcpImageFamily, o.gcpImageProject, o.cluster, o.gcpSSHProxyInstanceName, &o.testArgs, &o.upgradeArgs) 254 case "kind": 255 return kind.NewDeployer(control, string(o.build)) 256 case "kops": 257 return newKops(o.provider, o.gcpProject, o.cluster) 258 case "node": 259 return nodeDeploy{provider: o.provider}, nil 260 case "none": 261 return noneDeploy{}, nil 262 case "local": 263 return newLocalCluster(), nil 264 case "aksengine": 265 return newAKSEngine() 266 case "aks": 267 return newAksDeployer() 268 default: 269 return nil, fmt.Errorf("unknown deployment strategy %q", o.deployment) 270 } 271 } 272 273 func validateFlags(o *options) error { 274 if !o.extract.Enabled() && o.extractSource { 275 return errors.New("--extract-source flag cannot be passed without --extract") 276 } 277 return nil 278 } 279 280 func main() { 281 log.SetFlags(log.LstdFlags | log.Lshortfile) 282 log.Printf("Running kubetest version: %s\n", gitTag) 283 284 pflag.CommandLine = pflag.NewFlagSet(os.Args[0], pflag.ContinueOnError) 285 o := defineFlags() 286 pflag.CommandLine.AddGoFlagSet(flag.CommandLine) 287 if err := pflag.CommandLine.Parse(os.Args[1:]); err != nil { 288 log.Fatalf("Flag parse failed: %v", err) 289 } 290 291 if err := validateFlags(o); err != nil { 292 log.Fatalf("Flags validation failed. err: %v", err) 293 } 294 295 if o.version { 296 log.Printf("kubetest version: %s\n", gitTag) 297 return 298 } 299 300 control = process.NewControl(timeout, interrupt, terminate, verbose) 301 302 // do things when we know we are running in the kubetest image 303 if os.Getenv("KUBETEST_IN_DOCKER") == "true" { 304 o.flushMemAfterBuild = true 305 } 306 // sanity fix for kind deployer, not set for other deployers to avoid 307 // breaking changes... 308 if o.deployment == "kind" { 309 // always default --dump for kind, in CI use $ARTIFACTS 310 artifacts := os.Getenv("ARTIFACTS") 311 if artifacts == "" { 312 artifacts = "./_artifacts" 313 } 314 o.dump = artifacts 315 } 316 317 err := complete(o) 318 319 if boskos.HasResource() { 320 if berr := boskos.ReleaseAll("dirty"); berr != nil { 321 log.Fatalf("[Boskos] Fail To Release: %v, kubetest err: %v", berr, err) 322 } 323 } 324 325 if err != nil { 326 log.Fatalf("Something went wrong: %v", err) 327 } 328 } 329 330 func complete(o *options) error { 331 if !terminate.Stop() { 332 <-terminate.C // Drain the value if necessary. 333 } 334 if !interrupt.Stop() { 335 <-interrupt.C // Drain value 336 } 337 338 if timeout > 0 { 339 log.Printf("Limiting testing to %s", timeout) 340 interrupt.Reset(timeout) 341 } 342 343 if o.dump != "" { 344 defer writeMetadata(o.dump, o.metadataSources) 345 defer control.WriteXML(&suite, o.dump, time.Now()) 346 } 347 if o.logexporterGCSPath != "" { 348 o.testArgs += fmt.Sprintf(" --logexporter-gcs-path=%s", o.logexporterGCSPath) 349 } 350 if err := control.XMLWrap(&suite, "Prepare", func() error { return prepare(o) }); err != nil { 351 return fmt.Errorf("failed to prepare test environment: %w", err) 352 } 353 // Get the deployer before we acquire k8s so any additional flag 354 // verifications happen early. 355 var deploy deployer 356 err := control.XMLWrap(&suite, "GetDeployer", func() error { 357 d, err := getDeployer(o) 358 deploy = d 359 return err 360 }) 361 if err != nil { 362 return fmt.Errorf("error creating deployer: %w", err) 363 } 364 365 // Check soaking before run tests 366 if o.soak { 367 if created, err := deploy.GetClusterCreated(o.gcpProject); err != nil { 368 // continue, but log the error 369 log.Printf("deploy %v, GetClusterCreated failed: %v", o.deployment, err) 370 } else { 371 if time.Now().After(created.Add(o.soakDuration)) { 372 // flip up on - which will tear down previous cluster and start a new one 373 log.Printf("Previous soak cluster created at %v, will recreate the cluster", created) 374 o.up = true 375 } 376 } 377 } 378 379 if err := acquireKubernetes(o, deploy); err != nil { 380 return fmt.Errorf("failed to acquire k8s binaries: %w", err) 381 } 382 if o.extract.Enabled() { 383 // If we specified `--extract-source` we will already be in the correct directory 384 if !o.extractSource { 385 if err := os.Chdir("kubernetes"); err != nil { 386 return fmt.Errorf("failed to chdir to kubernetes dir: %w", err) 387 } 388 } 389 } 390 if err := validWorkingDirectory(); err != nil { 391 return fmt.Errorf("called from invalid working directory: %w", err) 392 } 393 394 if o.down { 395 // listen for signals such as ^C and gracefully attempt to clean up 396 c := make(chan os.Signal, 1) 397 signal.Notify(c, os.Interrupt) 398 go func() { 399 for range c { 400 log.Print("Captured ^C, gracefully attempting to cleanup resources..") 401 if err = deploy.Down(); err != nil { 402 log.Printf("Tearing down deployment failed: %v", err) 403 } 404 if err != nil { 405 os.Exit(1) 406 } 407 408 os.Exit(2) 409 } 410 }() 411 } 412 413 if err := run(deploy, *o); err != nil { 414 return err 415 } 416 417 // Publish the successfully tested version when requested 418 if o.publish != "" { 419 if err := publish(o.publish); err != nil { 420 return err 421 } 422 } 423 return nil 424 } 425 426 func acquireKubernetes(o *options, d deployer) error { 427 // Potentially build kubernetes 428 if o.build.Enabled() { 429 var err error 430 // kind deployer manages build 431 if k, ok := d.(*kind.Deployer); ok { 432 err = control.XMLWrap(&suite, "Build", k.Build) 433 } else if c, ok := d.(*aksEngineDeployer); ok { // Azure deployer 434 err = control.XMLWrap(&suite, "Build", func() error { 435 return c.Build(o.build) 436 }) 437 } else { 438 err = control.XMLWrap(&suite, "Build", o.build.Build) 439 } 440 if o.flushMemAfterBuild { 441 util.FlushMem() 442 } 443 if err != nil { 444 return err 445 } 446 } 447 448 // Potentially stage build binaries somewhere on GCS 449 if o.stage.Enabled() { 450 if err := control.XMLWrap(&suite, "Stage", func() error { 451 return o.stage.Stage(o.noAllowDup) 452 }); err != nil { 453 return err 454 } 455 } 456 457 // Potentially download existing binaries and extract them. 458 if o.extract.Enabled() { 459 err := control.XMLWrap(&suite, "Extract", func() error { 460 // Should we restore a previous state? 461 // Restore if we are not upping the cluster 462 if o.save != "" { 463 if !o.up { 464 // Restore version and .kube/config from --up 465 log.Printf("Overwriting extract strategy to load kubeconfig and version from %s", o.save) 466 o.extract = extractStrategies{ 467 extractStrategy{ 468 mode: load, 469 option: o.save, 470 }, 471 } 472 } 473 } 474 475 // New deployment, extract new version 476 return o.extract.Extract(o.gcpProject, o.gcpZone, o.gcpRegion, o.extractCIBucket, o.extractReleaseBucket, o.extractSource) 477 }) 478 if err != nil { 479 return err 480 } 481 } 482 return nil 483 } 484 485 // Returns the k8s version name 486 func findVersion() string { 487 // The version may be in a version file 488 if _, err := os.Stat("version"); err == nil { 489 b, err := os.ReadFile("version") 490 if err == nil { 491 return strings.TrimSpace(string(b)) 492 } 493 log.Printf("Failed to read version: %v", err) 494 } 495 496 // We can also get it from the git repo. 497 if _, err := os.Stat("hack/lib/version.sh"); err == nil { 498 // TODO(fejta): do this in go. At least we removed the upload-to-gcs.sh dep. 499 gross := `. hack/lib/version.sh && KUBE_ROOT=. kube::version::get_version_vars && echo "${KUBE_GIT_VERSION-}"` 500 b, err := control.Output(exec.Command("bash", "-c", gross)) 501 if err == nil { 502 return strings.TrimSpace(string(b)) 503 } 504 log.Printf("Failed to get_version_vars: %v", err) 505 } 506 507 return "unknown" // Sad trombone 508 } 509 510 // maybeMergeMetadata will add new keyvals into the map; quietly eats errors. 511 func maybeMergeJSON(meta map[string]string, path string) { 512 if data, err := os.ReadFile(path); err == nil { 513 json.Unmarshal(data, &meta) 514 } 515 } 516 517 // Write metadata.json, including version and env arg data. 518 func writeMetadata(path, metadataSources string) error { 519 m := make(map[string]string) 520 521 // Look for any sources of metadata and load 'em 522 for _, f := range strings.Split(metadataSources, ",") { 523 maybeMergeJSON(m, filepath.Join(path, f)) 524 } 525 526 ver := findVersion() 527 m["job-version"] = ver // TODO(krzyzacy): retire 528 m["revision"] = ver 529 m["kubetest-version"] = gitTag 530 re := regexp.MustCompile(`^BUILD_METADATA_(.+)$`) 531 for _, e := range os.Environ() { 532 p := strings.SplitN(e, "=", 2) 533 r := re.FindStringSubmatch(p[0]) 534 if r == nil { 535 continue 536 } 537 k, v := strings.ToLower(r[1]), p[1] 538 m[k] = v 539 } 540 f, err := os.Create(filepath.Join(path, "metadata.json")) 541 if err != nil { 542 return err 543 } 544 defer f.Close() 545 e := json.NewEncoder(f) 546 return e.Encode(m) 547 } 548 549 // Install cloudsdk tarball to location, updating PATH 550 func installGcloud(tarball string, location string) error { 551 552 if err := os.MkdirAll(location, 0775); err != nil { 553 return err 554 } 555 556 if err := control.FinishRunning(exec.Command("tar", "xzf", tarball, "-C", location)); err != nil { 557 return err 558 } 559 560 if err := control.FinishRunning(exec.Command(filepath.Join(location, "google-cloud-sdk", "install.sh"), "--disable-installation-options", "--bash-completion=false", "--path-update=false", "--usage-reporting=false")); err != nil { 561 return err 562 } 563 564 if err := util.InsertPath(filepath.Join(location, "google-cloud-sdk", "bin")); err != nil { 565 return err 566 } 567 568 if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "alpha")); err != nil { 569 return err 570 } 571 572 if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "beta")); err != nil { 573 return err 574 } 575 576 if err := control.FinishRunning(exec.Command("gcloud", "info")); err != nil { 577 return err 578 } 579 return nil 580 } 581 582 func migrateGcpEnvAndOptions(o *options) error { 583 var network string 584 var zone string 585 switch o.provider { 586 case "gke": 587 network = "KUBE_GKE_NETWORK" 588 zone = "ZONE" 589 default: 590 network = "KUBE_GCE_NETWORK" 591 zone = "KUBE_GCE_ZONE" 592 } 593 return util.MigrateOptions([]util.MigratedOption{ 594 { 595 Env: "PROJECT", 596 Option: &o.gcpProject, 597 Name: "--gcp-project", 598 }, 599 { 600 Env: zone, 601 Option: &o.gcpZone, 602 Name: "--gcp-zone", 603 }, 604 { 605 Env: "REGION", 606 Option: &o.gcpRegion, 607 Name: "--gcp-region", 608 }, 609 { 610 Env: "GOOGLE_APPLICATION_CREDENTIALS", 611 Option: &o.gcpServiceAccount, 612 Name: "--gcp-service-account", 613 }, 614 { 615 Env: network, 616 Option: &o.gcpNetwork, 617 Name: "--gcp-network", 618 }, 619 { 620 Env: "KUBE_NODE_OS_DISTRIBUTION", 621 Option: &o.gcpNodeImage, 622 Name: "--gcp-node-image", 623 }, 624 { 625 Env: "KUBE_MASTER_OS_DISTRIBUTION", 626 Option: &o.gcpMasterImage, 627 Name: "--gcp-master-image", 628 }, 629 { 630 Env: "NUM_NODES", 631 Option: &o.gcpNodes, 632 Name: "--gcp-nodes", 633 }, 634 { 635 Env: "NODE_SIZE", 636 Option: &o.gcpNodeSize, 637 Name: "--gcp-node-size", 638 }, 639 { 640 Env: "MASTER_SIZE", 641 Option: &o.gcpMasterSize, 642 Name: "--gcp-master-size", 643 }, 644 { 645 Env: "CLOUDSDK_BUCKET", 646 Option: &o.gcpCloudSdk, 647 Name: "--gcp-cloud-sdk", 648 SkipPush: true, 649 }, 650 }) 651 } 652 653 func prepareGcp(o *options) error { 654 if err := migrateGcpEnvAndOptions(o); err != nil { 655 return err 656 } 657 // Must happen before any gcloud commands 658 if err := activateServiceAccount(o.gcpServiceAccount); err != nil { 659 return err 660 } 661 662 if o.provider == "gce" { 663 if distro := os.Getenv("KUBE_OS_DISTRIBUTION"); distro != "" { 664 log.Printf("Please use --gcp-master-image=%s --gcp-node-image=%s (instead of deprecated KUBE_OS_DISTRIBUTION)", 665 distro, distro) 666 // Note: KUBE_OS_DISTRIBUTION takes precedence over 667 // KUBE_{MASTER,NODE}_OS_DISTRIBUTION, so override here 668 // after the migration above. 669 o.gcpNodeImage = distro 670 o.gcpMasterImage = distro 671 if err := os.Setenv("KUBE_NODE_OS_DISTRIBUTION", distro); err != nil { 672 return fmt.Errorf("could not set KUBE_NODE_OS_DISTRIBUTION=%s: %w", distro, err) 673 } 674 if err := os.Setenv("KUBE_MASTER_OS_DISTRIBUTION", distro); err != nil { 675 return fmt.Errorf("could not set KUBE_MASTER_OS_DISTRIBUTION=%s: %w", distro, err) 676 } 677 } 678 679 hasGCPImageFamily, hasGCPImageProject := len(o.gcpImageFamily) != 0, len(o.gcpImageProject) != 0 680 if hasGCPImageFamily != hasGCPImageProject { 681 return fmt.Errorf("--image-family and --image-project must be both set or unset") 682 } 683 if hasGCPImageFamily && hasGCPImageProject { 684 out, err := control.Output(exec.Command("gcloud", "compute", "images", "describe-from-family", o.gcpImageFamily, "--project", o.gcpImageProject)) 685 if err != nil { 686 return fmt.Errorf("failed to get latest image from family %q in project %q: %s", o.gcpImageFamily, o.gcpImageProject, err) 687 } 688 latestImage := "" 689 latestImageRegexp := regexp.MustCompile(`^name: *(\S+)`) 690 for _, line := range strings.Split(string(out), "\n") { 691 matches := latestImageRegexp.FindStringSubmatch(line) 692 if len(matches) == 2 { 693 latestImage = matches[1] 694 break 695 } 696 } 697 if len(latestImage) == 0 { 698 return fmt.Errorf("failed to get latest image from family %q in project %q", o.gcpImageFamily, o.gcpImageProject) 699 } 700 if o.deployment == "node" { 701 o.nodeArgs += fmt.Sprintf(" --images=%s --image-project=%s", latestImage, o.gcpImageProject) 702 } else { 703 os.Setenv("KUBE_GCE_NODE_IMAGE", latestImage) 704 os.Setenv("KUBE_GCE_NODE_PROJECT", o.gcpImageProject) 705 } 706 } 707 } else if o.provider == "gke" { 708 if o.deployment == "" { 709 o.deployment = "gke" 710 } 711 if o.deployment != "gke" { 712 return fmt.Errorf("expected --deployment=gke for --provider=gke, found --deployment=%s", o.deployment) 713 } 714 if o.gcpMasterImage != "" { 715 return fmt.Errorf("expected --gcp-master-image to be empty for --provider=gke, found --gcp-master-image=%s", o.gcpMasterImage) 716 } 717 if o.gcpNodes != "" { 718 return fmt.Errorf("--gcp-nodes cannot be set on GKE, use --gke-shape instead") 719 } 720 if o.gcpNodeSize != "" { 721 return fmt.Errorf("--gcp-node-size cannot be set on GKE, use --gke-shape instead") 722 } 723 if o.gcpMasterSize != "" { 724 return fmt.Errorf("--gcp-master-size cannot be set on GKE, where it's auto-computed") 725 } 726 727 // TODO(kubernetes/test-infra#3536): This is used by the 728 // ginkgo-e2e.sh wrapper. 729 nod := o.gcpNodeImage 730 if nod == "container_vm" { 731 // gcloud container clusters create understands 732 // "container_vm", e2es understand "debian". 733 nod = "debian" 734 } 735 if nod == "cos_containerd" { 736 // gcloud container clusters create understands 737 // "cos_containerd", e2es only understand 738 // "gci"/"cos", 739 nod = "gci" 740 } 741 os.Setenv("NODE_OS_DISTRIBUTION", nod) 742 } 743 if o.gcpProject == "" { 744 log.Print("--gcp-project is missing, trying to fetch a project from boskos.\n" + 745 "(for local runs please set --gcp-project to your dev project)") 746 747 var resType string 748 if o.gcpProjectType != "" { 749 resType = o.gcpProjectType 750 } else if o.provider == "gke" { 751 resType = "gke-project" 752 } else { 753 resType = "gce-project" 754 } 755 756 log.Printf("provider %v, will acquire project type %v from boskos", o.provider, resType) 757 758 // let's retry 5min to get next available resource 759 ctx, cancel := context.WithTimeout(context.Background(), o.boskosWaitDuration) 760 defer cancel() 761 p, err := boskos.AcquireWait(ctx, resType, "free", "busy") 762 if err != nil { 763 return fmt.Errorf("--provider=%s boskos failed to acquire project: %w", o.provider, err) 764 } 765 766 if p == nil { 767 return fmt.Errorf("boskos does not have a free %s at the moment", resType) 768 } 769 770 go func(c *client.Client, proj string) { 771 for range time.Tick(time.Minute * 5) { 772 if err := c.UpdateOne(p.Name, "busy", nil); err != nil { 773 log.Printf("[Boskos] Update of %s failed with %v", p.Name, err) 774 } 775 } 776 }(boskos, p.Name) 777 o.gcpProject = p.Name 778 } 779 780 if err := os.Setenv("CLOUDSDK_CORE_PRINT_UNHANDLED_TRACEBACKS", "1"); err != nil { 781 return fmt.Errorf("could not set CLOUDSDK_CORE_PRINT_UNHANDLED_TRACEBACKS=1: %w", err) 782 } 783 784 if err := control.FinishRunning(exec.Command("gcloud", "config", "set", "project", o.gcpProject)); err != nil { 785 return fmt.Errorf("fail to set project %s : err %w", o.gcpProject, err) 786 } 787 788 // TODO(krzyzacy):Remove this when we retire migrateGcpEnvAndOptions 789 // Note that a lot of scripts are still depend on this env in k/k repo. 790 if err := os.Setenv("PROJECT", o.gcpProject); err != nil { 791 return fmt.Errorf("fail to set env var PROJECT %s : err %w", o.gcpProject, err) 792 } 793 794 // Ensure ssh keys exist 795 log.Print("Checking existing of GCP ssh keys...") 796 k := filepath.Join(util.Home(".ssh"), "google_compute_engine") 797 if _, err := os.Stat(k); err != nil { 798 return err 799 } 800 pk := k + ".pub" 801 if _, err := os.Stat(pk); err != nil { 802 return err 803 } 804 805 log.Printf("Checking presence of public key in %s", o.gcpProject) 806 if out, err := control.Output(exec.Command("gcloud", "compute", "--project="+o.gcpProject, "project-info", "describe")); err != nil { 807 return err 808 } else if b, err := os.ReadFile(pk); err != nil { 809 return err 810 } else if !strings.Contains(string(out), string(b)) { 811 log.Print("Uploading public ssh key to project metadata...") 812 if err = control.FinishRunning(exec.Command("gcloud", "compute", "--project="+o.gcpProject, "config-ssh")); err != nil { 813 return err 814 } 815 } 816 817 // Install custom gcloud version if necessary 818 if o.gcpCloudSdk != "" { 819 for i := 0; i < 3; i++ { 820 if err := control.FinishRunning(exec.Command("gsutil", "-mq", "cp", "-r", o.gcpCloudSdk, util.Home())); err == nil { 821 break // Success! 822 } 823 time.Sleep(1 << uint(i) * time.Second) 824 } 825 for _, f := range []string{util.Home(".gsutil"), util.Home("repo"), util.Home("cloudsdk")} { 826 if _, err := os.Stat(f); err == nil || !os.IsNotExist(err) { 827 if err = os.RemoveAll(f); err != nil { 828 return err 829 } 830 } 831 } 832 833 install := util.Home("repo", "google-cloud-sdk.tar.gz") 834 if strings.HasSuffix(o.gcpCloudSdk, ".tar.gz") { 835 install = util.Home(filepath.Base(o.gcpCloudSdk)) 836 } else { 837 if err := os.Rename(util.Home(filepath.Base(o.gcpCloudSdk)), util.Home("repo")); err != nil { 838 return err 839 } 840 841 // Controls which gcloud components to install. 842 pop, err := util.PushEnv("CLOUDSDK_COMPONENT_MANAGER_SNAPSHOT_URL", "file://"+util.Home("repo", "components-2.json")) 843 if err != nil { 844 return err 845 } 846 defer pop() 847 } 848 849 if err := installGcloud(install, util.Home("cloudsdk")); err != nil { 850 return err 851 } 852 // gcloud creds may have changed 853 if err := activateServiceAccount(o.gcpServiceAccount); err != nil { 854 return err 855 } 856 } 857 858 if o.kubemark { 859 if p := os.Getenv("KUBEMARK_BAZEL_BUILD"); strings.ToLower(p) == "y" { 860 // we need docker-credential-gcr to get authed properly 861 // https://github.com/bazelbuild/rules_docker#authorization 862 if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "docker-credential-gcr")); err != nil { 863 return err 864 } 865 if err := control.FinishRunning(exec.Command("docker-credential-gcr", "configure-docker")); err != nil { 866 return err 867 } 868 } 869 } 870 871 return nil 872 } 873 874 func prepareAws(o *options) error { 875 // gcloud creds may have changed 876 if err := activateServiceAccount(o.gcpServiceAccount); err != nil { 877 return err 878 } 879 return control.FinishRunning(exec.Command("pip", "install", "awscli")) 880 } 881 882 // Activate GOOGLE_APPLICATION_CREDENTIALS if set or do nothing. 883 func activateServiceAccount(path string) error { 884 if path == "" { 885 return nil 886 } 887 return control.FinishRunning(exec.Command("gcloud", "auth", "activate-service-account", "--key-file="+path)) 888 } 889 890 func prepare(o *options) error { 891 if err := util.MigrateOptions([]util.MigratedOption{ 892 { 893 Env: "KUBERNETES_PROVIDER", 894 Option: &o.provider, 895 Name: "--provider", 896 }, 897 { 898 Env: "CLUSTER_NAME", 899 Option: &o.cluster, 900 Name: "--cluster", 901 }, 902 }); err != nil { 903 return err 904 } 905 if err := prepareGinkgoParallel(&o.ginkgoParallel); err != nil { 906 return err 907 } 908 909 switch o.provider { 910 case "gce", "gke", "node": 911 if err := prepareGcp(o); err != nil { 912 return err 913 } 914 case "aws": 915 if err := prepareAws(o); err != nil { 916 return err 917 } 918 } 919 920 if o.kubemark { 921 if err := util.MigrateOptions([]util.MigratedOption{ 922 { 923 Env: "KUBEMARK_NUM_NODES", 924 Option: &o.kubemarkNodes, 925 Name: "--kubemark-nodes", 926 }, 927 { 928 Env: "KUBEMARK_MASTER_SIZE", 929 Option: &o.kubemarkMasterSize, 930 Name: "--kubemark-master-size", 931 }, 932 }); err != nil { 933 return err 934 } 935 } 936 937 if err := os.MkdirAll(artifacts, 0777); err != nil { // Create artifacts 938 return err 939 } 940 941 return nil 942 } 943 944 type ginkgoParallelValue struct { 945 v int // 0 == not set (defaults to 1) 946 } 947 948 func (v *ginkgoParallelValue) IsBoolFlag() bool { 949 return true 950 } 951 952 func (v *ginkgoParallelValue) String() string { 953 if v.v == 0 { 954 return "1" 955 } 956 return strconv.Itoa(v.v) 957 } 958 959 func (v *ginkgoParallelValue) Set(s string) error { 960 if s == "" { 961 v.v = 0 962 return nil 963 } 964 if s == "true" { 965 v.v = defaultGinkgoParallel 966 return nil 967 } 968 p, err := strconv.Atoi(s) 969 if err != nil { 970 return fmt.Errorf("--ginkgo-parallel must be an integer, found %q", s) 971 } 972 if p < 1 { 973 return fmt.Errorf("--ginkgo-parallel must be >= 1, found %d", p) 974 } 975 v.v = p 976 return nil 977 } 978 979 func (v *ginkgoParallelValue) Type() string { 980 return "ginkgoParallelValue" 981 } 982 983 func (v *ginkgoParallelValue) Get() int { 984 if v.v == 0 { 985 return 1 986 } 987 return v.v 988 } 989 990 var _ flag.Value = &ginkgoParallelValue{} 991 992 // Hand migrate this option. GINKGO_PARALLEL => GINKGO_PARALLEL_NODES=25 993 func prepareGinkgoParallel(v *ginkgoParallelValue) error { 994 if p := os.Getenv("GINKGO_PARALLEL"); strings.ToLower(p) == "y" { 995 log.Printf("Please use kubetest --ginkgo-parallel (instead of deprecated GINKGO_PARALLEL=y)") 996 if err := v.Set("true"); err != nil { 997 return err 998 } 999 os.Unsetenv("GINKGO_PARALLEL") 1000 } 1001 if p := os.Getenv("GINKGO_PARALLEL_NODES"); p != "" { 1002 log.Printf("Please use kubetest --ginkgo-parallel=%s (instead of deprecated GINKGO_PARALLEL_NODES=%s)", p, p) 1003 if err := v.Set(p); err != nil { 1004 return err 1005 } 1006 } 1007 os.Setenv("GINKGO_PARALLEL_NODES", v.String()) 1008 return nil 1009 } 1010 1011 func publish(pub string) error { 1012 v, err := os.ReadFile("version") 1013 if err != nil { 1014 return err 1015 } 1016 log.Printf("Set %s version to %s", pub, string(v)) 1017 return gcsWrite(pub, v) 1018 }