github.com/maxgio92/test-infra@v0.1.0/kubetest/main.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package main 18 19 import ( 20 "context" 21 "encoding/json" 22 "errors" 23 "flag" 24 "fmt" 25 "log" 26 "math/rand" 27 "os" 28 "os/exec" 29 "os/signal" 30 "path/filepath" 31 "regexp" 32 "strconv" 33 "strings" 34 "time" 35 36 "github.com/spf13/pflag" 37 "github.com/maxgio92/test-infra/kubetest/boskos/client" 38 39 "github.com/maxgio92/test-infra/kubetest/conformance" 40 "github.com/maxgio92/test-infra/kubetest/kind" 41 "github.com/maxgio92/test-infra/kubetest/process" 42 "github.com/maxgio92/test-infra/kubetest/util" 43 ) 44 45 // Hardcoded in ginkgo-e2e.sh 46 const defaultGinkgoParallel = 25 47 48 var ( 49 artifacts = filepath.Join(os.Getenv("WORKSPACE"), "_artifacts") 50 boskos, _ = client.NewClient(os.Getenv("JOB_NAME"), "http://boskos.test-pods.svc.cluster.local.", "", "") 51 control = process.NewControl(timeout, interrupt, terminate, verbose) 52 gitTag = "" // initializing default zero value. ldflags will populate this during build time. 53 interrupt = time.NewTimer(time.Duration(0)) // interrupt testing at this time. 54 terminate = time.NewTimer(time.Duration(0)) // terminate testing at this time. 55 timeout = time.Duration(0) 56 verbose = false 57 ) 58 59 type options struct { 60 build buildStrategy 61 boskosWaitDuration time.Duration 62 charts bool 63 checkLeaks bool 64 checkSkew bool 65 cluster string 66 clusterIPRange string 67 deployment string 68 down bool 69 dump string 70 dumpPreTestLogs string 71 extract extractStrategies 72 extractCIBucket string 73 extractReleaseBucket string 74 extractSource bool 75 flushMemAfterBuild bool 76 focusRegex string 77 gcpCloudSdk string 78 gcpMasterImage string 79 gcpMasterSize string 80 gcpNetwork string 81 gcpNodeImage string 82 gcpImageFamily string 83 gcpImageProject string 84 gcpNodes string 85 gcpNodeSize string 86 gcpProject string 87 gcpProjectType string 88 gcpServiceAccount string 89 // gcpSSHProxyInstanceName is the name of the vm instance which ip address will be used to set the 90 // KUBE_SSH_BASTION env. If set, it will result in proxying ssh connections in tests through the 91 // "bastion". It's useful for clusters with nodes without public ssh access, e.g. nodes without 92 // public ip addresses. Works only for gcp providers (gce, gke). 93 gcpSSHProxyInstanceName string 94 gcpRegion string 95 gcpZone string 96 ginkgoParallel ginkgoParallelValue 97 kubecfg string 98 kubemark bool 99 kubemarkMasterSize string 100 kubemarkNodes string // TODO(fejta): switch to int after migration 101 logexporterGCSPath string 102 metadataSources string 103 noAllowDup bool 104 nodeArgs string 105 nodeTestArgs string 106 nodeTests bool 107 preTestCmd string 108 postTestCmd string 109 provider string 110 publish string 111 runtimeConfig string 112 save string 113 skew bool 114 skipRegex string 115 soak bool 116 soakDuration time.Duration 117 stage stageStrategy 118 storageTestDriverPath string 119 test bool 120 testArgs string 121 testCmd string 122 testCmdName string 123 testCmdArgs []string 124 up bool 125 upgradeArgs string 126 version bool 127 } 128 129 func defineFlags() *options { 130 o := options{} 131 flag.Var(&o.build, "build", "Rebuild k8s binaries, optionally forcing (release|quick|bazel) strategy") 132 flag.DurationVar(&o.boskosWaitDuration, "boskos-wait-duration", 5*time.Minute, "Defines how long it waits until quit getting Boskos resoure, default 5 minutes") 133 flag.BoolVar(&o.charts, "charts", false, "If true, run charts tests") 134 flag.BoolVar(&o.checkSkew, "check-version-skew", true, "Verify client and server versions match") 135 flag.BoolVar(&o.checkLeaks, "check-leaked-resources", false, "Ensure project ends with the same resources") 136 flag.StringVar(&o.cluster, "cluster", "", "Cluster name. Must be set for --deployment=gke (TODO: other deployments).") 137 flag.StringVar(&o.clusterIPRange, "cluster-ip-range", "", "Specifies CLUSTER_IP_RANGE value during --up and --test (only relevant for --deployment=bash). Auto-calculated if empty.") 138 flag.StringVar(&o.deployment, "deployment", "bash", "Choices: none/bash/conformance/gke/kind/kops/node/local") 139 flag.BoolVar(&o.down, "down", false, "If true, tear down the cluster before exiting.") 140 flag.StringVar(&o.dump, "dump", "", "If set, dump bring-up and cluster logs to this location on test or cluster-up failure") 141 flag.StringVar(&o.dumpPreTestLogs, "dump-pre-test-logs", "", "If set, dump cluster logs to this location before running tests") 142 flag.Var(&o.extract, "extract", "Extract k8s binaries from the specified release location") 143 flag.StringVar(&o.extractCIBucket, "extract-ci-bucket", "k8s-release-dev", "Extract k8s CI binaries from the specified GCS bucket") 144 flag.StringVar(&o.extractReleaseBucket, "extract-release-bucket", "kubernetes-release", "Extract k8s release binaries from the specified GCS bucket") 145 flag.BoolVar(&o.extractSource, "extract-source", false, "Extract k8s src together with other tarballs") 146 flag.BoolVar(&o.flushMemAfterBuild, "flush-mem-after-build", false, "If true, try to flush container memory after building") 147 flag.Var(&o.ginkgoParallel, "ginkgo-parallel", fmt.Sprintf("Run Ginkgo tests in parallel, default %d runners. Use --ginkgo-parallel=N to specify an exact count.", defaultGinkgoParallel)) 148 flag.StringVar(&o.gcpCloudSdk, "gcp-cloud-sdk", "", "Install/upgrade google-cloud-sdk to the gs:// path if set") 149 flag.StringVar(&o.gcpProject, "gcp-project", "", "For use with gcloud commands") 150 flag.StringVar(&o.gcpProjectType, "gcp-project-type", "", "Explicitly indicate which project type to select from boskos") 151 flag.StringVar(&o.gcpServiceAccount, "gcp-service-account", "", "Service account to activate before using gcloud") 152 flag.StringVar(&o.gcpZone, "gcp-zone", "", "For use with gcloud commands") 153 flag.StringVar(&o.gcpRegion, "gcp-region", "", "For use with gcloud commands") 154 flag.StringVar(&o.gcpNetwork, "gcp-network", "", "Cluster network. Must be set for --deployment=gke (TODO: other deployments).") 155 flag.StringVar(&o.gcpMasterImage, "gcp-master-image", "", "Master image type (cos|debian on GCE, n/a on GKE)") 156 flag.StringVar(&o.gcpMasterSize, "gcp-master-size", "", "(--provider=gce only) Size of master to create (e.g n1-standard-1). Auto-calculated if left empty.") 157 flag.StringVar(&o.gcpNodeImage, "gcp-node-image", "", "Node image type (cos|container_vm on GKE, cos|debian on GCE)") 158 flag.StringVar(&o.gcpImageFamily, "image-family", "", "Node image family from which to use the latest image, required when --gcp-node-image=CUSTOM") 159 flag.StringVar(&o.gcpImageProject, "image-project", "", "Project containing node image family, required when --gcp-node-image=CUSTOM") 160 flag.StringVar(&o.gcpNodes, "gcp-nodes", "", "(--provider=gce only) Number of nodes to create.") 161 flag.StringVar(&o.gcpNodeSize, "gcp-node-size", "", "(--provider=gce only) Size of nodes to create (e.g n1-standard-1).") 162 flag.StringVar(&o.gcpSSHProxyInstanceName, "gcp-ssh-proxy-instance-name", "", "(--provider=gce|gke only) If set, will result in proxing the ssh connections via the provided instance name while running tests") 163 flag.StringVar(&o.kubecfg, "kubeconfig", "", "The location of a kubeconfig file.") 164 flag.StringVar(&o.focusRegex, "ginkgo-focus", "", "The ginkgo regex to focus. Currently only respected for (dind).") 165 flag.StringVar(&o.skipRegex, "ginkgo-skip", "", "The ginkgo regex to skip. Currently only respected for (dind).") 166 flag.BoolVar(&o.kubemark, "kubemark", false, "If true, run kubemark tests.") 167 flag.StringVar(&o.kubemarkMasterSize, "kubemark-master-size", "", "Kubemark master size (only relevant if --kubemark=true). Auto-calculated based on '--kubemark-nodes' if left empty.") 168 flag.StringVar(&o.kubemarkNodes, "kubemark-nodes", "5", "Number of kubemark nodes to start (only relevant if --kubemark=true).") 169 flag.StringVar(&o.logexporterGCSPath, "logexporter-gcs-path", "", "Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty") 170 flag.StringVar(&o.metadataSources, "metadata-sources", "images.json", "Comma-separated list of files inside ./artifacts to merge into metadata.json") 171 flag.StringVar(&o.nodeArgs, "node-args", "", "Args for node e2e tests.") 172 flag.StringVar(&o.nodeTestArgs, "node-test-args", "", "Test args specifically for node e2e tests.") 173 flag.BoolVar(&o.noAllowDup, "no-allow-dup", false, "if set --allow-dup will not be passed to push-build and --stage will error if the build already exists on the gcs path") 174 flag.BoolVar(&o.nodeTests, "node-tests", false, "If true, run node-e2e tests.") 175 flag.StringVar(&o.preTestCmd, "pre-test-cmd", "", "If set, run the provided command before running any tests.") 176 flag.StringVar(&o.postTestCmd, "post-test-cmd", "", "If set, run the provided command after running all the tests.") 177 flag.StringVar(&o.provider, "provider", "", "Kubernetes provider such as gce, gke, aws, etc") 178 flag.StringVar(&o.publish, "publish", "", "Publish version to the specified gs:// path on success") 179 flag.StringVar(&o.runtimeConfig, "runtime-config", "", "If set, API versions can be turned on or off while bringing up the API server.") 180 flag.StringVar(&o.stage.dockerRegistry, "registry", "", "Push images to the specified docker registry (e.g. gcr.io/a-test-project)") 181 flag.StringVar(&o.save, "save", "", "Save credentials to gs:// path on --up if set (or load from there if not --up)") 182 flag.BoolVar(&o.skew, "skew", false, "If true, run tests in another version at ../kubernetes/kubernetes_skew") 183 flag.BoolVar(&o.soak, "soak", false, "If true, job runs in soak mode") 184 flag.DurationVar(&o.soakDuration, "soak-duration", 7*24*time.Hour, "Maximum age of a soak cluster before it gets recycled") 185 flag.Var(&o.stage, "stage", "Upload binaries to gs://bucket/devel/job-suffix if set") 186 flag.StringVar(&o.stage.versionSuffix, "stage-suffix", "", "Append suffix to staged version when set") 187 flag.StringVar(&o.storageTestDriverPath, "storage-testdriver-repo-path", "", "Relative path for external e2e test driver config in the csi driver repo") 188 flag.BoolVar(&o.test, "test", false, "Run Ginkgo tests.") 189 flag.StringVar(&o.testArgs, "test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.") 190 flag.StringVar(&o.testCmd, "test-cmd", "", "command to run against the cluster instead of Ginkgo e2e tests") 191 flag.StringVar(&o.testCmdName, "test-cmd-name", "", "name to log the test command as in xml results") 192 flag.DurationVar(&timeout, "timeout", time.Duration(0), "Terminate testing after the timeout duration (s/m/h)") 193 flag.BoolVar(&o.up, "up", false, "If true, start the e2e cluster. If cluster is already up, recreate it.") 194 flag.StringVar(&o.upgradeArgs, "upgrade_args", "", "If set, run upgrade tests before other tests") 195 flag.BoolVar(&o.version, "version", false, "Command to print version") 196 197 // The "-v" flag was also used by glog, which is used by k8s.io/client-go. Duplicate flags cause panics. 198 // 1. Even if we could convince glog to change, they have too many consumers to ever do so. 199 // 2. The glog lib parses flags during init. It is impossible to dynamically rewrite the args before they're parsed by glog. 200 // 3. The glog lib takes an int value, so "-v false" is an error. 201 // 4. It's possible, but unlikely, we could convince k8s.io/client-go to use a logging shim, because a library shouldn't force a logging implementation. This would take a major version release for the lib. 202 // 203 // The most reasonable solution is to accept that we shouldn't have made a single-letter global, and rename all references to this variable. 204 flag.BoolVar(&verbose, "verbose-commands", true, "If true, print all command output.") 205 206 // go flag does not support StringArrayVar 207 pflag.StringArrayVar(&o.testCmdArgs, "test-cmd-args", []string{}, "args for test-cmd") 208 return &o 209 } 210 211 var suite util.TestSuite = util.TestSuite{Name: "kubetest"} 212 213 func validWorkingDirectory() error { 214 cwd, err := os.Getwd() 215 if err != nil { 216 return fmt.Errorf("could not get pwd: %w", err) 217 } 218 acwd, err := filepath.Abs(cwd) 219 if err != nil { 220 return fmt.Errorf("failed to convert %s to an absolute path: %w", cwd, err) 221 } 222 // This also matches "kubernetes_skew" for upgrades. 223 if !strings.Contains(filepath.Base(acwd), "kubernetes") { 224 return fmt.Errorf("must run from kubernetes directory root. current: %s", acwd) 225 } 226 return nil 227 } 228 229 type deployer interface { 230 Up() error 231 IsUp() error 232 DumpClusterLogs(localPath, gcsPath string) error 233 TestSetup() error 234 Down() error 235 GetClusterCreated(gcpProject string) (time.Time, error) 236 KubectlCommand() (*exec.Cmd, error) 237 } 238 239 // publisher is implemented by deployers that want to publish status on success 240 type publisher interface { 241 // Publish is called when the tests were successful; the deployer should publish a success file 242 Publish() error 243 } 244 245 func getDeployer(o *options) (deployer, error) { 246 switch o.deployment { 247 case "bash": 248 return newBash(&o.clusterIPRange, o.gcpProject, o.gcpZone, o.gcpSSHProxyInstanceName, o.provider), nil 249 case "conformance": 250 return conformance.NewDeployer(o.kubecfg) 251 case "gke": 252 return newGKE(o.provider, o.gcpProject, o.gcpZone, o.gcpRegion, o.gcpNetwork, o.gcpNodeImage, o.gcpImageFamily, o.gcpImageProject, o.cluster, o.gcpSSHProxyInstanceName, &o.testArgs, &o.upgradeArgs) 253 case "kind": 254 return kind.NewDeployer(control, string(o.build)) 255 case "kops": 256 return newKops(o.provider, o.gcpProject, o.cluster) 257 case "node": 258 return nodeDeploy{provider: o.provider}, nil 259 case "none": 260 return noneDeploy{}, nil 261 case "local": 262 return newLocalCluster(), nil 263 case "aksengine": 264 return newAKSEngine() 265 case "aks": 266 return newAksDeployer() 267 default: 268 return nil, fmt.Errorf("unknown deployment strategy %q", o.deployment) 269 } 270 } 271 272 func validateFlags(o *options) error { 273 if !o.extract.Enabled() && o.extractSource { 274 return errors.New("--extract-source flag cannot be passed without --extract") 275 } 276 return nil 277 } 278 279 func main() { 280 log.SetFlags(log.LstdFlags | log.Lshortfile) 281 log.Printf("Running kubetest version: %s\n", gitTag) 282 283 // Initialize global pseudo random generator. Initializing it to select random AWS Zones. 284 rand.Seed(time.Now().UnixNano()) 285 286 pflag.CommandLine = pflag.NewFlagSet(os.Args[0], pflag.ContinueOnError) 287 o := defineFlags() 288 pflag.CommandLine.AddGoFlagSet(flag.CommandLine) 289 if err := pflag.CommandLine.Parse(os.Args[1:]); err != nil { 290 log.Fatalf("Flag parse failed: %v", err) 291 } 292 293 if err := validateFlags(o); err != nil { 294 log.Fatalf("Flags validation failed. err: %v", err) 295 } 296 297 if o.version { 298 log.Printf("kubetest version: %s\n", gitTag) 299 return 300 } 301 302 control = process.NewControl(timeout, interrupt, terminate, verbose) 303 304 // do things when we know we are running in the kubetest image 305 if os.Getenv("KUBETEST_IN_DOCKER") == "true" { 306 o.flushMemAfterBuild = true 307 } 308 // sanity fix for kind deployer, not set for other deployers to avoid 309 // breaking changes... 310 if o.deployment == "kind" { 311 // always default --dump for kind, in CI use $ARTIFACTS 312 artifacts := os.Getenv("ARTIFACTS") 313 if artifacts == "" { 314 artifacts = "./_artifacts" 315 } 316 o.dump = artifacts 317 } 318 319 err := complete(o) 320 321 if boskos.HasResource() { 322 if berr := boskos.ReleaseAll("dirty"); berr != nil { 323 log.Fatalf("[Boskos] Fail To Release: %v, kubetest err: %v", berr, err) 324 } 325 } 326 327 if err != nil { 328 log.Fatalf("Something went wrong: %v", err) 329 } 330 } 331 332 func complete(o *options) error { 333 if !terminate.Stop() { 334 <-terminate.C // Drain the value if necessary. 335 } 336 if !interrupt.Stop() { 337 <-interrupt.C // Drain value 338 } 339 340 if timeout > 0 { 341 log.Printf("Limiting testing to %s", timeout) 342 interrupt.Reset(timeout) 343 } 344 345 if o.dump != "" { 346 defer writeMetadata(o.dump, o.metadataSources) 347 defer control.WriteXML(&suite, o.dump, time.Now()) 348 } 349 if o.logexporterGCSPath != "" { 350 o.testArgs += fmt.Sprintf(" --logexporter-gcs-path=%s", o.logexporterGCSPath) 351 } 352 if err := control.XMLWrap(&suite, "Prepare", func() error { return prepare(o) }); err != nil { 353 return fmt.Errorf("failed to prepare test environment: %w", err) 354 } 355 // Get the deployer before we acquire k8s so any additional flag 356 // verifications happen early. 357 var deploy deployer 358 err := control.XMLWrap(&suite, "GetDeployer", func() error { 359 d, err := getDeployer(o) 360 deploy = d 361 return err 362 }) 363 if err != nil { 364 return fmt.Errorf("error creating deployer: %w", err) 365 } 366 367 // Check soaking before run tests 368 if o.soak { 369 if created, err := deploy.GetClusterCreated(o.gcpProject); err != nil { 370 // continue, but log the error 371 log.Printf("deploy %v, GetClusterCreated failed: %v", o.deployment, err) 372 } else { 373 if time.Now().After(created.Add(o.soakDuration)) { 374 // flip up on - which will tear down previous cluster and start a new one 375 log.Printf("Previous soak cluster created at %v, will recreate the cluster", created) 376 o.up = true 377 } 378 } 379 } 380 381 if err := acquireKubernetes(o, deploy); err != nil { 382 return fmt.Errorf("failed to acquire k8s binaries: %w", err) 383 } 384 if o.extract.Enabled() { 385 // If we specified `--extract-source` we will already be in the correct directory 386 if !o.extractSource { 387 if err := os.Chdir("kubernetes"); err != nil { 388 return fmt.Errorf("failed to chdir to kubernetes dir: %w", err) 389 } 390 } 391 } 392 if err := validWorkingDirectory(); err != nil { 393 return fmt.Errorf("called from invalid working directory: %w", err) 394 } 395 396 if o.down { 397 // listen for signals such as ^C and gracefully attempt to clean up 398 c := make(chan os.Signal, 1) 399 signal.Notify(c, os.Interrupt) 400 go func() { 401 for range c { 402 log.Print("Captured ^C, gracefully attempting to cleanup resources..") 403 if err = deploy.Down(); err != nil { 404 log.Printf("Tearing down deployment failed: %v", err) 405 } 406 if err != nil { 407 os.Exit(1) 408 } 409 410 os.Exit(2) 411 } 412 }() 413 } 414 415 if err := run(deploy, *o); err != nil { 416 return err 417 } 418 419 // Publish the successfully tested version when requested 420 if o.publish != "" { 421 if err := publish(o.publish); err != nil { 422 return err 423 } 424 } 425 return nil 426 } 427 428 func acquireKubernetes(o *options, d deployer) error { 429 // Potentially build kubernetes 430 if o.build.Enabled() { 431 var err error 432 // kind deployer manages build 433 if k, ok := d.(*kind.Deployer); ok { 434 err = control.XMLWrap(&suite, "Build", k.Build) 435 } else if c, ok := d.(*aksEngineDeployer); ok { // Azure deployer 436 err = control.XMLWrap(&suite, "Build", func() error { 437 return c.Build(o.build) 438 }) 439 } else { 440 err = control.XMLWrap(&suite, "Build", o.build.Build) 441 } 442 if o.flushMemAfterBuild { 443 util.FlushMem() 444 } 445 if err != nil { 446 return err 447 } 448 } 449 450 // Potentially stage build binaries somewhere on GCS 451 if o.stage.Enabled() { 452 if err := control.XMLWrap(&suite, "Stage", func() error { 453 return o.stage.Stage(o.noAllowDup) 454 }); err != nil { 455 return err 456 } 457 } 458 459 // Potentially download existing binaries and extract them. 460 if o.extract.Enabled() { 461 err := control.XMLWrap(&suite, "Extract", func() error { 462 // Should we restore a previous state? 463 // Restore if we are not upping the cluster 464 if o.save != "" { 465 if !o.up { 466 // Restore version and .kube/config from --up 467 log.Printf("Overwriting extract strategy to load kubeconfig and version from %s", o.save) 468 o.extract = extractStrategies{ 469 extractStrategy{ 470 mode: load, 471 option: o.save, 472 }, 473 } 474 } 475 } 476 477 // New deployment, extract new version 478 return o.extract.Extract(o.gcpProject, o.gcpZone, o.gcpRegion, o.extractCIBucket, o.extractReleaseBucket, o.extractSource) 479 }) 480 if err != nil { 481 return err 482 } 483 } 484 return nil 485 } 486 487 // Returns the k8s version name 488 func findVersion() string { 489 // The version may be in a version file 490 if _, err := os.Stat("version"); err == nil { 491 b, err := os.ReadFile("version") 492 if err == nil { 493 return strings.TrimSpace(string(b)) 494 } 495 log.Printf("Failed to read version: %v", err) 496 } 497 498 // We can also get it from the git repo. 499 if _, err := os.Stat("hack/lib/version.sh"); err == nil { 500 // TODO(fejta): do this in go. At least we removed the upload-to-gcs.sh dep. 501 gross := `. hack/lib/version.sh && KUBE_ROOT=. kube::version::get_version_vars && echo "${KUBE_GIT_VERSION-}"` 502 b, err := control.Output(exec.Command("bash", "-c", gross)) 503 if err == nil { 504 return strings.TrimSpace(string(b)) 505 } 506 log.Printf("Failed to get_version_vars: %v", err) 507 } 508 509 return "unknown" // Sad trombone 510 } 511 512 // maybeMergeMetadata will add new keyvals into the map; quietly eats errors. 513 func maybeMergeJSON(meta map[string]string, path string) { 514 if data, err := os.ReadFile(path); err == nil { 515 json.Unmarshal(data, &meta) 516 } 517 } 518 519 // Write metadata.json, including version and env arg data. 520 func writeMetadata(path, metadataSources string) error { 521 m := make(map[string]string) 522 523 // Look for any sources of metadata and load 'em 524 for _, f := range strings.Split(metadataSources, ",") { 525 maybeMergeJSON(m, filepath.Join(path, f)) 526 } 527 528 ver := findVersion() 529 m["job-version"] = ver // TODO(krzyzacy): retire 530 m["revision"] = ver 531 m["kubetest-version"] = gitTag 532 re := regexp.MustCompile(`^BUILD_METADATA_(.+)$`) 533 for _, e := range os.Environ() { 534 p := strings.SplitN(e, "=", 2) 535 r := re.FindStringSubmatch(p[0]) 536 if r == nil { 537 continue 538 } 539 k, v := strings.ToLower(r[1]), p[1] 540 m[k] = v 541 } 542 f, err := os.Create(filepath.Join(path, "metadata.json")) 543 if err != nil { 544 return err 545 } 546 defer f.Close() 547 e := json.NewEncoder(f) 548 return e.Encode(m) 549 } 550 551 // Install cloudsdk tarball to location, updating PATH 552 func installGcloud(tarball string, location string) error { 553 554 if err := os.MkdirAll(location, 0775); err != nil { 555 return err 556 } 557 558 if err := control.FinishRunning(exec.Command("tar", "xzf", tarball, "-C", location)); err != nil { 559 return err 560 } 561 562 if err := control.FinishRunning(exec.Command(filepath.Join(location, "google-cloud-sdk", "install.sh"), "--disable-installation-options", "--bash-completion=false", "--path-update=false", "--usage-reporting=false")); err != nil { 563 return err 564 } 565 566 if err := util.InsertPath(filepath.Join(location, "google-cloud-sdk", "bin")); err != nil { 567 return err 568 } 569 570 if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "alpha")); err != nil { 571 return err 572 } 573 574 if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "beta")); err != nil { 575 return err 576 } 577 578 if err := control.FinishRunning(exec.Command("gcloud", "info")); err != nil { 579 return err 580 } 581 return nil 582 } 583 584 func migrateGcpEnvAndOptions(o *options) error { 585 var network string 586 var zone string 587 switch o.provider { 588 case "gke": 589 network = "KUBE_GKE_NETWORK" 590 zone = "ZONE" 591 default: 592 network = "KUBE_GCE_NETWORK" 593 zone = "KUBE_GCE_ZONE" 594 } 595 return util.MigrateOptions([]util.MigratedOption{ 596 { 597 Env: "PROJECT", 598 Option: &o.gcpProject, 599 Name: "--gcp-project", 600 }, 601 { 602 Env: zone, 603 Option: &o.gcpZone, 604 Name: "--gcp-zone", 605 }, 606 { 607 Env: "REGION", 608 Option: &o.gcpRegion, 609 Name: "--gcp-region", 610 }, 611 { 612 Env: "GOOGLE_APPLICATION_CREDENTIALS", 613 Option: &o.gcpServiceAccount, 614 Name: "--gcp-service-account", 615 }, 616 { 617 Env: network, 618 Option: &o.gcpNetwork, 619 Name: "--gcp-network", 620 }, 621 { 622 Env: "KUBE_NODE_OS_DISTRIBUTION", 623 Option: &o.gcpNodeImage, 624 Name: "--gcp-node-image", 625 }, 626 { 627 Env: "KUBE_MASTER_OS_DISTRIBUTION", 628 Option: &o.gcpMasterImage, 629 Name: "--gcp-master-image", 630 }, 631 { 632 Env: "NUM_NODES", 633 Option: &o.gcpNodes, 634 Name: "--gcp-nodes", 635 }, 636 { 637 Env: "NODE_SIZE", 638 Option: &o.gcpNodeSize, 639 Name: "--gcp-node-size", 640 }, 641 { 642 Env: "MASTER_SIZE", 643 Option: &o.gcpMasterSize, 644 Name: "--gcp-master-size", 645 }, 646 { 647 Env: "CLOUDSDK_BUCKET", 648 Option: &o.gcpCloudSdk, 649 Name: "--gcp-cloud-sdk", 650 SkipPush: true, 651 }, 652 }) 653 } 654 655 func prepareGcp(o *options) error { 656 if err := migrateGcpEnvAndOptions(o); err != nil { 657 return err 658 } 659 // Must happen before any gcloud commands 660 if err := activateServiceAccount(o.gcpServiceAccount); err != nil { 661 return err 662 } 663 664 if o.provider == "gce" { 665 if distro := os.Getenv("KUBE_OS_DISTRIBUTION"); distro != "" { 666 log.Printf("Please use --gcp-master-image=%s --gcp-node-image=%s (instead of deprecated KUBE_OS_DISTRIBUTION)", 667 distro, distro) 668 // Note: KUBE_OS_DISTRIBUTION takes precedence over 669 // KUBE_{MASTER,NODE}_OS_DISTRIBUTION, so override here 670 // after the migration above. 671 o.gcpNodeImage = distro 672 o.gcpMasterImage = distro 673 if err := os.Setenv("KUBE_NODE_OS_DISTRIBUTION", distro); err != nil { 674 return fmt.Errorf("could not set KUBE_NODE_OS_DISTRIBUTION=%s: %w", distro, err) 675 } 676 if err := os.Setenv("KUBE_MASTER_OS_DISTRIBUTION", distro); err != nil { 677 return fmt.Errorf("could not set KUBE_MASTER_OS_DISTRIBUTION=%s: %w", distro, err) 678 } 679 } 680 681 hasGCPImageFamily, hasGCPImageProject := len(o.gcpImageFamily) != 0, len(o.gcpImageProject) != 0 682 if hasGCPImageFamily != hasGCPImageProject { 683 return fmt.Errorf("--image-family and --image-project must be both set or unset") 684 } 685 if hasGCPImageFamily && hasGCPImageProject { 686 out, err := control.Output(exec.Command("gcloud", "compute", "images", "describe-from-family", o.gcpImageFamily, "--project", o.gcpImageProject)) 687 if err != nil { 688 return fmt.Errorf("failed to get latest image from family %q in project %q: %s", o.gcpImageFamily, o.gcpImageProject, err) 689 } 690 latestImage := "" 691 latestImageRegexp := regexp.MustCompile(`^name: *(\S+)`) 692 for _, line := range strings.Split(string(out), "\n") { 693 matches := latestImageRegexp.FindStringSubmatch(line) 694 if len(matches) == 2 { 695 latestImage = matches[1] 696 break 697 } 698 } 699 if len(latestImage) == 0 { 700 return fmt.Errorf("failed to get latest image from family %q in project %q", o.gcpImageFamily, o.gcpImageProject) 701 } 702 if o.deployment == "node" { 703 o.nodeArgs += fmt.Sprintf(" --images=%s --image-project=%s", latestImage, o.gcpImageProject) 704 } else { 705 os.Setenv("KUBE_GCE_NODE_IMAGE", latestImage) 706 os.Setenv("KUBE_GCE_NODE_PROJECT", o.gcpImageProject) 707 } 708 } 709 } else if o.provider == "gke" { 710 if o.deployment == "" { 711 o.deployment = "gke" 712 } 713 if o.deployment != "gke" { 714 return fmt.Errorf("expected --deployment=gke for --provider=gke, found --deployment=%s", o.deployment) 715 } 716 if o.gcpMasterImage != "" { 717 return fmt.Errorf("expected --gcp-master-image to be empty for --provider=gke, found --gcp-master-image=%s", o.gcpMasterImage) 718 } 719 if o.gcpNodes != "" { 720 return fmt.Errorf("--gcp-nodes cannot be set on GKE, use --gke-shape instead") 721 } 722 if o.gcpNodeSize != "" { 723 return fmt.Errorf("--gcp-node-size cannot be set on GKE, use --gke-shape instead") 724 } 725 if o.gcpMasterSize != "" { 726 return fmt.Errorf("--gcp-master-size cannot be set on GKE, where it's auto-computed") 727 } 728 729 // TODO(kubernetes/test-infra#3536): This is used by the 730 // ginkgo-e2e.sh wrapper. 731 nod := o.gcpNodeImage 732 if nod == "container_vm" { 733 // gcloud container clusters create understands 734 // "container_vm", e2es understand "debian". 735 nod = "debian" 736 } 737 if nod == "cos_containerd" { 738 // gcloud container clusters create understands 739 // "cos_containerd", e2es only understand 740 // "gci"/"cos", 741 nod = "gci" 742 } 743 os.Setenv("NODE_OS_DISTRIBUTION", nod) 744 } 745 if o.gcpProject == "" { 746 log.Print("--gcp-project is missing, trying to fetch a project from boskos.\n" + 747 "(for local runs please set --gcp-project to your dev project)") 748 749 var resType string 750 if o.gcpProjectType != "" { 751 resType = o.gcpProjectType 752 } else if o.provider == "gke" { 753 resType = "gke-project" 754 } else { 755 resType = "gce-project" 756 } 757 758 log.Printf("provider %v, will acquire project type %v from boskos", o.provider, resType) 759 760 // let's retry 5min to get next available resource 761 ctx, cancel := context.WithTimeout(context.Background(), o.boskosWaitDuration) 762 defer cancel() 763 p, err := boskos.AcquireWait(ctx, resType, "free", "busy") 764 if err != nil { 765 return fmt.Errorf("--provider=%s boskos failed to acquire project: %w", o.provider, err) 766 } 767 768 if p == nil { 769 return fmt.Errorf("boskos does not have a free %s at the moment", resType) 770 } 771 772 go func(c *client.Client, proj string) { 773 for range time.Tick(time.Minute * 5) { 774 if err := c.UpdateOne(p.Name, "busy", nil); err != nil { 775 log.Printf("[Boskos] Update of %s failed with %v", p.Name, err) 776 } 777 } 778 }(boskos, p.Name) 779 o.gcpProject = p.Name 780 } 781 782 if err := os.Setenv("CLOUDSDK_CORE_PRINT_UNHANDLED_TRACEBACKS", "1"); err != nil { 783 return fmt.Errorf("could not set CLOUDSDK_CORE_PRINT_UNHANDLED_TRACEBACKS=1: %w", err) 784 } 785 786 if err := control.FinishRunning(exec.Command("gcloud", "config", "set", "project", o.gcpProject)); err != nil { 787 return fmt.Errorf("fail to set project %s : err %w", o.gcpProject, err) 788 } 789 790 // TODO(krzyzacy):Remove this when we retire migrateGcpEnvAndOptions 791 // Note that a lot of scripts are still depend on this env in k/k repo. 792 if err := os.Setenv("PROJECT", o.gcpProject); err != nil { 793 return fmt.Errorf("fail to set env var PROJECT %s : err %w", o.gcpProject, err) 794 } 795 796 // Ensure ssh keys exist 797 log.Print("Checking existing of GCP ssh keys...") 798 k := filepath.Join(util.Home(".ssh"), "google_compute_engine") 799 if _, err := os.Stat(k); err != nil { 800 return err 801 } 802 pk := k + ".pub" 803 if _, err := os.Stat(pk); err != nil { 804 return err 805 } 806 807 log.Printf("Checking presence of public key in %s", o.gcpProject) 808 if out, err := control.Output(exec.Command("gcloud", "compute", "--project="+o.gcpProject, "project-info", "describe")); err != nil { 809 return err 810 } else if b, err := os.ReadFile(pk); err != nil { 811 return err 812 } else if !strings.Contains(string(out), string(b)) { 813 log.Print("Uploading public ssh key to project metadata...") 814 if err = control.FinishRunning(exec.Command("gcloud", "compute", "--project="+o.gcpProject, "config-ssh")); err != nil { 815 return err 816 } 817 } 818 819 // Install custom gcloud version if necessary 820 if o.gcpCloudSdk != "" { 821 for i := 0; i < 3; i++ { 822 if err := control.FinishRunning(exec.Command("gsutil", "-mq", "cp", "-r", o.gcpCloudSdk, util.Home())); err == nil { 823 break // Success! 824 } 825 time.Sleep(1 << uint(i) * time.Second) 826 } 827 for _, f := range []string{util.Home(".gsutil"), util.Home("repo"), util.Home("cloudsdk")} { 828 if _, err := os.Stat(f); err == nil || !os.IsNotExist(err) { 829 if err = os.RemoveAll(f); err != nil { 830 return err 831 } 832 } 833 } 834 835 install := util.Home("repo", "google-cloud-sdk.tar.gz") 836 if strings.HasSuffix(o.gcpCloudSdk, ".tar.gz") { 837 install = util.Home(filepath.Base(o.gcpCloudSdk)) 838 } else { 839 if err := os.Rename(util.Home(filepath.Base(o.gcpCloudSdk)), util.Home("repo")); err != nil { 840 return err 841 } 842 843 // Controls which gcloud components to install. 844 pop, err := util.PushEnv("CLOUDSDK_COMPONENT_MANAGER_SNAPSHOT_URL", "file://"+util.Home("repo", "components-2.json")) 845 if err != nil { 846 return err 847 } 848 defer pop() 849 } 850 851 if err := installGcloud(install, util.Home("cloudsdk")); err != nil { 852 return err 853 } 854 // gcloud creds may have changed 855 if err := activateServiceAccount(o.gcpServiceAccount); err != nil { 856 return err 857 } 858 } 859 860 if o.kubemark { 861 if p := os.Getenv("KUBEMARK_BAZEL_BUILD"); strings.ToLower(p) == "y" { 862 // we need docker-credential-gcr to get authed properly 863 // https://github.com/bazelbuild/rules_docker#authorization 864 if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "docker-credential-gcr")); err != nil { 865 return err 866 } 867 if err := control.FinishRunning(exec.Command("docker-credential-gcr", "configure-docker")); err != nil { 868 return err 869 } 870 } 871 } 872 873 return nil 874 } 875 876 func prepareAws(o *options) error { 877 // gcloud creds may have changed 878 if err := activateServiceAccount(o.gcpServiceAccount); err != nil { 879 return err 880 } 881 return control.FinishRunning(exec.Command("pip", "install", "awscli")) 882 } 883 884 // Activate GOOGLE_APPLICATION_CREDENTIALS if set or do nothing. 885 func activateServiceAccount(path string) error { 886 if path == "" { 887 return nil 888 } 889 return control.FinishRunning(exec.Command("gcloud", "auth", "activate-service-account", "--key-file="+path)) 890 } 891 892 func prepare(o *options) error { 893 if err := util.MigrateOptions([]util.MigratedOption{ 894 { 895 Env: "KUBERNETES_PROVIDER", 896 Option: &o.provider, 897 Name: "--provider", 898 }, 899 { 900 Env: "CLUSTER_NAME", 901 Option: &o.cluster, 902 Name: "--cluster", 903 }, 904 }); err != nil { 905 return err 906 } 907 if err := prepareGinkgoParallel(&o.ginkgoParallel); err != nil { 908 return err 909 } 910 911 switch o.provider { 912 case "gce", "gke", "node": 913 if err := prepareGcp(o); err != nil { 914 return err 915 } 916 case "aws": 917 if err := prepareAws(o); err != nil { 918 return err 919 } 920 } 921 922 if o.kubemark { 923 if err := util.MigrateOptions([]util.MigratedOption{ 924 { 925 Env: "KUBEMARK_NUM_NODES", 926 Option: &o.kubemarkNodes, 927 Name: "--kubemark-nodes", 928 }, 929 { 930 Env: "KUBEMARK_MASTER_SIZE", 931 Option: &o.kubemarkMasterSize, 932 Name: "--kubemark-master-size", 933 }, 934 }); err != nil { 935 return err 936 } 937 } 938 939 if err := os.MkdirAll(artifacts, 0777); err != nil { // Create artifacts 940 return err 941 } 942 943 return nil 944 } 945 946 type ginkgoParallelValue struct { 947 v int // 0 == not set (defaults to 1) 948 } 949 950 func (v *ginkgoParallelValue) IsBoolFlag() bool { 951 return true 952 } 953 954 func (v *ginkgoParallelValue) String() string { 955 if v.v == 0 { 956 return "1" 957 } 958 return strconv.Itoa(v.v) 959 } 960 961 func (v *ginkgoParallelValue) Set(s string) error { 962 if s == "" { 963 v.v = 0 964 return nil 965 } 966 if s == "true" { 967 v.v = defaultGinkgoParallel 968 return nil 969 } 970 p, err := strconv.Atoi(s) 971 if err != nil { 972 return fmt.Errorf("--ginkgo-parallel must be an integer, found %q", s) 973 } 974 if p < 1 { 975 return fmt.Errorf("--ginkgo-parallel must be >= 1, found %d", p) 976 } 977 v.v = p 978 return nil 979 } 980 981 func (v *ginkgoParallelValue) Type() string { 982 return "ginkgoParallelValue" 983 } 984 985 func (v *ginkgoParallelValue) Get() int { 986 if v.v == 0 { 987 return 1 988 } 989 return v.v 990 } 991 992 var _ flag.Value = &ginkgoParallelValue{} 993 994 // Hand migrate this option. GINKGO_PARALLEL => GINKGO_PARALLEL_NODES=25 995 func prepareGinkgoParallel(v *ginkgoParallelValue) error { 996 if p := os.Getenv("GINKGO_PARALLEL"); strings.ToLower(p) == "y" { 997 log.Printf("Please use kubetest --ginkgo-parallel (instead of deprecated GINKGO_PARALLEL=y)") 998 if err := v.Set("true"); err != nil { 999 return err 1000 } 1001 os.Unsetenv("GINKGO_PARALLEL") 1002 } 1003 if p := os.Getenv("GINKGO_PARALLEL_NODES"); p != "" { 1004 log.Printf("Please use kubetest --ginkgo-parallel=%s (instead of deprecated GINKGO_PARALLEL_NODES=%s)", p, p) 1005 if err := v.Set(p); err != nil { 1006 return err 1007 } 1008 } 1009 os.Setenv("GINKGO_PARALLEL_NODES", v.String()) 1010 return nil 1011 } 1012 1013 func publish(pub string) error { 1014 v, err := os.ReadFile("version") 1015 if err != nil { 1016 return err 1017 } 1018 log.Printf("Set %s version to %s", pub, string(v)) 1019 return gcsWrite(pub, v) 1020 }