github.com/jenkins-x/test-infra@v0.0.7/kubetest/main.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"encoding/json"
    21  	"errors"
    22  	"flag"
    23  	"fmt"
    24  	"io/ioutil"
    25  	"log"
    26  	"math/rand"
    27  	"os"
    28  	"os/exec"
    29  	"os/signal"
    30  	"path/filepath"
    31  	"regexp"
    32  	"strconv"
    33  	"strings"
    34  	"time"
    35  
    36  	"github.com/spf13/pflag"
    37  
    38  	"k8s.io/test-infra/boskos/client"
    39  	"k8s.io/test-infra/kubetest/conformance"
    40  	"k8s.io/test-infra/kubetest/eks"
    41  	"k8s.io/test-infra/kubetest/kubeadmdind"
    42  	"k8s.io/test-infra/kubetest/process"
    43  	"k8s.io/test-infra/kubetest/util"
    44  )
    45  
// defaultGinkgoParallel is the runner count quoted in the --ginkgo-parallel
// help text.
// Hardcoded in ginkgo-e2e.sh
const defaultGinkgoParallel = 25
    48  
// Package-level state shared by the kubetest steps.
//
// interrupt and terminate are created with a zero duration; complete() stops
// (and, if needed, drains) both of them before arming interrupt with the
// --timeout value. timeout and verbose are bound to command-line flags in
// defineFlags. control is built here from the not-yet-parsed values and is
// rebuilt in main() once flags have been parsed.
var (
	artifacts = filepath.Join(os.Getenv("WORKSPACE"), "_artifacts")
	interrupt = time.NewTimer(time.Duration(0)) // interrupt testing at this time.
	terminate = time.NewTimer(time.Duration(0)) // terminate testing at this time.
	verbose   = false
	timeout   = time.Duration(0)
	boskos    = client.NewClient(os.Getenv("JOB_NAME"), "http://boskos.test-pods.svc.cluster.local.")
	control   = process.NewControl(timeout, interrupt, terminate, verbose)
)
    58  
// options holds every value configurable from the kubetest command line.
// Most fields are bound to a flag in defineFlags, whose help strings describe
// what each one controls; fields may additionally be adjusted after parsing
// (for example main() forces flushMemAfterBuild when KUBETEST_IN_DOCKER=true,
// and complete() may switch up on for an expired soak cluster).
type options struct {
	build               buildStrategy
	buildFederation     buildFederationStrategy
	charts              bool
	checkLeaks          bool
	checkSkew           bool
	cluster             string
	clusterIPRange      string
	deployment          string
	down                bool
	dump                string
	dumpPreTestLogs     string
	extract             extractStrategies
	extractFederation   extractFederationStrategies
	extractSource       bool
	federation          bool
	flushMemAfterBuild  bool
	focusRegex          string
	gcpCloudSdk         string
	gcpMasterImage      string
	gcpMasterSize       string
	gcpNetwork          string
	gcpNodeImage        string
	gcpImageFamily      string
	gcpImageProject     string
	gcpNodes            string
	gcpNodeSize         string
	gcpProject          string
	gcpProjectType      string
	gcpServiceAccount   string
	gcpRegion           string
	gcpZone             string
	ginkgoParallel      ginkgoParallelValue
	kubecfg             string
	kubemark            bool
	kubemarkMasterSize  string
	kubemarkNodes       string // TODO(fejta): switch to int after migration
	logexporterGCSPath  string
	metadataSources     string
	multiClusters       multiClusterDeployment
	multipleFederations bool
	noAllowDup          bool
	nodeArgs            string
	nodeTestArgs        string
	nodeTests           bool
	provider            string
	publish             string
	runtimeConfig       string
	save                string
	skew                bool
	skipRegex           string
	soak                bool
	soakDuration        time.Duration
	sshUser             string
	stage               stageStrategy
	stageFederation     stageFederationStrategy
	test                bool
	testArgs            string
	testCmd             string
	testCmdName         string
	testCmdArgs         []string
	up                  bool
	upgradeArgs         string
}
   123  
   124  func defineFlags() *options {
   125  	o := options{}
   126  	flag.Var(&o.build, "build", "Rebuild k8s binaries, optionally forcing (release|quick|bazel) strategy")
   127  	flag.Var(&o.buildFederation, "build-federation", "Rebuild federation binaries, optionally forcing (release|quick|bazel) strategy")
   128  	flag.BoolVar(&o.charts, "charts", false, "If true, run charts tests")
   129  	flag.BoolVar(&o.checkSkew, "check-version-skew", true, "Verify client and server versions match")
   130  	flag.BoolVar(&o.checkLeaks, "check-leaked-resources", false, "Ensure project ends with the same resources")
   131  	flag.StringVar(&o.cluster, "cluster", "", "Cluster name. Must be set for --deployment=gke (TODO: other deployments).")
   132  	flag.StringVar(&o.clusterIPRange, "cluster-ip-range", "", "Specifies CLUSTER_IP_RANGE value during --up and --test (only relevant for --deployment=bash). Auto-calculated if empty.")
   133  	flag.StringVar(&o.deployment, "deployment", "bash", "Choices: none/bash/conformance/gke/eks/kops/kubernetes-anywhere/node/local")
   134  	flag.BoolVar(&o.down, "down", false, "If true, tear down the cluster before exiting.")
   135  	flag.StringVar(&o.dump, "dump", "", "If set, dump bring-up and cluster logs to this location on test or cluster-up failure")
   136  	flag.StringVar(&o.dumpPreTestLogs, "dump-pre-test-logs", "", "If set, dump cluster logs to this location before running tests")
   137  	flag.Var(&o.extract, "extract", "Extract k8s binaries from the specified release location")
   138  	flag.Var(&o.extractFederation, "extract-federation", "Extract federation binaries from the specified release location")
   139  	flag.BoolVar(&o.extractSource, "extract-source", false, "Extract k8s src together with other tarballs")
   140  	flag.BoolVar(&o.federation, "federation", false, "If true, start/tear down the federation control plane along with the clusters. To only start/tear down the federation control plane, specify --deployment=none")
   141  	flag.BoolVar(&o.flushMemAfterBuild, "flush-mem-after-build", false, "If true, try to flush container memory after building")
   142  	flag.Var(&o.ginkgoParallel, "ginkgo-parallel", fmt.Sprintf("Run Ginkgo tests in parallel, default %d runners. Use --ginkgo-parallel=N to specify an exact count.", defaultGinkgoParallel))
   143  	flag.StringVar(&o.gcpCloudSdk, "gcp-cloud-sdk", "", "Install/upgrade google-cloud-sdk to the gs:// path if set")
   144  	flag.StringVar(&o.gcpProject, "gcp-project", "", "For use with gcloud commands")
   145  	flag.StringVar(&o.gcpProjectType, "gcp-project-type", "", "Explicitly indicate which project type to select from boskos")
   146  	flag.StringVar(&o.gcpServiceAccount, "gcp-service-account", "", "Service account to activate before using gcloud")
   147  	flag.StringVar(&o.gcpZone, "gcp-zone", "", "For use with gcloud commands")
   148  	flag.StringVar(&o.gcpRegion, "gcp-region", "", "For use with gcloud commands")
   149  	flag.StringVar(&o.gcpNetwork, "gcp-network", "", "Cluster network. Must be set for --deployment=gke (TODO: other deployments).")
   150  	flag.StringVar(&o.gcpMasterImage, "gcp-master-image", "", "Master image type (cos|debian on GCE, n/a on GKE)")
   151  	flag.StringVar(&o.gcpMasterSize, "gcp-master-size", "", "(--provider=gce only) Size of master to create (e.g n1-standard-1). Auto-calculated if left empty.")
   152  	flag.StringVar(&o.gcpNodeImage, "gcp-node-image", "", "Node image type (cos|container_vm on GKE, cos|debian on GCE)")
   153  	flag.StringVar(&o.gcpImageFamily, "image-family", "", "Node image family from which to use the latest image, required when --gcp-node-image=CUSTOM")
   154  	flag.StringVar(&o.gcpImageProject, "image-project", "", "Project containing node image family, required when --gcp-node-image=CUSTOM")
   155  	flag.StringVar(&o.gcpNodes, "gcp-nodes", "", "(--provider=gce only) Number of nodes to create.")
   156  	flag.StringVar(&o.gcpNodeSize, "gcp-node-size", "", "(--provider=gce only) Size of nodes to create (e.g n1-standard-1).")
   157  	flag.StringVar(&o.kubecfg, "kubeconfig", "", "The location of a kubeconfig file.")
   158  	flag.StringVar(&o.focusRegex, "ginkgo-focus", "", "The ginkgo regex to focus. Currently only respected for (dind).")
   159  	flag.StringVar(&o.skipRegex, "ginkgo-skip", "", "The ginkgo regex to skip. Currently only respected for (dind).")
   160  	flag.BoolVar(&o.kubemark, "kubemark", false, "If true, run kubemark tests.")
   161  	flag.StringVar(&o.kubemarkMasterSize, "kubemark-master-size", "", "Kubemark master size (only relevant if --kubemark=true). Auto-calculated based on '--kubemark-nodes' if left empty.")
   162  	flag.StringVar(&o.kubemarkNodes, "kubemark-nodes", "5", "Number of kubemark nodes to start (only relevant if --kubemark=true).")
   163  	flag.StringVar(&o.logexporterGCSPath, "logexporter-gcs-path", "", "Path to the GCS artifacts directory to dump logs from nodes. Logexporter gets enabled if this is non-empty")
   164  	flag.StringVar(&o.metadataSources, "metadata-sources", "images.json", "Comma-separated list of files inside ./artifacts to merge into metadata.json")
   165  	flag.Var(&o.multiClusters, "multi-clusters", "If set, bring up/down multiple clusters specified. Format is [Zone1:]Cluster1[,[ZoneN:]ClusterN]]*. Zone is optional and default zone is used if zone is not specified")
   166  	flag.BoolVar(&o.multipleFederations, "multiple-federations", false, "If true, enable running multiple federation control planes in parallel")
   167  	flag.StringVar(&o.nodeArgs, "node-args", "", "Args for node e2e tests.")
   168  	flag.StringVar(&o.nodeTestArgs, "node-test-args", "", "Test args specifically for node e2e tests.")
   169  	flag.BoolVar(&o.noAllowDup, "no-allow-dup", false, "if set --allow-dup will not be passed to push-build and --stage will error if the build already exists on the gcs path")
   170  	flag.BoolVar(&o.nodeTests, "node-tests", false, "If true, run node-e2e tests.")
   171  	flag.StringVar(&o.provider, "provider", "", "Kubernetes provider such as gce, gke, aws, eks, etc")
   172  	flag.StringVar(&o.publish, "publish", "", "Publish version to the specified gs:// path on success")
   173  	flag.StringVar(&o.runtimeConfig, "runtime-config", "batch/v2alpha1=true", "If set, API versions can be turned on or off while bringing up the API server.")
   174  	flag.StringVar(&o.stage.dockerRegistry, "registry", "", "Push images to the specified docker registry (e.g. gcr.io/a-test-project)")
   175  	flag.StringVar(&o.save, "save", "", "Save credentials to gs:// path on --up if set (or load from there if not --up)")
   176  	flag.BoolVar(&o.skew, "skew", false, "If true, run tests in another version at ../kubernetes/hack/e2e.go")
   177  	flag.BoolVar(&o.soak, "soak", false, "If true, job runs in soak mode")
   178  	flag.DurationVar(&o.soakDuration, "soak-duration", 7*24*time.Hour, "Maximum age of a soak cluster before it gets recycled")
   179  	flag.Var(&o.stage, "stage", "Upload binaries to gs://bucket/devel/job-suffix if set")
   180  	flag.Var(&o.stageFederation, "stage-federation", "Upload federation binaries to gs://bucket/devel/job-suffix if set")
   181  	flag.StringVar(&o.stage.versionSuffix, "stage-suffix", "", "Append suffix to staged version when set")
   182  	flag.BoolVar(&o.test, "test", false, "Run Ginkgo tests.")
   183  	flag.StringVar(&o.testArgs, "test_args", "", "Space-separated list of arguments to pass to Ginkgo test runner.")
   184  	flag.StringVar(&o.testCmd, "test-cmd", "", "command to run against the cluster instead of Ginkgo e2e tests")
   185  	flag.StringVar(&o.testCmdName, "test-cmd-name", "", "name to log the test command as in xml results")
   186  	flag.DurationVar(&timeout, "timeout", time.Duration(0), "Terminate testing after the timeout duration (s/m/h)")
   187  	flag.BoolVar(&o.up, "up", false, "If true, start the e2e cluster. If cluster is already up, recreate it.")
   188  	flag.StringVar(&o.upgradeArgs, "upgrade_args", "", "If set, run upgrade tests before other tests")
   189  
   190  	// The "-v" flag was also used by glog, which is used by k8s.io/client-go. Duplicate flags cause panics.
   191  	// 1. Even if we could convince glog to change, they have too many consumers to ever do so.
   192  	// 2. The glog lib parses flags during init. It is impossible to dynamically rewrite the args before they're parsed by glog.
   193  	// 3. The glog lib takes an int value, so "-v false" is an error.
   194  	// 4. It's possible, but unlikely, we could convince k8s.io/client-go to use a logging shim, because a library shouldn't force a logging implementation. This would take a major version release for the lib.
   195  	//
   196  	// The most reasonable solution is to accept that we shouldn't have made a single-letter global, and rename all references to this variable.
   197  	flag.BoolVar(&verbose, "verbose-commands", true, "If true, print all command output.")
   198  
   199  	// go flag does not support StringArrayVar
   200  	pflag.StringArrayVar(&o.testCmdArgs, "test-cmd-args", []string{}, "args for test-cmd")
   201  	return &o
   202  }
   203  
// suite is the test suite passed to control.XMLWrap for each wrapped step and
// handed to control.WriteXML from complete() when --dump is set.
var suite util.TestSuite
   205  
   206  func validWorkingDirectory() error {
   207  	cwd, err := os.Getwd()
   208  	if err != nil {
   209  		return fmt.Errorf("could not get pwd: %v", err)
   210  	}
   211  	acwd, err := filepath.Abs(cwd)
   212  	if err != nil {
   213  		return fmt.Errorf("failed to convert %s to an absolute path: %v", cwd, err)
   214  	}
   215  	// This also matches "kubernetes_skew" for upgrades.
   216  	if !strings.Contains(filepath.Base(acwd), "kubernetes") {
   217  		return fmt.Errorf("must run from kubernetes directory root: %v", acwd)
   218  	}
   219  	return nil
   220  }
   221  
// deployer is the set of cluster operations kubetest needs from a deployment
// strategy. getDeployer selects the implementation from --deployment.
type deployer interface {
	Up() error
	IsUp() error
	DumpClusterLogs(localPath, gcsPath string) error
	TestSetup() error
	// Down is called from the ^C handler installed by complete() when --down
	// is set.
	Down() error
	// GetClusterCreated is consulted by complete() in --soak mode to decide
	// whether the existing cluster is older than --soak-duration.
	GetClusterCreated(gcpProject string) (time.Time, error)
	KubectlCommand() (*exec.Cmd, error)
}
   231  
// publisher is an optional interface implemented by deployers that want to
// publish status on success.
type publisher interface {
	// Publish is called when the tests were successful; the deployer should publish a success file
	Publish() error
}
   237  
   238  func getDeployer(o *options) (deployer, error) {
   239  	switch o.deployment {
   240  	case "bash":
   241  		return newBash(&o.clusterIPRange), nil
   242  	case "conformance":
   243  		return conformance.NewDeployer(o.kubecfg)
   244  	case "gke":
   245  		return newGKE(o.provider, o.gcpProject, o.gcpZone, o.gcpRegion, o.gcpNetwork, o.gcpNodeImage, o.gcpImageFamily, o.gcpImageProject, o.cluster, &o.testArgs, &o.upgradeArgs)
   246  	case "eks":
   247  		return eks.NewDeployer(timeout, verbose)
   248  	case "kops":
   249  		return newKops(o.provider, o.gcpProject, o.cluster)
   250  	case "kubeadm-dind":
   251  		return kubeadmdind.NewDeployer(control)
   252  	case "kubernetes-anywhere":
   253  		if o.multiClusters.Enabled() {
   254  			return newKubernetesAnywhereMultiCluster(o.gcpProject, o.gcpZone, o.multiClusters)
   255  		}
   256  		return newKubernetesAnywhere(o.gcpProject, o.gcpZone)
   257  	case "node":
   258  		return nodeDeploy{}, nil
   259  	case "none":
   260  		return noneDeploy{}, nil
   261  	case "local":
   262  		return newLocalCluster(), nil
   263  	case "acsengine":
   264  		return newAcsEngine()
   265  	default:
   266  		return nil, fmt.Errorf("unknown deployment strategy %q", o.deployment)
   267  	}
   268  }
   269  
   270  func validateFlags(o *options) error {
   271  	if o.multiClusters.Enabled() && o.deployment != "kubernetes-anywhere" {
   272  		return errors.New("--multi-clusters flag cannot be passed with deployments other than 'kubernetes-anywhere'")
   273  	}
   274  	if !o.extract.Enabled() && o.extractSource {
   275  		return errors.New("--extract-source flag cannot be passed without --extract")
   276  	}
   277  	return nil
   278  }
   279  
   280  func main() {
   281  	log.SetFlags(log.LstdFlags | log.Lshortfile)
   282  
   283  	// Initialize global pseudo random generator. Initializing it to select random AWS Zones.
   284  	rand.Seed(time.Now().UnixNano())
   285  
   286  	pflag.CommandLine = pflag.NewFlagSet(os.Args[0], pflag.ContinueOnError)
   287  	o := defineFlags()
   288  	pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
   289  	if err := pflag.CommandLine.Parse(os.Args[1:]); err != nil {
   290  		log.Fatalf("Flag parse failed: %v", err)
   291  	}
   292  
   293  	if err := validateFlags(o); err != nil {
   294  		log.Fatalf("Flags validation failed. err: %v", err)
   295  	}
   296  
   297  	control = process.NewControl(timeout, interrupt, terminate, verbose)
   298  
   299  	// do things when we know we are running in the kubetest image
   300  	if os.Getenv("KUBETEST_IN_DOCKER") == "true" {
   301  		o.flushMemAfterBuild = true
   302  	}
   303  
   304  	err := complete(o)
   305  
   306  	if boskos.HasResource() {
   307  		if berr := boskos.ReleaseAll("dirty"); berr != nil {
   308  			log.Fatalf("[Boskos] Fail To Release: %v, kubetest err: %v", berr, err)
   309  		}
   310  	}
   311  
   312  	if err != nil {
   313  		log.Fatalf("Something went wrong: %v", err)
   314  	}
   315  }
   316  
   317  func complete(o *options) error {
   318  	if !terminate.Stop() {
   319  		<-terminate.C // Drain the value if necessary.
   320  	}
   321  	if !interrupt.Stop() {
   322  		<-interrupt.C // Drain value
   323  	}
   324  
   325  	if timeout > 0 {
   326  		log.Printf("Limiting testing to %s", timeout)
   327  		interrupt.Reset(timeout)
   328  	}
   329  
   330  	if o.dump != "" {
   331  		defer writeMetadata(o.dump, o.metadataSources)
   332  		defer control.WriteXML(&suite, o.dump, time.Now())
   333  	}
   334  	if o.logexporterGCSPath != "" {
   335  		o.testArgs += fmt.Sprintf(" --logexporter-gcs-path=%s", o.logexporterGCSPath)
   336  	}
   337  	if err := prepare(o); err != nil {
   338  		return fmt.Errorf("failed to prepare test environment: %v", err)
   339  	}
   340  	if err := prepareFederation(o); err != nil {
   341  		return fmt.Errorf("failed to prepare federation test environment: %v", err)
   342  	}
   343  	// Get the deployer before we acquire k8s so any additional flag
   344  	// verifications happen early.
   345  	deploy, err := getDeployer(o)
   346  	if err != nil {
   347  		return fmt.Errorf("error creating deployer: %v", err)
   348  	}
   349  
   350  	// Check soaking before run tests
   351  	if o.soak {
   352  		if created, err := deploy.GetClusterCreated(o.gcpProject); err != nil {
   353  			// continue, but log the error
   354  			log.Printf("deploy %v, GetClusterCreated failed: %v", o.deployment, err)
   355  		} else {
   356  			if time.Now().After(created.Add(o.soakDuration)) {
   357  				// flip up on - which will tear down previous cluster and start a new one
   358  				log.Printf("Previous soak cluster created at %v, will recreate the cluster", created)
   359  				o.up = true
   360  			}
   361  		}
   362  	}
   363  
   364  	if err := acquireKubernetes(o); err != nil {
   365  		return fmt.Errorf("failed to acquire k8s binaries: %v", err)
   366  	}
   367  	if err := acquireFederation(o); err != nil {
   368  		return fmt.Errorf("failed to acquire federation binaries: %v", err)
   369  	}
   370  	if o.extract.Enabled() {
   371  		// If we specified `--extract-source` we will already be in the correct directory
   372  		if !o.extractSource {
   373  			if err := os.Chdir("kubernetes"); err != nil {
   374  				return fmt.Errorf("failed to chdir to kubernetes dir: %v", err)
   375  			}
   376  		}
   377  	}
   378  	if err := validWorkingDirectory(); err != nil {
   379  		return fmt.Errorf("called from invalid working directory: %v", err)
   380  	}
   381  
   382  	if o.down {
   383  		// listen for signals such as ^C and gracefully attempt to clean up
   384  		c := make(chan os.Signal, 1)
   385  		signal.Notify(c, os.Interrupt)
   386  		go func() {
   387  			for range c {
   388  				log.Print("Captured ^C, gracefully attempting to cleanup resources..")
   389  				var fedErr, err error
   390  				if o.federation {
   391  					if fedErr = fedDown(); fedErr != nil {
   392  						log.Printf("Tearing down federation failed: %v", fedErr)
   393  					}
   394  				}
   395  				if err = deploy.Down(); err != nil {
   396  					log.Printf("Tearing down deployment failed: %v", err)
   397  				}
   398  				if fedErr != nil || err != nil {
   399  					os.Exit(1)
   400  				}
   401  
   402  				os.Exit(2)
   403  			}
   404  		}()
   405  	}
   406  
   407  	if err := run(deploy, *o); err != nil {
   408  		return err
   409  	}
   410  
   411  	// Publish the successfully tested version when requested
   412  	if o.publish != "" {
   413  		if err := publish(o.publish); err != nil {
   414  			return err
   415  		}
   416  	}
   417  	return nil
   418  }
   419  
   420  func acquireKubernetes(o *options) error {
   421  	// Potentially build kubernetes
   422  	if o.build.Enabled() {
   423  		err := control.XMLWrap(&suite, "Build", o.build.Build)
   424  		if o.flushMemAfterBuild {
   425  			util.FlushMem()
   426  		}
   427  		if err != nil {
   428  			return err
   429  		}
   430  	}
   431  
   432  	// Potentially stage build binaries somewhere on GCS
   433  	if o.stage.Enabled() {
   434  		if err := control.XMLWrap(&suite, "Stage", func() error {
   435  			return o.stage.Stage(o.federation, o.noAllowDup)
   436  		}); err != nil {
   437  			return err
   438  		}
   439  	}
   440  
   441  	// Potentially download existing binaries and extract them.
   442  	if o.extract.Enabled() {
   443  		err := control.XMLWrap(&suite, "Extract", func() error {
   444  			// Should we restore a previous state?
   445  			// Restore if we are not upping the cluster or we are bringing up
   446  			// a federation control plane without the federated clusters.
   447  			if o.save != "" {
   448  				if !o.up {
   449  					// Restore version and .kube/config from --up
   450  					log.Printf("Overwriting extract strategy to load kubeconfig and version from %s", o.save)
   451  					o.extract = extractStrategies{
   452  						extractStrategy{
   453  							mode:   load,
   454  							option: o.save,
   455  						},
   456  					}
   457  				} else if o.federation && o.up && o.deployment == "none" {
   458  					// Only restore .kube/config from previous --up, use the regular
   459  					// extraction strategy to restore version.
   460  					log.Printf("Load kubeconfig from %s", o.save)
   461  					loadKubeconfig(o.save)
   462  				}
   463  			}
   464  
   465  			// New deployment, extract new version
   466  			return o.extract.Extract(o.gcpProject, o.gcpZone, o.gcpRegion, o.extractSource)
   467  		})
   468  		if err != nil {
   469  			return err
   470  		}
   471  	}
   472  	return nil
   473  }
   474  
   475  func acquireFederation(o *options) error {
   476  	// Potentially build federation
   477  	if o.buildFederation.Enabled() {
   478  		err := control.XMLWrap(&suite, "BuildFederation", o.buildFederation.Build)
   479  		if o.flushMemAfterBuild {
   480  			util.FlushMem()
   481  		}
   482  		if err != nil {
   483  			return err
   484  		}
   485  	}
   486  
   487  	// Potentially stage federation binaries somewhere on GCS
   488  	if o.stageFederation.Enabled() {
   489  		if err := control.XMLWrap(&suite, "StageFederation", func() error {
   490  			return o.stageFederation.Stage()
   491  		}); err != nil {
   492  			return err
   493  		}
   494  	}
   495  
   496  	// Potentially download existing federation binaries and extract them.
   497  	if o.extractFederation.Enabled() {
   498  		err := control.XMLWrap(&suite, "ExtractFederation", func() error {
   499  			return o.extractFederation.Extract(o.gcpProject, o.gcpZone)
   500  		})
   501  		return err
   502  	}
   503  	return nil
   504  }
   505  
   506  // Returns the k8s version name
   507  func findVersion() string {
   508  	// The version may be in a version file
   509  	if _, err := os.Stat("version"); err == nil {
   510  		b, err := ioutil.ReadFile("version")
   511  		if err == nil {
   512  			return strings.TrimSpace(string(b))
   513  		}
   514  		log.Printf("Failed to read version: %v", err)
   515  	}
   516  
   517  	// We can also get it from the git repo.
   518  	if _, err := os.Stat("hack/lib/version.sh"); err == nil {
   519  		// TODO(fejta): do this in go. At least we removed the upload-to-gcs.sh dep.
   520  		gross := `. hack/lib/version.sh && KUBE_ROOT=. kube::version::get_version_vars && echo "${KUBE_GIT_VERSION-}"`
   521  		b, err := control.Output(exec.Command("bash", "-c", gross))
   522  		if err == nil {
   523  			return strings.TrimSpace(string(b))
   524  		}
   525  		log.Printf("Failed to get_version_vars: %v", err)
   526  	}
   527  
   528  	return "unknown" // Sad trombone
   529  }
   530  
   531  // maybeMergeMetadata will add new keyvals into the map; quietly eats errors.
   532  func maybeMergeJSON(meta map[string]string, path string) {
   533  	if data, err := ioutil.ReadFile(path); err == nil {
   534  		json.Unmarshal(data, &meta)
   535  	}
   536  }
   537  
   538  // Write metadata.json, including version and env arg data.
   539  func writeMetadata(path, metadataSources string) error {
   540  	m := make(map[string]string)
   541  
   542  	// Look for any sources of metadata and load 'em
   543  	for _, f := range strings.Split(metadataSources, ",") {
   544  		maybeMergeJSON(m, filepath.Join(path, f))
   545  	}
   546  
   547  	ver := findVersion()
   548  	m["job-version"] = ver // TODO(krzyzacy): retire
   549  	m["revision"] = ver
   550  	re := regexp.MustCompile(`^BUILD_METADATA_(.+)$`)
   551  	for _, e := range os.Environ() {
   552  		p := strings.SplitN(e, "=", 2)
   553  		r := re.FindStringSubmatch(p[0])
   554  		if r == nil {
   555  			continue
   556  		}
   557  		k, v := strings.ToLower(r[1]), p[1]
   558  		m[k] = v
   559  	}
   560  	f, err := os.Create(filepath.Join(path, "metadata.json"))
   561  	if err != nil {
   562  		return err
   563  	}
   564  	defer f.Close()
   565  	e := json.NewEncoder(f)
   566  	return e.Encode(m)
   567  }
   568  
   569  // Install cloudsdk tarball to location, updating PATH
   570  func installGcloud(tarball string, location string) error {
   571  
   572  	if err := os.MkdirAll(location, 0775); err != nil {
   573  		return err
   574  	}
   575  
   576  	if err := control.FinishRunning(exec.Command("tar", "xzf", tarball, "-C", location)); err != nil {
   577  		return err
   578  	}
   579  
   580  	if err := control.FinishRunning(exec.Command(filepath.Join(location, "google-cloud-sdk", "install.sh"), "--disable-installation-options", "--bash-completion=false", "--path-update=false", "--usage-reporting=false")); err != nil {
   581  		return err
   582  	}
   583  
   584  	if err := util.InsertPath(filepath.Join(location, "google-cloud-sdk", "bin")); err != nil {
   585  		return err
   586  	}
   587  
   588  	if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "alpha")); err != nil {
   589  		return err
   590  	}
   591  
   592  	if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "beta")); err != nil {
   593  		return err
   594  	}
   595  
   596  	if err := control.FinishRunning(exec.Command("gcloud", "info")); err != nil {
   597  		return err
   598  	}
   599  	return nil
   600  }
   601  
   602  func migrateGcpEnvAndOptions(o *options) error {
   603  	var network string
   604  	var zone string
   605  	switch o.provider {
   606  	case "gke":
   607  		network = "KUBE_GKE_NETWORK"
   608  		zone = "ZONE"
   609  	default:
   610  		network = "KUBE_GCE_NETWORK"
   611  		zone = "KUBE_GCE_ZONE"
   612  	}
   613  	return util.MigrateOptions([]util.MigratedOption{
   614  		{
   615  			Env:    "PROJECT",
   616  			Option: &o.gcpProject,
   617  			Name:   "--gcp-project",
   618  		},
   619  		{
   620  			Env:    zone,
   621  			Option: &o.gcpZone,
   622  			Name:   "--gcp-zone",
   623  		},
   624  		{
   625  			Env:    "REGION",
   626  			Option: &o.gcpRegion,
   627  			Name:   "--gcp-region",
   628  		},
   629  		{
   630  			Env:    "GOOGLE_APPLICATION_CREDENTIALS",
   631  			Option: &o.gcpServiceAccount,
   632  			Name:   "--gcp-service-account",
   633  		},
   634  		{
   635  			Env:    network,
   636  			Option: &o.gcpNetwork,
   637  			Name:   "--gcp-network",
   638  		},
   639  		{
   640  			Env:    "KUBE_NODE_OS_DISTRIBUTION",
   641  			Option: &o.gcpNodeImage,
   642  			Name:   "--gcp-node-image",
   643  		},
   644  		{
   645  			Env:    "KUBE_MASTER_OS_DISTRIBUTION",
   646  			Option: &o.gcpMasterImage,
   647  			Name:   "--gcp-master-image",
   648  		},
   649  		{
   650  			Env:    "NUM_NODES",
   651  			Option: &o.gcpNodes,
   652  			Name:   "--gcp-nodes",
   653  		},
   654  		{
   655  			Env:    "NODE_SIZE",
   656  			Option: &o.gcpNodeSize,
   657  			Name:   "--gcp-node-size",
   658  		},
   659  		{
   660  			Env:    "MASTER_SIZE",
   661  			Option: &o.gcpMasterSize,
   662  			Name:   "--gcp-master-size",
   663  		},
   664  		{
   665  			Env:      "CLOUDSDK_BUCKET",
   666  			Option:   &o.gcpCloudSdk,
   667  			Name:     "--gcp-cloud-sdk",
   668  			SkipPush: true,
   669  		},
   670  	})
   671  }
   672  
   673  func prepareGcp(o *options) error {
   674  	if err := migrateGcpEnvAndOptions(o); err != nil {
   675  		return err
   676  	}
   677  	if o.provider == "gce" {
   678  		if distro := os.Getenv("KUBE_OS_DISTRIBUTION"); distro != "" {
   679  			log.Printf("Please use --gcp-master-image=%s --gcp-node-image=%s (instead of deprecated KUBE_OS_DISTRIBUTION)",
   680  				distro, distro)
   681  			// Note: KUBE_OS_DISTRIBUTION takes precedence over
   682  			// KUBE_{MASTER,NODE}_OS_DISTRIBUTION, so override here
   683  			// after the migration above.
   684  			o.gcpNodeImage = distro
   685  			o.gcpMasterImage = distro
   686  			if err := os.Setenv("KUBE_NODE_OS_DISTRIBUTION", distro); err != nil {
   687  				return fmt.Errorf("could not set KUBE_NODE_OS_DISTRIBUTION=%s: %v", distro, err)
   688  			}
   689  			if err := os.Setenv("KUBE_MASTER_OS_DISTRIBUTION", distro); err != nil {
   690  				return fmt.Errorf("could not set KUBE_MASTER_OS_DISTRIBUTION=%s: %v", distro, err)
   691  			}
   692  		}
   693  
   694  		hasGCPImageFamily, hasGCPImageProject := len(o.gcpImageFamily) != 0, len(o.gcpImageProject) != 0
   695  		if hasGCPImageFamily != hasGCPImageProject {
   696  			return fmt.Errorf("--image-family and --image-project must be both set or unset")
   697  		}
   698  		if hasGCPImageFamily && hasGCPImageProject {
   699  			out, err := control.Output(exec.Command("gcloud", "compute", "images", "describe-from-family", o.gcpImageFamily, "--project", o.gcpImageProject))
   700  			if err != nil {
   701  				return fmt.Errorf("failed to get latest image from family %q in project %q: %s", o.gcpImageFamily, o.gcpImageProject, err)
   702  			}
   703  			latestImage := ""
   704  			latestImageRegexp := regexp.MustCompile("^name: *(\\S+)")
   705  			for _, line := range strings.Split(string(out), "\n") {
   706  				matches := latestImageRegexp.FindStringSubmatch(line)
   707  				if len(matches) == 2 {
   708  					latestImage = matches[1]
   709  					break
   710  				}
   711  			}
   712  			if len(latestImage) == 0 {
   713  				return fmt.Errorf("failed to get latest image from family %q in project %q", o.gcpImageFamily, o.gcpImageProject)
   714  			}
   715  			if o.deployment == "node" {
   716  				o.nodeArgs += fmt.Sprintf(" --images=%s --image-project=%s", latestImage, o.gcpImageProject)
   717  			} else {
   718  				os.Setenv("KUBE_GCE_NODE_IMAGE", latestImage)
   719  				os.Setenv("KUBE_GCE_NODE_PROJECT", o.gcpImageProject)
   720  			}
   721  		}
   722  	} else if o.provider == "gke" {
   723  		if o.deployment == "" {
   724  			o.deployment = "gke"
   725  		}
   726  		if o.deployment != "gke" {
   727  			return fmt.Errorf("expected --deployment=gke for --provider=gke, found --deployment=%s", o.deployment)
   728  		}
   729  		if o.gcpNodeImage == "" {
   730  			return fmt.Errorf("--gcp-node-image must be set for GKE")
   731  		}
   732  		if o.gcpMasterImage != "" {
   733  			return fmt.Errorf("expected --gcp-master-image to be empty for --provider=gke, found --gcp-master-image=%s", o.gcpMasterImage)
   734  		}
   735  		if o.gcpNodes != "" {
   736  			return fmt.Errorf("--gcp-nodes cannot be set on GKE, use --gke-shape instead")
   737  		}
   738  		if o.gcpNodeSize != "" {
   739  			return fmt.Errorf("--gcp-node-size cannot be set on GKE, use --gke-shape instead")
   740  		}
   741  		if o.gcpMasterSize != "" {
   742  			return fmt.Errorf("--gcp-master-size cannot be set on GKE, where it's auto-computed")
   743  		}
   744  
   745  		// TODO(kubernetes/test-infra#3536): This is used by the
   746  		// ginkgo-e2e.sh wrapper.
   747  		nod := o.gcpNodeImage
   748  		if nod == "container_vm" {
   749  			// gcloud container clusters create understands
   750  			// "container_vm", e2es understand "debian".
   751  			nod = "debian"
   752  		}
   753  		if nod == "cos_containerd" {
   754  			// gcloud container clusters create understands
   755  			// "cos_containerd", e2es only understand
   756  			// "gci"/"cos",
   757  			nod = "gci"
   758  		}
   759  		os.Setenv("NODE_OS_DISTRIBUTION", nod)
   760  	}
   761  	if o.gcpProject == "" {
   762  		log.Print("--gcp-project is missing, trying to fetch a project from boskos.\n" +
   763  			"(for local runs please set --gcp-project to your dev project)")
   764  
   765  		var resType string
   766  		if o.gcpProjectType != "" {
   767  			resType = o.gcpProjectType
   768  		} else if o.provider == "gke" {
   769  			resType = "gke-project"
   770  		} else {
   771  			resType = "gce-project"
   772  		}
   773  
   774  		log.Printf("provider %v, will acquire project type %v from boskos", o.provider, resType)
   775  
   776  		p, err := boskos.Acquire(resType, "free", "busy")
   777  		if err != nil {
   778  			return fmt.Errorf("--provider=%s boskos failed to acquire project: %v", o.provider, err)
   779  		}
   780  
   781  		if p == nil {
   782  			return fmt.Errorf("boskos does not have a free %s at the moment", resType)
   783  		}
   784  
   785  		go func(c *client.Client, proj string) {
   786  			for range time.Tick(time.Minute * 5) {
   787  				if err := c.UpdateOne(p.Name, "busy", nil); err != nil {
   788  					log.Printf("[Boskos] Update of %s failed with %v", p.Name, err)
   789  				}
   790  			}
   791  		}(boskos, p.Name)
   792  		o.gcpProject = p.Name
   793  	}
   794  
   795  	if err := os.Setenv("CLOUDSDK_CORE_PRINT_UNHANDLED_TRACEBACKS", "1"); err != nil {
   796  		return fmt.Errorf("could not set CLOUDSDK_CORE_PRINT_UNHANDLED_TRACEBACKS=1: %v", err)
   797  	}
   798  
   799  	if err := control.FinishRunning(exec.Command("gcloud", "config", "set", "project", o.gcpProject)); err != nil {
   800  		return fmt.Errorf("fail to set project %s : err %v", o.gcpProject, err)
   801  	}
   802  
   803  	// TODO(krzyzacy):Remove this when we retire migrateGcpEnvAndOptions
   804  	// Note that a lot of scripts are still depend on this env in k/k repo.
   805  	if err := os.Setenv("PROJECT", o.gcpProject); err != nil {
   806  		return fmt.Errorf("fail to set env var PROJECT %s : err %v", o.gcpProject, err)
   807  	}
   808  
   809  	// gcloud creds may have changed
   810  	if err := activateServiceAccount(o.gcpServiceAccount); err != nil {
   811  		return err
   812  	}
   813  
   814  	// Ensure ssh keys exist
   815  	log.Print("Checking existing of GCP ssh keys...")
   816  	k := filepath.Join(util.Home(".ssh"), "google_compute_engine")
   817  	if _, err := os.Stat(k); err != nil {
   818  		return err
   819  	}
   820  	pk := k + ".pub"
   821  	if _, err := os.Stat(pk); err != nil {
   822  		return err
   823  	}
   824  
   825  	log.Printf("Checking presence of public key in %s", o.gcpProject)
   826  	if out, err := control.Output(exec.Command("gcloud", "compute", "--project="+o.gcpProject, "project-info", "describe")); err != nil {
   827  		return err
   828  	} else if b, err := ioutil.ReadFile(pk); err != nil {
   829  		return err
   830  	} else if !strings.Contains(string(out), string(b)) {
   831  		log.Print("Uploading public ssh key to project metadata...")
   832  		if err = control.FinishRunning(exec.Command("gcloud", "compute", "--project="+o.gcpProject, "config-ssh")); err != nil {
   833  			return err
   834  		}
   835  	}
   836  
   837  	// Install custom gcloud version if necessary
   838  	if o.gcpCloudSdk != "" {
   839  		for i := 0; i < 3; i++ {
   840  			if err := control.FinishRunning(exec.Command("gsutil", "-mq", "cp", "-r", o.gcpCloudSdk, util.Home())); err == nil {
   841  				break // Success!
   842  			}
   843  			time.Sleep(1 << uint(i) * time.Second)
   844  		}
   845  		for _, f := range []string{util.Home(".gsutil"), util.Home("repo"), util.Home("cloudsdk")} {
   846  			if _, err := os.Stat(f); err == nil || !os.IsNotExist(err) {
   847  				if err = os.RemoveAll(f); err != nil {
   848  					return err
   849  				}
   850  			}
   851  		}
   852  
   853  		install := util.Home("repo", "google-cloud-sdk.tar.gz")
   854  		if strings.HasSuffix(o.gcpCloudSdk, ".tar.gz") {
   855  			install = util.Home(filepath.Base(o.gcpCloudSdk))
   856  		} else {
   857  			if err := os.Rename(util.Home(filepath.Base(o.gcpCloudSdk)), util.Home("repo")); err != nil {
   858  				return err
   859  			}
   860  
   861  			// Controls which gcloud components to install.
   862  			pop, err := util.PushEnv("CLOUDSDK_COMPONENT_MANAGER_SNAPSHOT_URL", "file://"+util.Home("repo", "components-2.json"))
   863  			if err != nil {
   864  				return err
   865  			}
   866  			defer pop()
   867  		}
   868  
   869  		if err := installGcloud(install, util.Home("cloudsdk")); err != nil {
   870  			return err
   871  		}
   872  		// gcloud creds may have changed
   873  		if err := activateServiceAccount(o.gcpServiceAccount); err != nil {
   874  			return err
   875  		}
   876  	}
   877  
   878  	if o.kubemark {
   879  		if p := os.Getenv("KUBEMARK_BAZEL_BUILD"); strings.ToLower(p) == "y" {
   880  			// we need docker-credential-gcr to get authed properly
   881  			// https://github.com/bazelbuild/rules_docker#authorization
   882  			if err := control.FinishRunning(exec.Command("gcloud", "components", "install", "docker-credential-gcr")); err != nil {
   883  				return err
   884  			}
   885  			if err := control.FinishRunning(exec.Command("docker-credential-gcr", "configure-docker")); err != nil {
   886  				return err
   887  			}
   888  		}
   889  	}
   890  
   891  	return nil
   892  }
   893  
   894  func prepareAws(o *options) error {
   895  	// gcloud creds may have changed
   896  	if err := activateServiceAccount(o.gcpServiceAccount); err != nil {
   897  		return err
   898  	}
   899  	return control.FinishRunning(exec.Command("pip", "install", "awscli"))
   900  }
   901  
   902  // Activate GOOGLE_APPLICATION_CREDENTIALS if set or do nothing.
   903  func activateServiceAccount(path string) error {
   904  	if path == "" {
   905  		return nil
   906  	}
   907  	return control.FinishRunning(exec.Command("gcloud", "auth", "activate-service-account", "--key-file="+path))
   908  }
   909  
   910  // Make all artifacts world readable.
   911  // The root user winds up owning the files when the container exists.
   912  // Ensure that other users can read these files at that time.
   913  func chmodArtifacts() error {
   914  	return control.FinishRunning(exec.Command("chmod", "-R", "o+r", artifacts))
   915  }
   916  
   917  func prepare(o *options) error {
   918  	if err := util.MigrateOptions([]util.MigratedOption{
   919  		{
   920  			Env:    "KUBERNETES_PROVIDER",
   921  			Option: &o.provider,
   922  			Name:   "--provider",
   923  		},
   924  		{
   925  			Env:    "CLUSTER_NAME",
   926  			Option: &o.cluster,
   927  			Name:   "--cluster",
   928  		},
   929  	}); err != nil {
   930  		return err
   931  	}
   932  	if err := prepareGinkgoParallel(&o.ginkgoParallel); err != nil {
   933  		return err
   934  	}
   935  
   936  	switch o.provider {
   937  	case "gce", "gke", "node":
   938  		if err := prepareGcp(o); err != nil {
   939  			return err
   940  		}
   941  	case "aws":
   942  		if err := prepareAws(o); err != nil {
   943  			return err
   944  		}
   945  	}
   946  	// For kubernetes-anywhere as the deployer, call prepareGcp()
   947  	// independent of the specified provider.
   948  	if o.deployment == "kubernetes-anywhere" {
   949  		if err := prepareGcp(o); err != nil {
   950  			return err
   951  		}
   952  	}
   953  
   954  	if o.kubemark {
   955  		if err := util.MigrateOptions([]util.MigratedOption{
   956  			{
   957  				Env:    "KUBEMARK_NUM_NODES",
   958  				Option: &o.kubemarkNodes,
   959  				Name:   "--kubemark-nodes",
   960  			},
   961  			{
   962  				Env:    "KUBEMARK_MASTER_SIZE",
   963  				Option: &o.kubemarkMasterSize,
   964  				Name:   "--kubemark-master-size",
   965  			},
   966  		}); err != nil {
   967  			return err
   968  		}
   969  	}
   970  
   971  	if err := os.MkdirAll(artifacts, 0777); err != nil { // Create artifacts
   972  		return err
   973  	}
   974  
   975  	return nil
   976  }
   977  
   978  func prepareFederation(o *options) error {
   979  	if o.multipleFederations {
   980  		// TODO(fejta): use boskos to grab a federation cluster
   981  		// Note: EXECUTOR_NUMBER and NODE_NAME are Jenkins
   982  		// specific environment variables. So this doesn't work
   983  		// when we move away from Jenkins.
   984  		execNum := os.Getenv("EXECUTOR_NUMBER")
   985  		if execNum == "" {
   986  			execNum = "0"
   987  		}
   988  		suffix := fmt.Sprintf("%s-%s", os.Getenv("NODE_NAME"), execNum)
   989  		federationName := fmt.Sprintf("e2e-f8n-%s", suffix)
   990  		federationSystemNamespace := fmt.Sprintf("f8n-system-%s", suffix)
   991  		err := os.Setenv("FEDERATION_NAME", federationName)
   992  		if err != nil {
   993  			return err
   994  		}
   995  		return os.Setenv("FEDERATION_NAMESPACE", federationSystemNamespace)
   996  	}
   997  	return nil
   998  }
   999  
  1000  type ginkgoParallelValue struct {
  1001  	v int // 0 == not set (defaults to 1)
  1002  }
  1003  
  1004  func (v *ginkgoParallelValue) IsBoolFlag() bool {
  1005  	return true
  1006  }
  1007  
  1008  func (v *ginkgoParallelValue) String() string {
  1009  	if v.v == 0 {
  1010  		return "1"
  1011  	}
  1012  	return strconv.Itoa(v.v)
  1013  }
  1014  
  1015  func (v *ginkgoParallelValue) Set(s string) error {
  1016  	if s == "" {
  1017  		v.v = 0
  1018  		return nil
  1019  	}
  1020  	if s == "true" {
  1021  		v.v = defaultGinkgoParallel
  1022  		return nil
  1023  	}
  1024  	p, err := strconv.Atoi(s)
  1025  	if err != nil {
  1026  		return fmt.Errorf("--ginkgo-parallel must be an integer, found %q", s)
  1027  	}
  1028  	if p < 1 {
  1029  		return fmt.Errorf("--ginkgo-parallel must be >= 1, found %d", p)
  1030  	}
  1031  	v.v = p
  1032  	return nil
  1033  }
  1034  
  1035  func (v *ginkgoParallelValue) Type() string {
  1036  	return "ginkgoParallelValue"
  1037  }
  1038  
  1039  func (v *ginkgoParallelValue) Get() int {
  1040  	if v.v == 0 {
  1041  		return 1
  1042  	}
  1043  	return v.v
  1044  }
  1045  
  1046  var _ flag.Value = &ginkgoParallelValue{}
  1047  
  1048  // Hand migrate this option. GINKGO_PARALLEL => GINKGO_PARALLEL_NODES=25
  1049  func prepareGinkgoParallel(v *ginkgoParallelValue) error {
  1050  	if p := os.Getenv("GINKGO_PARALLEL"); strings.ToLower(p) == "y" {
  1051  		log.Printf("Please use kubetest --ginkgo-parallel (instead of deprecated GINKGO_PARALLEL=y)")
  1052  		if err := v.Set("true"); err != nil {
  1053  			return err
  1054  		}
  1055  		os.Unsetenv("GINKGO_PARALLEL")
  1056  	}
  1057  	if p := os.Getenv("GINKGO_PARALLEL_NODES"); p != "" {
  1058  		log.Printf("Please use kubetest --ginkgo-parallel=%s (instead of deprecated GINKGO_PARALLEL_NODES=%s)", p, p)
  1059  		if err := v.Set(p); err != nil {
  1060  			return err
  1061  		}
  1062  	}
  1063  	os.Setenv("GINKGO_PARALLEL_NODES", v.String())
  1064  	return nil
  1065  }
  1066  
  1067  func publish(pub string) error {
  1068  	v, err := ioutil.ReadFile("version")
  1069  	if err != nil {
  1070  		return err
  1071  	}
  1072  	log.Printf("Set %s version to %s", pub, string(v))
  1073  	return gcsWrite(pub, v)
  1074  }