github.com/jenkins-x/test-infra@v0.0.7/scenarios/kubernetes_e2e.py

github.com/jenkins-x/test-infra@v0.0.7/scenarios/kubernetes_e2e.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2017 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Need to figure out why this only fails on travis
    18  # pylint: disable=bad-continuation
    19  
    20  """Runs kubernetes e2e test with specified config"""
    21  
    22  import argparse
    23  import hashlib
    24  import os
    25  import random
    26  import re
    27  import shutil
    28  import subprocess
    29  import sys
    30  import urllib2
    31  import time
    32  
    33  ORIG_CWD = os.getcwd()  # Checkout changes cwd
    34  
    35  # The zones below are the zones available in the CNCF account (in theory, zones vary by account)
    36  # We aim for 3 zones per region to try to maintain even spreading.
    37  # We also remove a few zones where our preferred instance type is not available,
    38  # though really this needs a better fix (likely in kops)
    39  DEFAULT_AWS_ZONES = [
    40      'ap-northeast-1a',
    41      'ap-northeast-1c',
    42      'ap-northeast-1d',
    43      'ap-northeast-2a',
    44      #'ap-northeast-2b' - AZ does not exist, so we're breaking the 3 AZs per region target here
    45      'ap-northeast-2c',
    46      'ap-south-1a',
    47      'ap-south-1b',
    48      'ap-southeast-1a',
    49      'ap-southeast-1b',
    50      'ap-southeast-1c',
    51      'ap-southeast-2a',
    52      'ap-southeast-2b',
    53      'ap-southeast-2c',
    54      'ca-central-1a',
    55      'ca-central-1b',
    56      'eu-central-1a',
    57      'eu-central-1b',
    58      'eu-central-1c',
    59      'eu-west-1a',
    60      'eu-west-1b',
    61      'eu-west-1c',
    62      'eu-west-2a',
    63      'eu-west-2b',
    64      'eu-west-2c',
    65      #'eu-west-3a', documented to not support c4 family
    66      #'eu-west-3b', documented to not support c4 family
    67      #'eu-west-3c', documented to not support c4 family
    68      'sa-east-1a',
    69      #'sa-east-1b', AZ does not exist, so we're breaking the 3 AZs per region target here
    70      'sa-east-1c',
    71      #'us-east-1a', # temporarily removing due to lack of quota #10043
    72      #'us-east-1b', # temporarily removing due to lack of quota #10043
    73      #'us-east-1c', # temporarily removing due to lack of quota #10043
    74      #'us-east-1d', # limiting to 3 zones to not overallocate
    75      #'us-east-1e', # limiting to 3 zones to not overallocate
    76      #'us-east-1f', # limiting to 3 zones to not overallocate
    77      #'us-east-2a', InsufficientInstanceCapacity for c4.large 2018-05-30
    78      #'us-east-2b', InsufficientInstanceCapacity for c4.large 2018-05-30
    79      #'us-east-2c', InsufficientInstanceCapacity for c4.large 2018-05-30
    80      'us-west-1a',
    81      'us-west-1b',
    82      #'us-west-1c', AZ does not exist, so we're breaking the 3 AZs per region target here
    83      #'us-west-2a', # temporarily removing due to lack of quota #10043
    84      #'us-west-2b', # temporarily removing due to lack of quota #10043
    85      #'us-west-2c', # temporarily removing due to lack of quota #10043
    86  ]
    87  
    88  def test_infra(*paths):
    89      """Return path relative to root of test-infra repo."""
    90      return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)
    91  
    92  
    93  def check(*cmd):
    94      """Log and run the command, raising on errors."""
    95      print >>sys.stderr, 'Run:', cmd
    96      subprocess.check_call(cmd)
    97  
    98  
    99  def check_output(*cmd):
   100      """Log and run the command, raising on errors, return output"""
   101      print >>sys.stderr, 'Run:', cmd
   102      return subprocess.check_output(cmd)
   103  
   104  
   105  def check_env(env, *cmd):
   106      """Log and run the command with a specific env, raising on errors."""
   107      print >>sys.stderr, 'Environment:'
   108      for key, value in sorted(env.items()):
   109          print >>sys.stderr, '%s=%s' % (key, value)
   110      print >>sys.stderr, 'Run:', cmd
   111      subprocess.check_call(cmd, env=env)
   112  
   113  
   114  def kubekins(tag):
   115      """Return full path to kubekins-e2e:tag."""
   116      return 'gcr.io/k8s-testimages/kubekins-e2e:%s' % tag
   117  
   118  def parse_env(env):
   119      """Returns (FOO, BAR=MORE) for FOO=BAR=MORE."""
   120      return env.split('=', 1)
   121  
   122  def aws_role_config(profile, arn):
   123      return (('[profile jenkins-assumed-role]\n' +
   124               'role_arn = %s\n' +
   125               'source_profile = %s\n') %
   126              (arn, profile))
   127  
   128  def kubeadm_version(mode, shared_build_gcs_path):
   129      """Return string to use for kubeadm version, given the job's mode (ci/pull/periodic)."""
   130      version = ''
   131      if mode in ['ci', 'periodic']:
   132          # This job only runs against the kubernetes repo, and bootstrap.py leaves the
   133          # current working directory at the repository root. Grab the SCM_REVISION so we
   134          # can use the .debs built during the bazel-build job that should have already
   135          # succeeded.
   136          status = re.search(
   137              r'STABLE_BUILD_SCM_REVISION ([^\n]+)',
   138              check_output('hack/print-workspace-status.sh')
   139          )
   140          if not status:
   141              raise ValueError('STABLE_BUILD_SCM_REVISION not found')
   142          version = status.group(1)
   143  
   144          # The path given here should match ci-kubernetes-bazel-build
   145          return 'gs://kubernetes-release-dev/ci/%s-bazel/bin/linux/amd64/' % version
   146  
   147      elif mode == 'pull':
   148          # The format of shared_build_gcs_path looks like:
   149          # gs://kubernetes-release-dev/bazel/<git-describe-output>
   150          # Add bin/linux/amd64 yet to that path so it points to the dir with the debs
   151          return '%s/bin/linux/amd64/' % shared_build_gcs_path
   152  
   153      elif mode == 'stable':
   154          # This job need not run against the kubernetes repo and uses the stable version
   155          # of kubeadm packages. This mode may be desired when kubeadm itself is not the
   156          # SUT (System Under Test).
   157          return 'stable'
   158  
   159      else:
   160          raise ValueError("Unknown kubeadm mode given: %s" % mode)
   161  
   162  class LocalMode(object):
   163      """Runs e2e tests by calling kubetest."""
   164      def __init__(self, workspace, artifacts):
   165          self.command = 'kubetest'
   166          self.workspace = workspace
   167          self.artifacts = artifacts
   168          self.env = []
   169          self.os_env = []
   170          self.env_files = []
   171          self.add_environment(
   172              'HOME=%s' % workspace,
   173              'WORKSPACE=%s' % workspace,
   174              'PATH=%s' % os.getenv('PATH'),
   175          )
   176  
   177      def add_environment(self, *envs):
   178          """Adds FOO=BAR to the list of environment overrides."""
   179          self.env.extend(parse_env(e) for e in envs)
   180  
   181      def add_os_environment(self, *envs):
   182          """Adds FOO=BAR to the list of os environment overrides."""
   183          self.os_env.extend(parse_env(e) for e in envs)
   184  
   185      def add_file(self, env_file):
   186          """Reads all FOO=BAR lines from env_file."""
   187          with open(env_file) as fp:
   188              for line in fp:
   189                  line = line.rstrip()
   190                  if not line or line.startswith('#'):
   191                      continue
   192                  self.env_files.append(parse_env(line))
   193  
   194      def add_env(self, env):
   195          self.env_files.append(parse_env(env))
   196  
   197      def add_aws_cred(self, priv, pub, cred):
   198          """Sets aws keys and credentials."""
   199          ssh_dir = os.path.join(self.workspace, '.ssh')
   200          if not os.path.isdir(ssh_dir):
   201              os.makedirs(ssh_dir)
   202  
   203          cred_dir = os.path.join(self.workspace, '.aws')
   204          if not os.path.isdir(cred_dir):
   205              os.makedirs(cred_dir)
   206  
   207          aws_ssh = os.path.join(ssh_dir, 'kube_aws_rsa')
   208          aws_pub = os.path.join(ssh_dir, 'kube_aws_rsa.pub')
   209          aws_cred = os.path.join(cred_dir, 'credentials')
   210          shutil.copy(priv, aws_ssh)
   211          shutil.copy(pub, aws_pub)
   212          shutil.copy(cred, aws_cred)
   213  
   214          self.add_environment(
   215              'JENKINS_AWS_SSH_PRIVATE_KEY_FILE=%s' % priv,
   216              'JENKINS_AWS_SSH_PUBLIC_KEY_FILE=%s' % pub,
   217              'JENKINS_AWS_CREDENTIALS_FILE=%s' % cred,
   218          )
   219  
   220      def add_aws_role(self, profile, arn):
   221          with open(os.path.join(self.workspace, '.aws', 'config'), 'w') as cfg:
   222              cfg.write(aws_role_config(profile, arn))
   223          self.add_environment('AWS_SDK_LOAD_CONFIG=true')
   224          return 'jenkins-assumed-role'
   225  
   226      def add_gce_ssh(self, priv, pub):
   227          """Copies priv, pub keys to $WORKSPACE/.ssh."""
   228          ssh_dir = os.path.join(self.workspace, '.ssh')
   229          if not os.path.isdir(ssh_dir):
   230              os.makedirs(ssh_dir)
   231  
   232          gce_ssh = os.path.join(ssh_dir, 'google_compute_engine')
   233          gce_pub = os.path.join(ssh_dir, 'google_compute_engine.pub')
   234          shutil.copy(priv, gce_ssh)
   235          shutil.copy(pub, gce_pub)
   236          self.add_environment(
   237              'JENKINS_GCE_SSH_PRIVATE_KEY_FILE=%s' % gce_ssh,
   238              'JENKINS_GCE_SSH_PUBLIC_KEY_FILE=%s' % gce_pub,
   239          )
   240  
   241      @staticmethod
   242      def add_service_account(path):
   243          """Returns path."""
   244          return path
   245  
   246      def add_k8s(self, *a, **kw):
   247          """Add specified k8s.io repos (noop)."""
   248          pass
   249  
   250      def add_aws_runner(self):
   251          """Start with kops-e2e-runner.sh"""
   252          # TODO(Krzyzacy):retire kops-e2e-runner.sh
   253          self.command = os.path.join(self.workspace, 'kops-e2e-runner.sh')
   254  
   255      def start(self, args):
   256          """Starts kubetest."""
   257          print >>sys.stderr, 'starts with local mode'
   258          env = {}
   259          env.update(self.os_env)
   260          env.update(self.env_files)
   261          env.update(self.env)
   262          check_env(env, self.command, *args)
   263  
   264  
   265  def cluster_name(cluster, tear_down_previous=False):
   266      """Return or select a cluster name."""
   267      if cluster:
   268          return cluster
   269      # Create a suffix based on the build number and job name.
   270      # This ensures no conflict across runs of different jobs (see #7592).
   271      # For PR jobs, we use PR number instead of build number to ensure the
   272      # name is constant across different runs of the presubmit on the PR.
   273      # This helps clean potentially leaked resources from earlier run that
   274      # could've got evicted midway (see #7673).
   275      job_type = os.getenv('JOB_TYPE')
   276      if job_type == 'batch':
   277          suffix = 'batch-%s' % os.getenv('BUILD_ID', 0)
   278      elif job_type == 'presubmit' and tear_down_previous:
   279          suffix = '%s' % os.getenv('PULL_NUMBER', 0)
   280      else:
   281          suffix = '%s' % os.getenv('BUILD_ID', 0)
   282      if len(suffix) > 10:
   283          suffix = hashlib.md5(suffix).hexdigest()[:10]
   284      job_hash = hashlib.md5(os.getenv('JOB_NAME', '')).hexdigest()[:5]
   285      return 'e2e-%s-%s' % (suffix, job_hash)
   286  
   287  
   288  # TODO(krzyzacy): Move this into kubetest
   289  def build_kops(kops, mode):
   290      """Build kops, set kops related envs."""
   291      if not os.path.basename(kops) == 'kops':
   292          raise ValueError(kops)
   293      version = 'pull-' + check_output('git', 'describe', '--always').strip()
   294      job = os.getenv('JOB_NAME', 'pull-kops-e2e-kubernetes-aws')
   295      gcs = 'gs://kops-ci/pulls/%s' % job
   296      gapi = 'https://storage.googleapis.com/kops-ci/pulls/%s' % job
   297      mode.add_environment(
   298          'KOPS_BASE_URL=%s/%s' % (gapi, version),
   299          'GCS_LOCATION=%s' % gcs
   300          )
   301      check('make', 'gcs-publish-ci', 'VERSION=%s' % version, 'GCS_LOCATION=%s' % gcs)
   302  
   303  
   304  def set_up_kops_gce(workspace, args, mode, cluster, runner_args):
   305      """Set up kops on GCE envs."""
   306      for path in [args.gce_ssh, args.gce_pub]:
   307          if not os.path.isfile(os.path.expandvars(path)):
   308              raise IOError(path, os.path.expandvars(path))
   309      mode.add_gce_ssh(args.gce_ssh, args.gce_pub)
   310  
   311      gce_ssh = os.path.join(workspace, '.ssh', 'google_compute_engine')
   312  
   313      zones = args.kops_zones or random.choice([
   314          'us-central1-a',
   315          'us-central1-b',
   316          'us-central1-c',
   317          'us-central1-f',
   318      ])
   319  
   320      runner_args.extend([
   321          '--kops-cluster=%s' % cluster,
   322          '--kops-zones=%s' % zones,
   323          '--kops-state=%s' % args.kops_state_gce,
   324          '--kops-nodes=%s' % args.kops_nodes,
   325          '--kops-ssh-key=%s' % gce_ssh,
   326      ])
   327  
   328  
   329  def set_up_kops_aws(workspace, args, mode, cluster, runner_args):
   330      """Set up aws related envs for kops.  Will replace set_up_aws."""
   331      for path in [args.aws_ssh, args.aws_pub, args.aws_cred]:
   332          if not os.path.isfile(os.path.expandvars(path)):
   333              raise IOError(path, os.path.expandvars(path))
   334      mode.add_aws_cred(args.aws_ssh, args.aws_pub, args.aws_cred)
   335  
   336      aws_ssh = os.path.join(workspace, '.ssh', 'kube_aws_rsa')
   337      profile = args.aws_profile
   338      if args.aws_role_arn:
   339          profile = mode.add_aws_role(profile, args.aws_role_arn)
   340  
   341      # kubetest for kops now support select random regions and zones.
   342      # For initial testing we are not sending in zones when the
   343      # --kops-multiple-zones flag is set.  If the flag is not set then
   344      # we use the older functionality of passing in zones.
   345      if args.kops_multiple_zones:
   346          runner_args.extend(["--kops-multiple-zones"])
   347      else:
   348          # TODO(@chrislovecnm): once we have tested we can remove the zones
   349          # and region logic from this code and have kubetest handle that
   350          # logic
   351          zones = args.kops_zones or random.choice(DEFAULT_AWS_ZONES)
   352          regions = ','.join([zone[:-1] for zone in zones.split(',')])
   353          runner_args.extend(['--kops-zones=%s' % zones])
   354          mode.add_environment(
   355            'KOPS_REGIONS=%s' % regions,
   356          )
   357  
   358      mode.add_environment(
   359        'AWS_PROFILE=%s' % profile,
   360        'AWS_DEFAULT_PROFILE=%s' % profile,
   361      )
   362  
   363      if args.aws_cluster_domain:
   364          cluster = '%s.%s' % (cluster, args.aws_cluster_domain)
   365  
   366      # AWS requires a username (and it varies per-image)
   367      ssh_user = args.kops_ssh_user or 'admin'
   368  
   369      runner_args.extend([
   370          '--kops-cluster=%s' % cluster,
   371          '--kops-state=%s' % args.kops_state,
   372          '--kops-nodes=%s' % args.kops_nodes,
   373          '--kops-ssh-key=%s' % aws_ssh,
   374          '--kops-ssh-user=%s' % ssh_user,
   375      ])
   376  
   377  
   378  def set_up_aws(workspace, args, mode, cluster, runner_args):
   379      """Set up aws related envs.  Legacy; will be replaced by set_up_kops_aws."""
   380      for path in [args.aws_ssh, args.aws_pub, args.aws_cred]:
   381          if not os.path.isfile(os.path.expandvars(path)):
   382              raise IOError(path, os.path.expandvars(path))
   383      mode.add_aws_cred(args.aws_ssh, args.aws_pub, args.aws_cred)
   384  
   385      aws_ssh = os.path.join(workspace, '.ssh', 'kube_aws_rsa')
   386      profile = args.aws_profile
   387      if args.aws_role_arn:
   388          profile = mode.add_aws_role(profile, args.aws_role_arn)
   389  
   390      zones = args.kops_zones or random.choice(DEFAULT_AWS_ZONES)
   391      regions = ','.join([zone[:-1] for zone in zones.split(',')])
   392  
   393      mode.add_environment(
   394        'AWS_PROFILE=%s' % profile,
   395        'AWS_DEFAULT_PROFILE=%s' % profile,
   396        'KOPS_REGIONS=%s' % regions,
   397      )
   398  
   399      if args.aws_cluster_domain:
   400          cluster = '%s.%s' % (cluster, args.aws_cluster_domain)
   401  
   402      # AWS requires a username (and it varies per-image)
   403      ssh_user = args.kops_ssh_user or 'admin'
   404  
   405      runner_args.extend([
   406          '--kops-cluster=%s' % cluster,
   407          '--kops-zones=%s' % zones,
   408          '--kops-state=%s' % args.kops_state,
   409          '--kops-nodes=%s' % args.kops_nodes,
   410          '--kops-ssh-key=%s' % aws_ssh,
   411          '--kops-ssh-user=%s' % ssh_user,
   412      ])
   413      # TODO(krzyzacy):Remove after retire kops-e2e-runner.sh
   414      mode.add_aws_runner()
   415  
   416  def read_gcs_path(gcs_path):
   417      """reads a gcs path (gs://...) by HTTP GET to storage.googleapis.com"""
   418      link = gcs_path.replace('gs://', 'https://storage.googleapis.com/')
   419      loc = urllib2.urlopen(link).read()
   420      print >>sys.stderr, "Read GCS Path: %s" % loc
   421      return loc
   422  
   423  def get_shared_gcs_path(gcs_shared, use_shared_build):
   424      """return the shared path for this set of jobs using args and $PULL_REFS."""
   425      build_file = ''
   426      if use_shared_build:
   427          build_file += use_shared_build + '-'
   428      build_file += 'build-location.txt'
   429      return os.path.join(gcs_shared, os.getenv('PULL_REFS', ''), build_file)
   430  
   431  def main(args):
   432      """Set up env, start kubekins-e2e, handle termination. """
   433      # pylint: disable=too-many-branches,too-many-statements,too-many-locals
   434  
   435      # Rules for env var priority here in docker:
   436      # -e FOO=a -e FOO=b -> FOO=b
   437      # --env-file FOO=a --env-file FOO=b -> FOO=b
   438      # -e FOO=a --env-file FOO=b -> FOO=a(!!!!)
   439      # --env-file FOO=a -e FOO=b -> FOO=b
   440      #
   441      # So if you overwrite FOO=c for a local run it will take precedence.
   442      #
   443  
   444      # Set up workspace/artifacts dir
   445      workspace = os.environ.get('WORKSPACE', os.getcwd())
   446      artifacts = os.environ.get('ARTIFACTS', os.path.join(workspace, '_artifacts'))
   447      if not os.path.isdir(artifacts):
   448          os.makedirs(artifacts)
   449  
   450      mode = LocalMode(workspace, artifacts)
   451  
   452      for env_file in args.env_file:
   453          mode.add_file(test_infra(env_file))
   454      for env in args.env:
   455          mode.add_env(env)
   456  
   457      # TODO(fejta): remove after next image push
   458      mode.add_environment('KUBETEST_MANUAL_DUMP=y')
   459      if args.dump_before_and_after:
   460          before_dir = os.path.join(mode.artifacts, 'before')
   461          if not os.path.exists(before_dir):
   462              os.makedirs(before_dir)
   463          after_dir = os.path.join(mode.artifacts, 'after')
   464          if not os.path.exists(after_dir):
   465              os.makedirs(after_dir)
   466  
   467          runner_args = [
   468              '--dump-pre-test-logs=%s' % before_dir,
   469              '--dump=%s' % after_dir,
   470              ]
   471      else:
   472          runner_args = [
   473              '--dump=%s' % mode.artifacts,
   474          ]
   475  
   476      if args.service_account:
   477          runner_args.append(
   478              '--gcp-service-account=%s' % mode.add_service_account(args.service_account))
   479  
   480      shared_build_gcs_path = ""
   481      if args.use_shared_build is not None:
   482          # find shared build location from GCS
   483          gcs_path = get_shared_gcs_path(args.gcs_shared, args.use_shared_build)
   484          print >>sys.stderr, 'Getting shared build location from: '+gcs_path
   485          # retry loop for reading the location
   486          attempts_remaining = 12
   487          while True:
   488              attempts_remaining -= 1
   489              try:
   490                  # tell kubetest to extract from this location
   491                  shared_build_gcs_path = read_gcs_path(gcs_path)
   492                  args.kubetest_args.append('--extract=' + shared_build_gcs_path)
   493                  args.build = None
   494                  break
   495              except urllib2.URLError as err:
   496                  print >>sys.stderr, 'Failed to get shared build location: %s' % err
   497                  if attempts_remaining > 0:
   498                      print >>sys.stderr, 'Waiting 5 seconds and retrying...'
   499                      time.sleep(5)
   500                  else:
   501                      raise RuntimeError('Failed to get shared build location too many times!')
   502  
   503      elif args.build is not None:
   504          if args.build == '':
   505              # Empty string means --build was passed without any arguments;
   506              # if --build wasn't passed, args.build would be None
   507              runner_args.append('--build')
   508          else:
   509              runner_args.append('--build=%s' % args.build)
   510          k8s = os.getcwd()
   511          if not os.path.basename(k8s) == 'kubernetes':
   512              raise ValueError(k8s)
   513          mode.add_k8s(os.path.dirname(k8s), 'kubernetes', 'release')
   514  
   515      if args.build_federation is not None:
   516          if args.build_federation == '':
   517              runner_args.append('--build-federation')
   518          else:
   519              runner_args.append('--build-federation=%s' % args.build_federation)
   520          fed = os.getcwd()
   521          if not os.path.basename(fed) == 'federation':
   522              raise ValueError(fed)
   523          mode.add_k8s(os.path.dirname(fed), 'federation', 'release')
   524  
   525      if args.kops_build:
   526          build_kops(os.getcwd(), mode)
   527  
   528      if args.stage is not None:
   529          runner_args.append('--stage=%s' % args.stage)
   530          if args.aws:
   531              for line in check_output('hack/print-workspace-status.sh').split('\n'):
   532                  if 'gitVersion' in line:
   533                      _, version = line.strip().split(' ')
   534                      break
   535              else:
   536                  raise ValueError('kubernetes version not found in workspace status')
   537              runner_args.append('--kops-kubernetes-version=%s/%s' % (
   538                  args.stage.replace('gs://', 'https://storage.googleapis.com/'),
   539                  version))
   540  
   541      # TODO(fejta): move these out of this file
   542      if args.up == 'true':
   543          runner_args.append('--up')
   544      if args.down == 'true':
   545          runner_args.append('--down')
   546      if args.test == 'true':
   547          runner_args.append('--test')
   548  
   549      # Passthrough some args to kubetest
   550      if args.deployment:
   551          runner_args.append('--deployment=%s' % args.deployment)
   552      if args.provider:
   553          runner_args.append('--provider=%s' % args.provider)
   554  
   555      cluster = cluster_name(args.cluster, args.tear_down_previous)
   556      runner_args.append('--cluster=%s' % cluster)
   557      runner_args.append('--gcp-network=%s' % cluster)
   558      runner_args.extend(args.kubetest_args)
   559  
   560      if args.use_logexporter:
   561          # TODO(fejta): Take the below value through a flag instead of env var.
   562          runner_args.append('--logexporter-gcs-path=%s' % os.environ.get('GCS_ARTIFACTS_DIR', ''))
   563  
   564      if args.kubeadm:
   565          version = kubeadm_version(args.kubeadm, shared_build_gcs_path)
   566          runner_args.extend([
   567              '--kubernetes-anywhere-path=%s' % os.path.join(workspace, 'k8s.io',
   568                  'kubernetes-anywhere'),
   569              '--kubernetes-anywhere-phase2-provider=kubeadm',
   570              '--kubernetes-anywhere-cluster=%s' % cluster,
   571              '--kubernetes-anywhere-kubeadm-version=%s' % version,
   572          ])
   573  
   574          if args.kubeadm == "pull":
   575              # If this is a pull job; the kubelet version should equal
   576              # the kubeadm version here: we should use debs from the PR build
   577              runner_args.extend([
   578                  '--kubernetes-anywhere-kubelet-version=%s' % version,
   579              ])
   580  
   581      if args.aws:
   582          # Legacy - prefer passing --deployment=kops, --provider=aws,
   583          # which does not use kops-e2e-runner.sh
   584          set_up_aws(mode.workspace, args, mode, cluster, runner_args)
   585      elif args.deployment == 'kops' and args.provider == 'aws':
   586          set_up_kops_aws(mode.workspace, args, mode, cluster, runner_args)
   587      elif args.deployment == 'kops' and args.provider == 'gce':
   588          set_up_kops_gce(mode.workspace, args, mode, cluster, runner_args)
   589      elif args.gce_ssh:
   590          mode.add_gce_ssh(args.gce_ssh, args.gce_pub)
   591  
   592      # TODO(fejta): delete this?
   593      mode.add_os_environment(*(
   594          '%s=%s' % (k, v) for (k, v) in os.environ.items()))
   595  
   596      mode.add_environment(
   597        # Boilerplate envs
   598        # Skip gcloud update checking
   599        'CLOUDSDK_COMPONENT_MANAGER_DISABLE_UPDATE_CHECK=true',
   600        # Use default component update behavior
   601        'CLOUDSDK_EXPERIMENTAL_FAST_COMPONENT_UPDATE=false',
   602        # AWS
   603        'KUBE_AWS_INSTANCE_PREFIX=%s' % cluster,
   604        # GCE
   605        'INSTANCE_PREFIX=%s' % cluster,
   606        'KUBE_GCE_INSTANCE_PREFIX=%s' % cluster,
   607      )
   608  
   609      mode.start(runner_args)
   610  
   611  def create_parser():
   612      """Create argparser."""
   613      parser = argparse.ArgumentParser()
   614      parser.add_argument(
   615          '--env-file', default=[], action="append",
   616          help='Job specific environment file')
   617      parser.add_argument(
   618          '--env', default=[], action="append",
   619          help='Job specific environment setting ' +
   620          '(usage: "--env=VAR=SETTING" will set VAR to SETTING).')
   621      parser.add_argument(
   622          '--gce-ssh',
   623          default=os.environ.get('JENKINS_GCE_SSH_PRIVATE_KEY_FILE'),
   624          help='Path to .ssh/google_compute_engine keys')
   625      parser.add_argument(
   626          '--gce-pub',
   627          default=os.environ.get('JENKINS_GCE_SSH_PUBLIC_KEY_FILE'),
   628          help='Path to pub gce ssh key')
   629      parser.add_argument(
   630          '--service-account',
   631          default=os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'),
   632          help='Path to service-account.json')
   633      parser.add_argument(
   634          '--build', nargs='?', default=None, const='',
   635          help='Build kubernetes binaries if set, optionally specifying strategy')
   636      parser.add_argument(
   637          '--build-federation', nargs='?', default=None, const='',
   638          help='Build federation binaries if set, optionally specifying strategy')
   639      parser.add_argument(
   640          '--use-shared-build', nargs='?', default=None, const='',
   641          help='Use prebuilt kubernetes binaries if set, optionally specifying strategy')
   642      parser.add_argument(
   643          '--gcs-shared',
   644          default='gs://kubernetes-jenkins/shared-results/',
   645          help='Get shared build from this bucket')
   646      parser.add_argument(
   647          '--cluster', default='bootstrap-e2e', help='Name of the cluster')
   648      parser.add_argument(
   649          '--kubeadm', choices=['ci', 'periodic', 'pull', 'stable'])
   650      parser.add_argument(
   651          '--stage', default=None, help='Stage release to GCS path provided')
   652      parser.add_argument(
   653          '--test', default='true', help='If we need to run any actual test within kubetest')
   654      parser.add_argument(
   655          '--down', default='true', help='If we need to tear down the e2e cluster')
   656      parser.add_argument(
   657          '--up', default='true', help='If we need to bring up a e2e cluster')
   658      parser.add_argument(
   659          '--tear-down-previous', action='store_true',
   660          help='If we need to tear down previous e2e cluster')
   661      parser.add_argument(
   662          '--use-logexporter',
   663          action='store_true',
   664          help='If we need to use logexporter tool to upload logs from nodes to GCS directly')
   665      parser.add_argument(
   666          '--kubetest_args',
   667          action='append',
   668          default=[],
   669          help='Send unrecognized args directly to kubetest')
   670      parser.add_argument(
   671          '--dump-before-and-after', action='store_true',
   672          help='Dump artifacts from both before and after the test run')
   673  
   674  
   675      # kops & aws
   676      # TODO(justinsb): replace with --provider=aws --deployment=kops
   677      parser.add_argument(
   678          '--aws', action='store_true', help='E2E job runs in aws')
   679      parser.add_argument(
   680          '--aws-profile',
   681          default=(
   682              os.environ.get('AWS_PROFILE') or
   683              os.environ.get('AWS_DEFAULT_PROFILE') or
   684              'default'
   685          ),
   686          help='Profile within --aws-cred to use')
   687      parser.add_argument(
   688          '--aws-role-arn',
   689          default=os.environ.get('KOPS_E2E_ROLE_ARN'),
   690          help='Use --aws-profile to run as --aws-role-arn if set')
   691      parser.add_argument(
   692          '--aws-ssh',
   693          default=os.environ.get('JENKINS_AWS_SSH_PRIVATE_KEY_FILE'),
   694          help='Path to private aws ssh keys')
   695      parser.add_argument(
   696          '--aws-pub',
   697          default=os.environ.get('JENKINS_AWS_SSH_PUBLIC_KEY_FILE'),
   698          help='Path to pub aws ssh key')
   699      parser.add_argument(
   700          '--aws-cred',
   701          default=os.environ.get('JENKINS_AWS_CREDENTIALS_FILE'),
   702          help='Path to aws credential file')
   703      parser.add_argument(
   704          '--aws-cluster-domain', help='Domain of the aws cluster for aws-pr jobs')
   705      parser.add_argument(
   706          '--kops-nodes', default=4, type=int, help='Number of nodes to start')
   707      parser.add_argument(
   708          '--kops-ssh-user', default='',
   709          help='Username for ssh connections to instances')
   710      parser.add_argument(
   711          '--kops-state', default='s3://k8s-kops-prow/',
   712          help='Name of the aws state storage')
   713      parser.add_argument(
   714          '--kops-state-gce', default='gs://k8s-kops-gce/',
   715          help='Name of the kops state storage for GCE')
   716      parser.add_argument(
   717          '--kops-zones', help='Comma-separated list of zones else random choice')
   718      parser.add_argument(
   719          '--kops-build', action='store_true', help='If we need to build kops locally')
   720      parser.add_argument(
   721          '--kops-multiple-zones', action='store_true', help='Use multiple zones')
   722  
   723  
   724      # kubetest flags that also trigger behaviour here
   725      parser.add_argument(
   726          '--provider', help='provider flag as used by kubetest')
   727      parser.add_argument(
   728          '--deployment', help='deployment flag as used by kubetest')
   729  
   730      return parser
   731  
   732  
   733  def parse_args(args=None):
   734      """Return args, adding unrecognized args to kubetest_args."""
   735      parser = create_parser()
   736      args, extra = parser.parse_known_args(args)
   737      args.kubetest_args += extra
   738  
   739      if args.aws or args.provider == 'aws':
   740          # If aws keys are missing, try to fetch from HOME dir
   741          if not args.aws_ssh or not args.aws_pub or not args.aws_cred:
   742              home = os.environ.get('HOME')
   743              if not home:
   744                  raise ValueError('HOME dir not set!')
   745              if not args.aws_ssh:
   746                  args.aws_ssh = '%s/.ssh/kube_aws_rsa' % home
   747                  print >>sys.stderr, '-aws-ssh key not set. Defaulting to %s' % args.aws_ssh
   748              if not args.aws_pub:
   749                  args.aws_pub = '%s/.ssh/kube_aws_rsa.pub' % home
   750                  print >>sys.stderr, '--aws-pub key not set. Defaulting to %s' % args.aws_pub
   751              if not args.aws_cred:
   752                  args.aws_cred = '%s/.aws/credentials' % home
   753                  print >>sys.stderr, '--aws-cred not set. Defaulting to %s' % args.aws_cred
   754      return args
   755  
   756  
   757  if __name__ == '__main__':
   758      main(parse_args())