github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/jenkins/bootstrap.py

#!/usr/bin/env python

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Need to figure out why this only fails on travis
# pylint: disable=bad-continuation

"""Bootstraps starting a test job.

The following should already be done:
  git clone http://k8s.io/test-infra
  cd $WORKSPACE
  test-infra/jenkins/bootstrap.py <--repo=R || --bare> <--job=J> <--pull=P || --branch=B>

The bootstrapper then does the following:
  # Note start time
  # Check out the repos defined in --repo
  # Note job started
  # Call the runner defined in $JOB.json
  # Upload artifacts (this will change later)
  # Upload build-log.txt
  # Note job ended

The contract with the runner is as follows:
  * Runner must exit non-zero if the job fails for any reason.
"""


import argparse
import contextlib
import json
import logging
import os
import pipes
import random
import re
import select
import signal
import socket
import subprocess
import sys
import tempfile
import time

ORIG_CWD = os.getcwd()  # Checkout changes cwd


def read_all(end, stream, append):
    """Read all buffered lines from a stream."""
    while not end or time.time() < end:
        line = stream.readline()
        if not line:
            return True  # Read everything
        # Strip \n at the end if any. Last line of file may not have one.
        append(line.rstrip('\n'))
        # Is there more on the buffer?
        ret = select.select([stream.fileno()], [], [], 0.1)
        if not ret[0]:
            return False  # Cleared buffer but not at the end
    return False  # Time expired


def elapsed(since):
    """Return the number of minutes elapsed since a time."""
    return (time.time() - since) / 60


def terminate(end, proc, kill):
    """Terminate or kill the process after end."""
    if not end or time.time() <= end:
        return False
    if kill:  # Process will not die, kill everything
        pgid = os.getpgid(proc.pid)
        logging.info(
            'Kill %d and process group %d', proc.pid, pgid)
        os.killpg(pgid, signal.SIGKILL)
        proc.kill()
        return True
    logging.info(
        'Terminate %d on timeout', proc.pid)
    proc.terminate()
    return True


def _call(end, cmd, stdin=None, check=True, output=None, log_failures=True):
    """Start a subprocess."""
    logging.info('Call:  %s', ' '.join(pipes.quote(c) for c in cmd))
    begin = time.time()
    if end:
        end = max(end, time.time() + 60)  # Allow at least 60s per command
    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE if stdin is not None else None,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        preexec_fn=os.setsid,
    )
    if stdin:
        proc.stdin.write(stdin)
        proc.stdin.close()
    out = []
    code = None
    timeout = False
    reads = {
        proc.stderr.fileno(): (proc.stderr, logging.warning),
        proc.stdout.fileno(): (
            proc.stdout, (out.append if output else logging.info)),
    }
    while reads:
        if terminate(end, proc, timeout):
            if timeout:  # We killed everything
                break
            # Give subprocess some cleanup time before killing.
            end = time.time() + 15 * 60
            timeout = True
        ret = select.select(reads, [], [], 0.1)
        for fdesc in ret[0]:
            if read_all(end, *reads[fdesc]):
                reads.pop(fdesc)
        if not ret[0] and proc.poll() is not None:
            break  # process exited without closing pipes (timeout?)

    code = proc.wait()
    if timeout:
        code = code or 124
        logging.error('Build timed out')
    if code and log_failures:
        logging.error('Command failed')
    logging.info(
        'process %d exited with code %d after %.1fm',
        proc.pid, code, elapsed(begin))
    out.append('')
    lines = output and '\n'.join(out)
    if check and code:
        raise subprocess.CalledProcessError(code, cmd, lines)
    return lines
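# Usage sketch (hypothetical command and deadline): _call streams subprocess
# output into the log while enforcing an overall deadline, e.g.
#   _call(time.time() + 300, ['git', 'status'])      # log output, raise on failure
#   out = _call(0, ['git', 'status'], output=True)   # capture stdout instead
# A falsy end (0 or None) disables the deadline entirely.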


def ref_has_shas(ref):
    """Determine if a reference specifies shas (contains ':')."""
    return isinstance(ref, basestring) and ':' in ref


def pull_numbers(pull):
    """Turn a pull reference list into a list of PR numbers to merge."""
    if ref_has_shas(pull):
        return [r.split(':')[0] for r in pull.split(',')][1:]
    return [str(pull)]
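# Example (hypothetical refs): pull_numbers('master:abcd,123:ef01,456:aa99')
# returns ['123', '456'] -- the first entry names the branch being merged
# into, so only the later entries are PR numbers.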


def pull_ref(pull):
    """Turn a PR number or list of refs into specific refs to fetch and check out."""
    if isinstance(pull, int) or ',' not in pull:
        return ['+refs/pull/%d/merge' % int(pull)], ['FETCH_HEAD']
    pulls = pull.split(',')
    refs = []
    checkouts = []
    for ref in pulls:
        if ':' in ref:  # master:abcd or 1234:abcd
            name, sha = ref.split(':')
        elif not refs:  # master
            name, sha = ref, 'FETCH_HEAD'
        else:
            name = ref
            sha = 'refs/pr/%s' % ref

        checkouts.append(sha)
        if not refs:  # First ref should be branch to merge into
            refs.append(name)
        else:  # Subsequent refs should be PR numbers
            num = int(name)
            refs.append('+refs/pull/%d/head:refs/pr/%d' % (num, num))
    return refs, checkouts
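# Example (hypothetical batch): pull_ref('master:abcd,123:ef01') returns
#   (['master', '+refs/pull/123/head:refs/pr/123'], ['abcd', 'ef01'])
# while a single PR number, pull_ref(1234), returns
#   (['+refs/pull/1234/merge'], ['FETCH_HEAD'])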


def branch_ref(branch):
    """Split branch:sha if necessary."""
    if ref_has_shas(branch):
        split_refs = branch.split(':')
        return [split_refs[0]], [split_refs[1]]
    return [branch], ['FETCH_HEAD']


def repository(repo, ssh):
    """Return the url associated with the repo."""
    if repo.startswith('k8s.io/'):
        repo = 'github.com/kubernetes/%s' % (repo[len('k8s.io/'):])
    if ssh:
        if ":" not in repo:
            parts = repo.split('/', 1)
            repo = '%s:%s' % (parts[0], parts[1])
        return 'git@%s' % repo
    return 'https://%s' % repo
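# Example: repository('k8s.io/kubernetes', '') returns
#   'https://github.com/kubernetes/kubernetes'
# while any truthy ssh value yields 'git@github.com:kubernetes/kubernetes'.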


def random_sleep(attempt):
    """Sleep attempt**2 seconds plus a random fractional offset."""
    time.sleep(random.random() + attempt ** 2)


def checkout(call, repo, branch, pull, ssh='', git_cache='', clean=False):
    """Fetch and checkout the repository at the specified branch/pull."""
    # pylint: disable=too-many-locals
    if bool(branch) == bool(pull):
        raise ValueError('Must specify one of --branch or --pull')

    if pull:
        refs, checkouts = pull_ref(pull)
    else:
        refs, checkouts = branch_ref(branch)

    git = 'git'
    if git_cache:
        cache_dir = '%s/%s' % (git_cache, repo)
        try:
            os.makedirs(cache_dir)
        except OSError:
            pass
        call([git, 'init', repo, '--separate-git-dir=%s' % cache_dir])
        call(['rm', '-f', '%s/index.lock' % cache_dir])
    else:
        call([git, 'init', repo])
    os.chdir(repo)

    if clean:
        call([git, 'clean', '-dfx'])
        call([git, 'reset', '--hard'])

    # To make a merge commit, a user needs to be set. It's okay to use a dummy
    # user here, since we're not exporting the history.
    call([git, 'config', '--local', 'user.name', 'K8S Bootstrap'])
    call([git, 'config', '--local', 'user.email', 'k8s_bootstrap@localhost'])
    retries = 3
    for attempt in range(retries):
        try:
            call([git, 'fetch', '--quiet', '--tags', repository(repo, ssh)] + refs)
            break
        except subprocess.CalledProcessError as cpe:
            if attempt >= retries - 1:
                raise
            if cpe.returncode != 128:
                raise
            logging.warning('git fetch failed')
            random_sleep(attempt)
    call([git, 'checkout', '-B', 'test', checkouts[0]])
    for ref, head in zip(refs, checkouts)[1:]:
        call([git, 'merge', '--no-ff', '-m', 'Merge %s' % ref, head])
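# Sketch of the git operations this produces for a hypothetical batch
# (--repo=k8s.io/kubernetes=master:abcd,123:ef01):
#   git fetch --quiet --tags <url> master +refs/pull/123/head:refs/pr/123
#   git checkout -B test abcd
#   git merge --no-ff -m 'Merge +refs/pull/123/head:refs/pr/123' ef01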


def repos_dict(repos):
    """Returns {"repo1": "branch", "repo2": "pull"}."""
    return {r: b or p for (r, (b, p)) in repos.items()}


def start(gsutil, paths, stamp, node_name, version, repos):
    """Construct and upload started.json."""
    data = {
        'timestamp': int(stamp),
        'jenkins-node': node_name,
        'node': node_name,
    }
    if version:
        data['repo-version'] = version
        data['version'] = version  # TODO(fejta): retire
    if repos:
        pull = repos[repos.main]
        if ref_has_shas(pull[1]):
            data['pull'] = pull[1]
        data['repos'] = repos_dict(repos)

    gsutil.upload_json(paths.started, data)
    # Upload a link to the build path in the directory
    if paths.pr_build_link:
        gsutil.upload_text(
            paths.pr_build_link,
            paths.pr_path,
            additional_headers=['-h', 'x-goog-meta-link: %s' % paths.pr_path]
        )
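# The resulting started.json looks roughly like this (hypothetical values):
#   {
#     "timestamp": 1507252230,
#     "jenkins-node": "agent-light-1",
#     "node": "agent-light-1",
#     "repo-version": "v1.9.0-alpha.1.380+3e0cffb",
#     "version": "v1.9.0-alpha.1.380+3e0cffb",
#     "pull": "master:abcd,123:ef01",
#     "repos": {"k8s.io/kubernetes": "master:abcd,123:ef01"}
#   }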


class GSUtil(object):
    """A helper class for making gsutil commands."""
    gsutil = 'gsutil'

    def __init__(self, call):
        self.call = call

    def stat(self, path):
        """Return metadata about the object, such as generation."""
        cmd = [self.gsutil, 'stat', path]
        return self.call(cmd, output=True, log_failures=False)

    def ls(self, path):
        """List a bucket or subdir."""
        cmd = [self.gsutil, 'ls', path]
        return self.call(cmd, output=True)

    def upload_json(self, path, jdict, generation=None):
        """Upload the dictionary object to path."""
        if generation is not None:  # generation==0 means object does not exist
            gen = ['-h', 'x-goog-if-generation-match:%s' % generation]
        else:
            gen = []
        cmd = [
            self.gsutil, '-q',
            '-h', 'Content-Type:application/json'] + gen + [
            'cp', '-', path]
        self.call(cmd, stdin=json.dumps(jdict, indent=2))

    def copy_file(self, dest, orig):
        """Copy the file to the specified path using compressed encoding."""
        cmd = [self.gsutil, '-q', 'cp', '-Z', orig, dest]
        self.call(cmd)

    def upload_text(self, path, txt, additional_headers=None, cached=True):
        """Copy the text to path, optionally disabling caching."""
        headers = ['-h', 'Content-Type:text/plain']
        if not cached:
            headers += ['-h', 'Cache-Control:private, max-age=0, no-transform']
        if additional_headers:
            headers += additional_headers
        cmd = [self.gsutil, '-q'] + headers + ['cp', '-', path]
        self.call(cmd, stdin=txt)

    def cat(self, path, generation):
        """Return contents of path#generation."""
        cmd = [self.gsutil, '-q', 'cat', '%s#%s' % (path, generation)]
        return self.call(cmd, output=True)

    def upload_artifacts(self, gsutil, path, artifacts):
        """Upload artifacts to the specified path."""
        # Upload artifacts
        if not os.path.isdir(artifacts):
            logging.warning('Artifacts dir %s is missing.', artifacts)
            return
        try:
            # If remote path exists, it will create .../_artifacts subdir instead
            gsutil.ls(path)
            # Success means remote path exists
            remote_base = os.path.basename(path)
            local_base = os.path.basename(artifacts)
            if remote_base != local_base:
                # if basenames are different, need to copy things over first.
                localpath = artifacts.replace(local_base, remote_base)
                os.rename(artifacts, localpath)
                artifacts = localpath
            path = path[:-len(remote_base + '/')]
        except subprocess.CalledProcessError:
            logging.warning('Remote dir %s does not exist yet', path)
        cmd = [
            self.gsutil, '-m', '-q',
            '-o', 'GSUtil:use_magicfile=True',
            'cp', '-r', '-c', '-z', 'log,txt,xml',
            artifacts, path,
        ]
        self.call(cmd)
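# Note on upload_json's generation argument: it maps to a GCS precondition
# header, '-h x-goog-if-generation-match:<gen>', so the copy succeeds only if
# the remote object is still at that generation (0 means it must not exist).
# append_result() below relies on this for optimistic concurrency.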


def append_result(gsutil, path, build, version, passed):
    """Download a json list and append metadata about this build to it."""
    # TODO(fejta): delete the clone of this logic in upload-to-gcs.sh
    #                  (this is update_job_result_cache)
    end = time.time() + 300  # try for up to five minutes
    errors = 0
    while time.time() < end:
        if errors:
            random_sleep(min(errors, 3))
        try:
            out = gsutil.stat(path)
            gen = re.search(r'Generation:\s+(\d+)', out).group(1)
        except subprocess.CalledProcessError:
            gen = 0
        if gen:
            try:
                cache = json.loads(gsutil.cat(path, gen))
                if not isinstance(cache, list):
                    raise ValueError(cache)
            except ValueError as exc:
                logging.warning('Failed to decode JSON: %s', exc)
                cache = []
            except subprocess.CalledProcessError:  # gen doesn't exist
                errors += 1
                continue
        else:
            cache = []
        cache.append({
            'version': version,  # TODO(fejta): retire
            'job-version': version,
            'buildnumber': build,
            'passed': bool(passed),
            'result': 'SUCCESS' if passed else 'FAILURE',
        })
        cache = cache[-300:]
        try:
            gsutil.upload_json(path, cache, generation=gen)
            return
        except subprocess.CalledProcessError:
            logging.warning('Failed to append to %s#%s', path, gen)
        errors += 1


def metadata(repos, artifacts, call):
    """Return metadata associated with the build, including inside artifacts."""
    path = os.path.join(artifacts or '', 'metadata.json')
    meta = None
    if os.path.isfile(path):
        try:
            with open(path) as fp:
                meta = json.loads(fp.read())
        except (IOError, ValueError):
            pass

    if not meta or not isinstance(meta, dict):
        meta = {}
    if repos:
        meta['repo'] = repos.main
        meta['repos'] = repos_dict(repos)

    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['repo-commit'] = commit.strip()
    except subprocess.CalledProcessError:
        pass

    cwd = os.getcwd()
    os.chdir(test_infra('.'))
    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['infra-commit'] = commit.strip()[:9]
    except subprocess.CalledProcessError:
        pass
    os.chdir(cwd)

    return meta


def finish(gsutil, paths, success, artifacts, build, version, repos, call):
    """
    Args:
        gsutil: a GSUtil instance.
        paths: a Paths instance.
        success: the build passed if true.
        artifacts: a dir containing artifacts to upload.
        build: identifier of this build.
        version: identifies what version of the code the build tested.
        repos: the Repos this build checked out.
        call: a function for running commands.
    """

    if os.path.isdir(artifacts) and any(f for _, _, f in os.walk(artifacts)):
        try:
            gsutil.upload_artifacts(gsutil, paths.artifacts, artifacts)
        except subprocess.CalledProcessError:
            logging.warning('Failed to upload artifacts')
    else:
        logging.warning('Missing local artifacts: %s', artifacts)

    meta = metadata(repos, artifacts, call)
    if not version:
        version = meta.get('job-version')
    if not version:  # TODO(fejta): retire
        version = meta.get('version')
    # github.com/kubernetes/release/find_green_build depends on append_result()
    # TODO(fejta): reconsider whether this is how we want to solve this problem.
    append_result(gsutil, paths.result_cache, build, version, success)
    if paths.pr_result_cache:
        append_result(gsutil, paths.pr_result_cache, build, version, success)

    data = {
        # TODO(fejta): update utils.go in contrib to accept a float
        'timestamp': int(time.time()),
        'result': 'SUCCESS' if success else 'FAILURE',
        'passed': bool(success),
        'metadata': meta,
    }
    if version:
        data['job-version'] = version
        data['version'] = version  # TODO(fejta): retire
    gsutil.upload_json(paths.finished, data)

    # Upload the latest build for the job.
    # Do this last, since other tools expect the rest of the data to be
    # published when this file is created.
    for path in {paths.latest, paths.pr_latest}:
        if path:
            try:
                gsutil.upload_text(path, str(build), cached=False)
            except subprocess.CalledProcessError:
                logging.warning('Failed to update %s', path)
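# The resulting finished.json looks roughly like this (hypothetical values):
#   {
#     "timestamp": 1507253941,
#     "result": "SUCCESS",
#     "passed": true,
#     "job-version": "v1.9.0-alpha.1.380+3e0cffb",
#     "version": "v1.9.0-alpha.1.380+3e0cffb",
#     "metadata": {"repo": "k8s.io/kubernetes", "repo-commit": "3e0cffb"}
#   }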


def test_infra(*paths):
    """Return path relative to root of test-infra repo."""
    return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)


def node():
    """Return the name of the node running the build."""
    # TODO(fejta): jenkins sets the node name and our infra expects this value.
    # TODO(fejta): Consider doing something different here.
    if NODE_ENV not in os.environ:
        os.environ[NODE_ENV] = ''.join(socket.gethostname().split('.')[:1])
    return os.environ[NODE_ENV]


def find_version(call):
    """Determine and return the version of the build."""
    # TODO(fejta): once job-version is functional switch this to
    # git rev-parse [--short=N] HEAD^{commit}
    version_file = 'version'
    if os.path.isfile(version_file):
        # e2e tests which download kubernetes use this path:
        with open(version_file) as fp:
            return fp.read().strip()

    version_script = 'hack/lib/version.sh'
    if os.path.isfile(version_script):
        cmd = [
            'bash', '-c', (
"""
set -o errexit
set -o nounset
export KUBE_ROOT=.
source %s
kube::version::get_version_vars
echo $KUBE_GIT_VERSION
""" % version_script)
        ]
        return call(cmd, output=True).strip()

    return 'unknown'


class Paths(object):  # pylint: disable=too-many-instance-attributes,too-few-public-methods
    """Links to remote gcs-paths for uploading results."""
    def __init__(  # pylint: disable=too-many-arguments
        self,
        artifacts,  # artifacts folder (in build)
        build_log,  # build-log.txt (in build)
        pr_path,  # path to build
        finished,  # finished.json (metadata from end of build)
        latest,  # latest-build.txt (in job)
        pr_build_link,  # file containing pr_path (in job directory)
        pr_latest,  # latest-build.txt (in pr job)
        pr_result_cache,  # jobResultsCache.json (in pr job)
        result_cache,  # jobResultsCache.json (cache of latest results in job)
        started,  # started.json (metadata from start of build)
    ):
        self.artifacts = artifacts
        self.build_log = build_log
        self.pr_path = pr_path
        self.finished = finished
        self.latest = latest
        self.pr_build_link = pr_build_link
        self.pr_latest = pr_latest
        self.pr_result_cache = pr_result_cache
        self.result_cache = result_cache
        self.started = started


def ci_paths(base, job, build):
    """Return a Paths() instance for a continuous build."""
    latest = os.path.join(base, job, 'latest-build.txt')
    return Paths(
        artifacts=os.path.join(base, job, build, 'artifacts'),
        build_log=os.path.join(base, job, build, 'build-log.txt'),
        pr_path=None,
        finished=os.path.join(base, job, build, 'finished.json'),
        latest=latest,
        pr_build_link=None,
        pr_latest=None,
        pr_result_cache=None,
        result_cache=os.path.join(base, job, 'jobResultsCache.json'),
        started=os.path.join(base, job, build, 'started.json'),
    )
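# For a hypothetical continuous build, ci_paths('gs://bucket/logs', 'fake-job', '42')
# places per-build files under gs://bucket/logs/fake-job/42/ (started.json,
# finished.json, build-log.txt, artifacts/) and job-level files under
# gs://bucket/logs/fake-job/ (latest-build.txt, jobResultsCache.json).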


def pr_paths(base, repos, job, build):
    """Return a Paths() instance for a PR."""
    if not repos:
        raise ValueError('repos is empty')
    repo = repos.main
    pull = str(repos[repo][1])
    if repo in ['k8s.io/kubernetes', 'kubernetes/kubernetes']:
        prefix = ''
    elif repo.startswith('k8s.io/'):
        prefix = repo[len('k8s.io/'):]
    elif repo.startswith('kubernetes/'):
        prefix = repo[len('kubernetes/'):]
    elif repo.startswith('github.com/'):
        prefix = repo[len('github.com/'):].replace('/', '_')
    else:
        prefix = repo.replace('/', '_')
    # Batch merges are those with more than one PR specified.
    pr_nums = pull_numbers(pull)
    if len(pr_nums) > 1:
        pull = os.path.join(prefix, 'batch')
    else:
        pull = os.path.join(prefix, pr_nums[0])
    pr_path = os.path.join(base, 'pull', pull, job, build)
    result_cache = os.path.join(
        base, 'directory', job, 'jobResultsCache.json')
    pr_result_cache = os.path.join(
        base, 'pull', pull, job, 'jobResultsCache.json')
    return Paths(
        artifacts=os.path.join(pr_path, 'artifacts'),
        build_log=os.path.join(pr_path, 'build-log.txt'),
        pr_path=pr_path,
        finished=os.path.join(pr_path, 'finished.json'),
        latest=os.path.join(base, 'directory', job, 'latest-build.txt'),
        pr_build_link=os.path.join(base, 'directory', job, '%s.txt' % build),
        pr_latest=os.path.join(base, 'pull', pull, job, 'latest-build.txt'),
        pr_result_cache=pr_result_cache,
        result_cache=result_cache,
        started=os.path.join(pr_path, 'started.json'),
    )
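# For a hypothetical PR 123 against k8s.io/test-infra,
# pr_paths('gs://bucket/pr-logs', repos, 'fake-job', '42') roots the build at
#   gs://bucket/pr-logs/pull/test-infra/123/fake-job/42/
# while batch merges (multiple PRs) land under .../pull/test-infra/batch/.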


BUILD_ENV = 'BUILD_NUMBER'
BOOTSTRAP_ENV = 'BOOTSTRAP_MIGRATION'
CLOUDSDK_ENV = 'CLOUDSDK_CONFIG'
GCE_KEY_ENV = 'JENKINS_GCE_SSH_PRIVATE_KEY_FILE'
GUBERNATOR = 'https://k8s-gubernator.appspot.com/build'
HOME_ENV = 'HOME'
JENKINS_HOME_ENV = 'JENKINS_HOME'
JOB_ENV = 'JOB_NAME'
NODE_ENV = 'NODE_NAME'
SERVICE_ACCOUNT_ENV = 'GOOGLE_APPLICATION_CREDENTIALS'
WORKSPACE_ENV = 'WORKSPACE'
GCS_ARTIFACTS_ENV = 'GCS_ARTIFACTS_DIR'


def build_name(started):
    """Return the unique(ish) string representing this build."""
    # TODO(fejta): right now jenkins sets the BUILD_NUMBER and does this
    #              in an environment variable. Consider migrating this to a
    #              bootstrap.py flag
    if BUILD_ENV not in os.environ:
        # Automatically generate a build number if none is set
        uniq = '%x-%d' % (hash(node()), os.getpid())
        autogen = time.strftime('%Y%m%d-%H%M%S-' + uniq, time.gmtime(started))
        os.environ[BUILD_ENV] = autogen
    return os.environ[BUILD_ENV]
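# When BUILD_NUMBER is unset this autogenerates something like (hypothetical)
# '20171006-011030-4a3f2b1c-12345': the UTC start time, a hash of the node
# name, and the bootstrap pid.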


def setup_credentials(call, robot, upload):
    """Activate the service account unless robot is none."""
    # TODO(fejta): stop activating inside the image
    # TODO(fejta): allow use of existing gcloud auth
    if robot:
        os.environ[SERVICE_ACCOUNT_ENV] = robot
    if not os.getenv(SERVICE_ACCOUNT_ENV) and upload:
        logging.warning('Cannot --upload=%s, no active gcloud account.', upload)
        raise ValueError('--upload requires --service-account')
    if not os.getenv(SERVICE_ACCOUNT_ENV) and not upload:
        logging.info('Will not upload results.')
        return
    if not os.path.isfile(os.environ[SERVICE_ACCOUNT_ENV]):
        raise IOError(
            'Cannot find service account credentials',
            os.environ[SERVICE_ACCOUNT_ENV],
            'Create service account and then create key at '
            'https://console.developers.google.com/iam-admin/serviceaccounts/project',  # pylint: disable=line-too-long
        )
    call([
        'gcloud',
        'auth',
        'activate-service-account',
        '--key-file=%s' % os.environ[SERVICE_ACCOUNT_ENV],
    ])
    try:  # Old versions of gcloud may not support this value
        account = call(
            ['gcloud', 'config', 'get-value', 'account'], output=True).strip()
    except subprocess.CalledProcessError:
        account = 'unknown'
    logging.info('Will upload results to %s using %s', upload, account)


def setup_logging(path):
    """Initialize logging to screen and path."""
    # See https://docs.python.org/2/library/logging.html#logrecord-attributes
    # [IWEF]mmdd HH:MM:SS.mmm] msg
    fmt = '%(levelname).1s%(asctime)s.%(msecs)03d] %(message)s'  # pylint: disable=line-too-long
    datefmt = '%m%d %H:%M:%S'
    logging.basicConfig(
        level=logging.INFO,
        format=fmt,
        datefmt=datefmt,
    )
    build_log = logging.FileHandler(filename=path, mode='w')
    build_log.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt, datefmt=datefmt)
    build_log.setFormatter(formatter)
    logging.getLogger('').addHandler(build_log)
    return build_log


def setup_magic_environment(job):
    """Set magic environment variables scripts currently expect."""
    home = os.environ[HOME_ENV]
    # TODO(fejta): jenkins sets these values. Consider migrating to using
    #              a secret volume instead and passing the path to this volume
    #              into bootstrap.py as a flag.
    os.environ.setdefault(
        GCE_KEY_ENV,
        os.path.join(home, '.ssh/google_compute_engine'),
    )
    os.environ.setdefault(
        'JENKINS_GCE_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/google_compute_engine.pub'),
    )
    os.environ.setdefault(
        'JENKINS_AWS_SSH_PRIVATE_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa'),
    )
    os.environ.setdefault(
        'JENKINS_AWS_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa.pub'),
    )

    cwd = os.getcwd()
    # TODO(fejta): jenkins sets WORKSPACE and pieces of our infra expect this
    #              value. Consider doing something else in the future.
    # Furthermore, in the Jenkins and Prow environments, this is already set
    # to something reasonable, but using cwd will likely cause all sorts of
    # problems. Thus, only set this if we really need to.
    if WORKSPACE_ENV not in os.environ:
        os.environ[WORKSPACE_ENV] = cwd
    # By default, Jenkins sets HOME to JENKINS_HOME, which is shared by all
    # jobs. To avoid collisions, set it to the cwd instead, but only when
    # running on Jenkins.
    if os.environ.get(HOME_ENV, None) == os.environ.get(JENKINS_HOME_ENV, None):
        os.environ[HOME_ENV] = cwd
    # TODO(fejta): jenkins sets JOB_ENV and pieces of our infra expect this
    #              value. Consider making everything below here agnostic to the
    #              job name.
    if JOB_ENV not in os.environ:
        os.environ[JOB_ENV] = job
    elif os.environ[JOB_ENV] != job:
        logging.warning('%s=%s (overrides %s)', JOB_ENV, job, os.environ[JOB_ENV])
        os.environ[JOB_ENV] = job
    # TODO(fejta): Magic value to tell our test code not to upload started.json
    # TODO(fejta): delete upload-to-gcs.sh and then this value.
    os.environ[BOOTSTRAP_ENV] = 'yes'
    # This helps prevent reuse of cloudsdk configuration. It also reduces the
    # risk that running a job on a workstation corrupts the user's config.
    os.environ[CLOUDSDK_ENV] = '%s/.config/gcloud' % cwd


def job_args(args):
    """Converts 'a ${FOO} $bar' into 'a wildly different string'."""
    return [os.path.expandvars(a) for a in args]


def job_script(job):
    """Return path to script for job."""
    with open(test_infra('jobs/config.json')) as fp:
        config = json.loads(fp.read())
    job_config = config[job]
    cmd = test_infra('scenarios/%s.py' % job_config['scenario'])
    return [cmd] + job_args(job_config.get('args', []))
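# job_script expects jobs/config.json to map job names to a scenario plus
# args, e.g. a hypothetical entry:
#   {"fake-e2e-job": {"scenario": "kubernetes_e2e", "args": ["--timeout=${TIMEOUT}"]}}
# would resolve to [.../scenarios/kubernetes_e2e.py, '--timeout=<expanded>'].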


def gubernator_uri(paths):
    """Return a gubernator link for this build."""
    job = os.path.dirname(paths.build_log)
    if job.startswith('gs:/'):
        return job.replace('gs:/', GUBERNATOR, 1)
    return job
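# Example (hypothetical job): a build_log of
#   gs://kubernetes-jenkins/logs/fake-job/42/build-log.txt
# maps to
#   https://k8s-gubernator.appspot.com/build/kubernetes-jenkins/logs/fake-job/42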


@contextlib.contextmanager
def choose_ssh_key(ssh):
    """Creates a script for GIT_SSH that uses -i ssh if set."""
    if not ssh:  # Nothing to do
        yield
        return

    # Create a script for use with GIT_SSH, which defines the program git uses
    # during git fetch. In the future change this to GIT_SSH_COMMAND
    # https://superuser.com/questions/232373/how-to-tell-git-which-private-key-to-use
    with tempfile.NamedTemporaryFile(prefix='ssh', delete=False) as fp:
        fp.write('#!/bin/sh\nssh -o StrictHostKeyChecking=no -i \'%s\' -F /dev/null "${@}"\n' % ssh)
    try:
        os.chmod(fp.name, 0500)
        had = 'GIT_SSH' in os.environ
        old = os.getenv('GIT_SSH')
        os.environ['GIT_SSH'] = fp.name

        yield

        del os.environ['GIT_SSH']
        if had:
            os.environ['GIT_SSH'] = old
    finally:
        os.unlink(fp.name)


def setup_root(call, root, repos, ssh, git_cache, clean):
    """Create root dir, checkout repo and cd into resulting dir."""
    if not os.path.exists(root):
        os.makedirs(root)
    root_dir = os.path.realpath(root)
    logging.info('Root: %s', root_dir)
    os.chdir(root_dir)
    logging.info('cd to %s', root_dir)

    with choose_ssh_key(ssh):
        for repo, (branch, pull) in repos.items():
            os.chdir(root_dir)
            logging.info(
                'Checkout: %s %s',
                os.path.join(root_dir, repo),
                pull and pull or branch)
            checkout(call, repo, branch, pull, ssh, git_cache, clean)
    if len(repos) > 1:  # cd back into the primary repo
        os.chdir(root_dir)
        os.chdir(repos.main)


class Repos(dict):
    """{"repo": (branch, pull)} dict with a .main attribute."""
    main = ''

    def __setitem__(self, k, v):
        if not self:
            self.main = k
        return super(Repos, self).__setitem__(k, v)


def parse_repos(args):
    """Convert --repo=foo=this,123:abc,555:ddd into a Repos()."""
    repos = args.repo or {}
    if not repos and not args.bare:
        raise ValueError('--bare or --repo required')
    ret = Repos()
    if len(repos) != 1:
        if args.pull:
            raise ValueError('Multi --repo does not support --pull, use --repo=R=branch,p1,p2')
        if args.branch:
            raise ValueError('Multi --repo does not support --branch, use --repo=R=branch')
    elif len(repos) == 1 and (args.branch or args.pull):
        repo = repos[0]
        if '=' in repo or ':' in repo:
            raise ValueError('--repo cannot contain = or : with --branch or --pull')
        ret[repo] = (args.branch, args.pull)
        return ret
    for repo in repos:
        mat = re.match(r'([^=]+)(=([^:,~^\s]+(:[0-9a-fA-F]+)?(,|$))+)?$', repo)
        if not mat:
            raise ValueError('bad repo', repo, repos)
        this_repo = mat.group(1)
        if not mat.group(2):
            ret[this_repo] = ('master', '')
            continue
        commits = mat.group(2)[1:].split(',')
        if len(commits) == 1:
            # Checking out a branch, possibly at a specific commit
            ret[this_repo] = (commits[0], '')
            continue
        # Checking out one or more PRs
        ret[this_repo] = ('', ','.join(commits))
    return ret
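# Examples (hypothetical flags):
#   --repo=k8s.io/test-infra                      -> ('master', '')
#   --repo=k8s.io/kubernetes=release-1.8          -> ('release-1.8', '')
#   --repo=k8s.io/kubernetes=master:abcd,123:ef01 -> ('', 'master:abcd,123:ef01')
# The first --repo flag becomes Repos().main, the primary repo the job runs in.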


def bootstrap(args):
    """Clone repo at pull/branch into root and run job script."""
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    job = args.job
    repos = parse_repos(args)
    upload = args.upload

    build_log_path = os.path.abspath('build-log.txt')
    build_log = setup_logging(build_log_path)
    started = time.time()
    if args.timeout:
        end = started + args.timeout * 60
    else:
        end = 0
    call = lambda *a, **kw: _call(end, *a, **kw)
    gsutil = GSUtil(call)

    logging.info('Bootstrap %s...', job)
    build = build_name(started)

    if upload:
        if repos and repos[repos.main][1]:  # merging commits, a pr
            paths = pr_paths(upload, repos, job, build)
        else:
            paths = ci_paths(upload, job, build)
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        # TODO(fejta): Replace env var below with a flag eventually.
        os.environ[GCS_ARTIFACTS_ENV] = paths.artifacts

    version = 'unknown'
    exc_type = None
    setup_creds = False

    try:
        setup_root(call, args.root, repos, args.ssh, args.git_cache, args.clean)
        logging.info('Configure environment...')
        if repos:
            version = find_version(call)
        else:
            version = ''
        setup_magic_environment(job)
        setup_credentials(call, args.service_account, upload)
        setup_creds = True
        logging.info('Start %s at %s...', build, version)
        if upload:
            start(gsutil, paths, started, node(), version, repos)
        success = False
        try:
            call(job_script(job))
            logging.info('PASS: %s', job)
            success = True
        except subprocess.CalledProcessError:
            logging.error('FAIL: %s', job)
    except Exception:  # pylint: disable=broad-except
        exc_type, exc_value, exc_traceback = sys.exc_info()
        logging.exception('unexpected error')
        success = False
    if not setup_creds:
        setup_credentials(call, args.service_account, upload)
    if upload:
        logging.info('Upload result and artifacts...')
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        try:
            finish(
                gsutil, paths, success,
                os.path.join(os.getenv(WORKSPACE_ENV, os.getcwd()), '_artifacts'),
                build, version, repos, call
            )
        except subprocess.CalledProcessError:  # Still try to upload build log
            success = False
    logging.getLogger('').removeHandler(build_log)
    build_log.close()
    if upload:
        gsutil.copy_file(paths.build_log, build_log_path)
    if exc_type:
        raise exc_type, exc_value, exc_traceback  # pylint: disable=raising-bad-type
    if not success:
        # TODO(fejta/spxtr): we should distinguish infra and non-infra problems
        # by exit code and automatically retrigger after an infra-problem.
        sys.exit(1)


def parse_args(arguments=None):
    """Parse arguments or sys.argv[1:]."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--root', default='.', help='Root dir to work with')
    parser.add_argument(
        '--timeout', type=float, default=0, help='Timeout in minutes if set')
    parser.add_argument(
        '--repo',
        action='append',
        help='Fetch the specified repositories, with the first one considered primary')
    parser.add_argument(
        '--bare',
        action='store_true',
        help='Do not check out a repository')
    parser.add_argument('--job', required=True, help='Name of the job to run')
    parser.add_argument(
        '--upload',
        help='Upload results here if set, requires --service-account')
    parser.add_argument(
        '--service-account',
        help='Activate and use path/to/service-account.json if set.')
    parser.add_argument(
        '--ssh',
        help='Use the ssh key to fetch the repository instead of https if set.')
    parser.add_argument(
        '--git-cache',
        help='Location of the git cache.')
    parser.add_argument(
        '--clean',
        action='store_true',
        help='Clean the git repo before running tests.')
    args = parser.parse_args(arguments)
    # --pull is deprecated, use --repo=k8s.io/foo=master:abcd,12:ef12,45:ff65
    setattr(args, 'pull', None)
    # --branch is deprecated, use --repo=k8s.io/foo=master
    setattr(args, 'branch', None)
    if bool(args.repo) == bool(args.bare):
        raise argparse.ArgumentTypeError(
            'Expected --repo xor --bare:', args.repo, args.bare)
    return args


if __name__ == '__main__':
    ARGS = parse_args()
    bootstrap(ARGS)