github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/jenkins/bootstrap.py

#!/usr/bin/env python

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Need to figure out why this only fails on travis
# pylint: disable=bad-continuation

"""Bootstraps starting a test job.

The following should already be done:
  git checkout http://k8s.io/test-infra
  cd $WORKSPACE
  test-infra/jenkins/bootstrap.py <--repo=R || --bare> <--job=J> <--pull=P || --branch=B>

The bootstrapper now does the following:
  # note start time
  # check out repos defined in --repo
  # note job started
  # call runner defined in $JOB.json
  # upload artifacts (this will change later)
  # upload build-log.txt
  # note job ended

The contract with the runner is as follows:
  * Runner must exit non-zero if the job fails for any reason.
"""


import argparse
import contextlib
import json
import logging
import os
import pipes
import random
import re
import select
import signal
import socket
import subprocess
import sys
import tempfile
import time

ORIG_CWD = os.getcwd()  # Checkout changes cwd


def read_all(end, stream, append):
    """Read all buffered lines from a stream."""
    while not end or time.time() < end:
        line = stream.readline()
        if not line:
            return True  # Read everything
        # Strip \n at the end if any. Last line of file may not have one.
        append(line.rstrip('\n'))
        # Is there more on the buffer?
        ret = select.select([stream.fileno()], [], [], 0.1)
        if not ret[0]:
            return False  # Cleared buffer but not at the end
    return False  # Time expired
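

# Note: read_all() returns True only at end-of-stream; False means either the
# 0.1s select() window found nothing more to read or the deadline passed, so
# the caller in _call() keeps the descriptor registered and polls again.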


def elapsed(since):
    """Return the number of minutes elapsed since a time."""
    return (time.time() - since) / 60


def terminate(end, proc, kill):
    """Terminate or kill the process after end."""
    if not end or time.time() <= end:
        return False
    if kill:  # Process will not die, kill everything
        pgid = os.getpgid(proc.pid)
        logging.info(
            'Kill %d and process group %d', proc.pid, pgid)
        os.killpg(pgid, signal.SIGKILL)
        proc.kill()
        return True
    logging.info(
        'Terminate %d on timeout', proc.pid)
    proc.terminate()
    return True


def _call(end, cmd, stdin=None, check=True, output=None, log_failures=True):
    """Start a subprocess."""
    logging.info('Call: %s', ' '.join(pipes.quote(c) for c in cmd))
    begin = time.time()
    if end:
        end = max(end, time.time() + 60)  # Allow at least 60s per command
    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE if stdin is not None else None,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        preexec_fn=os.setsid,
    )
    if stdin:
        proc.stdin.write(stdin)
        proc.stdin.close()
    out = []
    code = None
    timeout = False
    reads = {
        proc.stderr.fileno(): (proc.stderr, logging.warning),
        proc.stdout.fileno(): (
            proc.stdout, (out.append if output else logging.info)),
    }
    while reads:
        if terminate(end, proc, timeout):
            if timeout:  # We killed everything
                break
            # Give subprocess some cleanup time before killing.
            end = time.time() + 15 * 60
            timeout = True
        ret = select.select(reads, [], [], 0.1)
        for fdesc in ret[0]:
            if read_all(end, *reads[fdesc]):
                reads.pop(fdesc)
        if not ret[0] and proc.poll() is not None:
            break  # process exited without closing pipes (timeout?)

    code = proc.wait()
    if timeout:
        code = code or 124
        logging.error('Build timed out')
    if code and log_failures:
        logging.error('Command failed')
    logging.info(
        'process %d exited with code %d after %.1fm',
        proc.pid, code, elapsed(begin))
    out.append('')
    lines = output and '\n'.join(out)
    if check and code:
        raise subprocess.CalledProcessError(code, cmd, lines)
    return lines
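

# A minimal sketch of how the rest of this file uses _call (the make command
# shown is hypothetical); bootstrap() builds exactly this kind of wrapper:
#
#   end = time.time() + 60 * 60  # one-hour deadline
#   call = lambda *a, **kw: _call(end, *a, **kw)
#   head = call(['git', 'rev-parse', 'HEAD'], output=True).strip()
#   call(['make', 'test'])  # raises subprocess.CalledProcessError on failure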


def ref_has_shas(ref):
    """Determine if a reference specifies shas (contains ':')."""
    return isinstance(ref, basestring) and ':' in ref


def pull_numbers(pull):
    """Turn a pull reference list into a list of PR numbers to merge."""
    if ref_has_shas(pull):
        return [r.split(':')[0] for r in pull.split(',')][1:]
    return [str(pull)]


def pull_ref(pull):
    """Turn a PR number or list of refs into specific refs to fetch and check out."""
    if isinstance(pull, int) or ',' not in pull:
        return ['+refs/pull/%d/merge' % int(pull)], ['FETCH_HEAD']
    pulls = pull.split(',')
    refs = []
    checkouts = []
    for ref in pulls:
        if ':' in ref:  # master:abcd or 1234:abcd
            name, sha = ref.split(':')
        elif not refs:  # master
            name, sha = ref, 'FETCH_HEAD'
        else:
            name = ref
            sha = 'refs/pr/%s' % ref

        checkouts.append(sha)
        if not refs:  # First ref should be branch to merge into
            refs.append(name)
        else:  # Subsequent refs should be PR numbers
            num = int(name)
            refs.append('+refs/pull/%d/head:refs/pr/%d' % (num, num))
    return refs, checkouts


def branch_ref(branch):
    """Split branch:sha if necessary."""
    if ref_has_shas(branch):
        split_refs = branch.split(':')
        return [split_refs[0]], [split_refs[1]]
    return [branch], ['FETCH_HEAD']


def repository(repo, ssh):
    """Return the url associated with the repo."""
    if repo.startswith('k8s.io/'):
        repo = 'github.com/kubernetes/%s' % (repo[len('k8s.io/'):])
    if ssh:
        if ":" not in repo:
            parts = repo.split('/', 1)
            repo = '%s:%s' % (parts[0], parts[1])
        return 'git@%s' % repo
    return 'https://%s' % repo


def random_sleep(attempt):
    """Sleep attempt**2 seconds with a random fractional offset."""
    time.sleep(random.random() + attempt ** 2)
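

# Worked examples for the ref helpers above (values derived from the code):
#   pull_ref(1234)                   -> (['+refs/pull/1234/merge'], ['FETCH_HEAD'])
#   pull_ref('master:abcd,123:ef01') -> (['master', '+refs/pull/123/head:refs/pr/123'],
#                                        ['abcd', 'ef01'])
#   branch_ref('release-1.8:beef')   -> (['release-1.8'], ['beef'])
#   repository('k8s.io/kubernetes', ssh=True)
#                                    -> 'git@github.com:kubernetes/kubernetes'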


def checkout(call, repo, branch, pull, ssh='', git_cache='', clean=False):
    """Fetch and checkout the repository at the specified branch/pull."""
    # pylint: disable=too-many-locals
    if bool(branch) == bool(pull):
        raise ValueError('Must specify exactly one of --branch or --pull')

    if pull:
        refs, checkouts = pull_ref(pull)
    else:
        refs, checkouts = branch_ref(branch)

    git = 'git'
    if git_cache:
        cache_dir = '%s/%s' % (git_cache, repo)
        try:
            os.makedirs(cache_dir)
        except OSError:
            pass
        call([git, 'init', repo, '--separate-git-dir=%s' % cache_dir])
        call(['rm', '-f', '%s/index.lock' % cache_dir])
    else:
        call([git, 'init', repo])
    os.chdir(repo)

    if clean:
        call([git, 'clean', '-dfx'])
        call([git, 'reset', '--hard'])

    # To make a merge commit, a user needs to be set. It's okay to use a dummy
    # user here, since we're not exporting the history.
    call([git, 'config', '--local', 'user.name', 'K8S Bootstrap'])
    call([git, 'config', '--local', 'user.email', 'k8s_bootstrap@localhost'])
    retries = 3
    for attempt in range(retries):
        try:
            call([git, 'fetch', '--quiet', '--tags', repository(repo, ssh)] + refs)
            break
        except subprocess.CalledProcessError as cpe:
            if attempt >= retries - 1:
                raise
            if cpe.returncode != 128:
                raise
            logging.warning('git fetch failed')
            random_sleep(attempt)
    call([git, 'checkout', '-B', 'test', checkouts[0]])
    for ref, head in zip(refs, checkouts)[1:]:
        call(['git', 'merge', '--no-ff', '-m', 'Merge %s' % ref, head])


def repos_dict(repos):
    """Returns {"repo1": "branch", "repo2": "pull"}."""
    return {r: b or p for (r, (b, p)) in repos.items()}


def start(gsutil, paths, stamp, node_name, version, repos):
    """Construct and upload started.json."""
    data = {
        'timestamp': int(stamp),
        'jenkins-node': node_name,
        'node': node_name,
    }
    if version:
        data['repo-version'] = version
        data['version'] = version  # TODO(fejta): retire
    if repos:
        pull = repos[repos.main]
        if ref_has_shas(pull[1]):
            data['pull'] = pull[1]
        data['repos'] = repos_dict(repos)

    gsutil.upload_json(paths.started, data)
    # Upload a link to the build path in the directory
    if paths.pr_build_link:
        gsutil.upload_text(
            paths.pr_build_link,
            paths.pr_path,
            additional_headers=['-h', 'x-goog-meta-link: %s' % paths.pr_path]
        )
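

# started.json produced by start() looks roughly like this (values hypothetical):
#   {
#     "timestamp": 1507246800,
#     "jenkins-node": "agent-light-1", "node": "agent-light-1",
#     "repo-version": "v1.9.0-alpha.1.123+deadbeef", "version": "v1.9.0-alpha.1.123+deadbeef",
#     "pull": "master:abcd,123:ef01",
#     "repos": {"k8s.io/kubernetes": "master:abcd,123:ef01"}
#   }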


class GSUtil(object):
    """A helper class for making gsutil commands."""
    gsutil = 'gsutil'

    def __init__(self, call):
        self.call = call

    def stat(self, path):
        """Return metadata about the object, such as generation."""
        cmd = [self.gsutil, 'stat', path]
        return self.call(cmd, output=True, log_failures=False)

    def ls(self, path):
        """List a bucket or subdir."""
        cmd = [self.gsutil, 'ls', path]
        return self.call(cmd, output=True)

    def upload_json(self, path, jdict, generation=None):
        """Upload the dictionary object to path."""
        if generation is not None:  # generation==0 means object does not exist
            gen = ['-h', 'x-goog-if-generation-match:%s' % generation]
        else:
            gen = []
        cmd = [
            self.gsutil, '-q',
            '-h', 'Content-Type:application/json'] + gen + [
            'cp', '-', path]
        self.call(cmd, stdin=json.dumps(jdict, indent=2))

    def copy_file(self, dest, orig):
        """Copy the file to the specified path using compressed encoding."""
        cmd = [self.gsutil, '-q', 'cp', '-Z', orig, dest]
        self.call(cmd)

    def upload_text(self, path, txt, additional_headers=None, cached=True):
        """Copy the text to path, optionally disabling caching."""
        headers = ['-h', 'Content-Type:text/plain']
        if not cached:
            headers += ['-h', 'Cache-Control:private, max-age=0, no-transform']
        if additional_headers:
            headers += additional_headers
        cmd = [self.gsutil, '-q'] + headers + ['cp', '-', path]
        self.call(cmd, stdin=txt)

    def cat(self, path, generation):
        """Return contents of path#generation."""
        cmd = [self.gsutil, '-q', 'cat', '%s#%s' % (path, generation)]
        return self.call(cmd, output=True)

    def upload_artifacts(self, gsutil, path, artifacts):
        """Upload artifacts to the specified path."""
        # Upload artifacts
        if not os.path.isdir(artifacts):
            logging.warning('Artifacts dir %s is missing.', artifacts)
            return
        try:
            # If the remote path exists, it will create an .../_artifacts
            # subdir instead.
            gsutil.ls(path)
            # Success means remote path exists
            remote_base = os.path.basename(path)
            local_base = os.path.basename(artifacts)
            if remote_base != local_base:
                # If the basenames differ, copy things over first.
                localpath = artifacts.replace(local_base, remote_base)
                os.rename(artifacts, localpath)
                artifacts = localpath
                path = path[:-len(remote_base + '/')]
        except subprocess.CalledProcessError:
            logging.warning('Remote dir %s does not exist yet', path)
        cmd = [
            self.gsutil, '-m', '-q',
            '-o', 'GSUtil:use_magicfile=True',
            'cp', '-r', '-c', '-z', 'log,txt,xml',
            artifacts, path,
        ]
        self.call(cmd)


def append_result(gsutil, path, build, version, passed):
    """Download a json list and append metadata about this build to it."""
    # TODO(fejta): delete the clone of this logic in upload-to-gcs.sh
    #              (this is update_job_result_cache)
    end = time.time() + 300  # try for up to five minutes
    errors = 0
    while time.time() < end:
        if errors:
            random_sleep(min(errors, 3))
        try:
            out = gsutil.stat(path)
            gen = re.search(r'Generation:\s+(\d+)', out).group(1)
        except subprocess.CalledProcessError:
            gen = 0
        if gen:
            try:
                cache = json.loads(gsutil.cat(path, gen))
                if not isinstance(cache, list):
                    raise ValueError(cache)
            except ValueError as exc:
                logging.warning('Failed to decode JSON: %s', exc)
                cache = []
            except subprocess.CalledProcessError:  # gen doesn't exist
                errors += 1
                continue
        else:
            cache = []
        cache.append({
            'version': version,  # TODO(fejta): retire
            'job-version': version,
            'buildnumber': build,
            'passed': bool(passed),
            'result': 'SUCCESS' if passed else 'FAILURE',
        })
        cache = cache[-300:]
        try:
            gsutil.upload_json(path, cache, generation=gen)
            return
        except subprocess.CalledProcessError:
            logging.warning('Failed to append to %s#%s', path, gen)
            errors += 1
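

# Each entry append_result() adds to jobResultsCache.json has this shape:
#   {"version": ..., "job-version": ..., "buildnumber": ...,
#    "passed": true|false, "result": "SUCCESS"|"FAILURE"}
# The cache keeps only the most recent 300 entries; the
# x-goog-if-generation-match header turns the read-modify-write into a
# compare-and-swap that the loop above retries on conflict.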


def metadata(repos, artifacts, call):
    """Return metadata associated with the build, including inside artifacts."""
    path = os.path.join(artifacts or '', 'metadata.json')
    meta = None
    if os.path.isfile(path):
        try:
            with open(path) as fp:
                meta = json.loads(fp.read())
        except (IOError, ValueError):
            pass

    if not meta or not isinstance(meta, dict):
        meta = {}
    if repos:
        meta['repo'] = repos.main
        meta['repos'] = repos_dict(repos)

    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['repo-commit'] = commit.strip()
    except subprocess.CalledProcessError:
        pass

    cwd = os.getcwd()
    os.chdir(test_infra('.'))
    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['infra-commit'] = commit.strip()[:9]
    except subprocess.CalledProcessError:
        pass
    os.chdir(cwd)

    return meta


def finish(gsutil, paths, success, artifacts, build, version, repos, call):
    """
    Args:
        paths: a Paths instance.
        success: the build passed if true.
        artifacts: a dir containing artifacts to upload.
        build: identifier of this build.
        version: identifies what version of the code the build tested.
        repos: a Repos dict of the repositories the build targeted.
    """

    if os.path.isdir(artifacts) and any(f for _, _, f in os.walk(artifacts)):
        try:
            gsutil.upload_artifacts(gsutil, paths.artifacts, artifacts)
        except subprocess.CalledProcessError:
            logging.warning('Failed to upload artifacts')
    else:
        logging.warning('Missing local artifacts: %s', artifacts)

    meta = metadata(repos, artifacts, call)
    if not version:
        version = meta.get('job-version')
    if not version:  # TODO(fejta): retire
        version = meta.get('version')
    # github.com/kubernetes/release/find_green_build depends on append_result()
    # TODO(fejta): reconsider whether this is how we want to solve this problem.
    append_result(gsutil, paths.result_cache, build, version, success)
    if paths.pr_result_cache:
        append_result(gsutil, paths.pr_result_cache, build, version, success)

    data = {
        # TODO(fejta): update utils.go in contrib to accept a float
        'timestamp': int(time.time()),
        'result': 'SUCCESS' if success else 'FAILURE',
        'passed': bool(success),
        'metadata': meta,
    }
    if version:
        data['job-version'] = version
        data['version'] = version  # TODO(fejta): retire
    gsutil.upload_json(paths.finished, data)

    # Upload the latest build for the job.
    # Do this last, since other tools expect the rest of the data to be
    # published when this file is created.
    for path in {paths.latest, paths.pr_latest}:
        if path:
            try:
                gsutil.upload_text(path, str(build), cached=False)
            except subprocess.CalledProcessError:
                logging.warning('Failed to update %s', path)


def test_infra(*paths):
    """Return path relative to root of test-infra repo."""
    return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)


def node():
    """Return the name of the node running the build."""
    # TODO(fejta): jenkins sets the node name and our infra expects this value.
    # TODO(fejta): Consider doing something different here.
    if NODE_ENV not in os.environ:
        os.environ[NODE_ENV] = ''.join(socket.gethostname().split('.')[:1])
    return os.environ[NODE_ENV]


def find_version(call):
    """Determine and return the version of the build."""
    # TODO(fejta): once job-version is functional switch this to
    #              git rev-parse [--short=N] HEAD^{commit}
    version_file = 'version'
    if os.path.isfile(version_file):
        # e2e tests which download kubernetes use this path:
        with open(version_file) as fp:
            return fp.read().strip()

    version_script = 'hack/lib/version.sh'
    if os.path.isfile(version_script):
        cmd = [
            'bash', '-c', (
"""
set -o errexit
set -o nounset
export KUBE_ROOT=.
source %s
kube::version::get_version_vars
echo $KUBE_GIT_VERSION
""" % version_script)
        ]
        return call(cmd, output=True).strip()

    return 'unknown'
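

# find_version() resolves the version in order of preference (examples hypothetical):
#   1. the contents of ./version, e.g. "v1.8.0-beta.1.50+deadbeef"
#   2. $KUBE_GIT_VERSION as computed by hack/lib/version.sh
#   3. the literal string "unknown"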


class Paths(object):  # pylint: disable=too-many-instance-attributes,too-few-public-methods
    """Links to remote gcs-paths for uploading results."""
    def __init__(  # pylint: disable=too-many-arguments
            self,
            artifacts,  # artifacts folder (in build)
            build_log,  # build-log.txt (in build)
            pr_path,  # path to build
            finished,  # finished.json (metadata from end of build)
            latest,  # latest-build.txt (in job)
            pr_build_link,  # file containing pr_path (in job directory)
            pr_latest,  # latest-build.txt (in pr job)
            pr_result_cache,  # jobResultsCache.json (in pr job)
            result_cache,  # jobResultsCache.json (cache of latest results in job)
            started,  # started.json (metadata from start of build)
    ):
        self.artifacts = artifacts
        self.build_log = build_log
        self.pr_path = pr_path
        self.finished = finished
        self.latest = latest
        self.pr_build_link = pr_build_link
        self.pr_latest = pr_latest
        self.pr_result_cache = pr_result_cache
        self.result_cache = result_cache
        self.started = started


def ci_paths(base, job, build):
    """Return a Paths() instance for a continuous build."""
    latest = os.path.join(base, job, 'latest-build.txt')
    return Paths(
        artifacts=os.path.join(base, job, build, 'artifacts'),
        build_log=os.path.join(base, job, build, 'build-log.txt'),
        pr_path=None,
        finished=os.path.join(base, job, build, 'finished.json'),
        latest=latest,
        pr_build_link=None,
        pr_latest=None,
        pr_result_cache=None,
        result_cache=os.path.join(base, job, 'jobResultsCache.json'),
        started=os.path.join(base, job, build, 'started.json'),
    )
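

# With base='gs://bucket/logs' (hypothetical), job='ci-foo', build='42',
# ci_paths() yields objects like:
#   gs://bucket/logs/ci-foo/42/{started.json,finished.json,build-log.txt,artifacts/}
#   gs://bucket/logs/ci-foo/{latest-build.txt,jobResultsCache.json}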


def pr_paths(base, repos, job, build):
    """Return a Paths() instance for a PR."""
    if not repos:
        raise ValueError('repos is empty')
    repo = repos.main
    pull = str(repos[repo][1])
    if repo in ['k8s.io/kubernetes', 'kubernetes/kubernetes']:
        prefix = ''
    elif repo.startswith('k8s.io/'):
        prefix = repo[len('k8s.io/'):]
    elif repo.startswith('kubernetes/'):
        prefix = repo[len('kubernetes/'):]
    elif repo.startswith('github.com/'):
        prefix = repo[len('github.com/'):].replace('/', '_')
    else:
        prefix = repo.replace('/', '_')
    # Batch merges are those with more than one PR specified.
    pr_nums = pull_numbers(pull)
    if len(pr_nums) > 1:
        pull = os.path.join(prefix, 'batch')
    else:
        pull = os.path.join(prefix, pr_nums[0])
    pr_path = os.path.join(base, 'pull', pull, job, build)
    result_cache = os.path.join(
        base, 'directory', job, 'jobResultsCache.json')
    pr_result_cache = os.path.join(
        base, 'pull', pull, job, 'jobResultsCache.json')
    return Paths(
        artifacts=os.path.join(pr_path, 'artifacts'),
        build_log=os.path.join(pr_path, 'build-log.txt'),
        pr_path=pr_path,
        finished=os.path.join(pr_path, 'finished.json'),
        latest=os.path.join(base, 'directory', job, 'latest-build.txt'),
        pr_build_link=os.path.join(base, 'directory', job, '%s.txt' % build),
        pr_latest=os.path.join(base, 'pull', pull, job, 'latest-build.txt'),
        pr_result_cache=pr_result_cache,
        result_cache=result_cache,
        started=os.path.join(pr_path, 'started.json'),
    )


BUILD_ENV = 'BUILD_NUMBER'
BOOTSTRAP_ENV = 'BOOTSTRAP_MIGRATION'
CLOUDSDK_ENV = 'CLOUDSDK_CONFIG'
GCE_KEY_ENV = 'JENKINS_GCE_SSH_PRIVATE_KEY_FILE'
GUBERNATOR = 'https://k8s-gubernator.appspot.com/build'
HOME_ENV = 'HOME'
JENKINS_HOME_ENV = 'JENKINS_HOME'
JOB_ENV = 'JOB_NAME'
NODE_ENV = 'NODE_NAME'
SERVICE_ACCOUNT_ENV = 'GOOGLE_APPLICATION_CREDENTIALS'
WORKSPACE_ENV = 'WORKSPACE'
GCS_ARTIFACTS_ENV = 'GCS_ARTIFACTS_DIR'


def build_name(started):
    """Return the unique(ish) string representing this build."""
    # TODO(fejta): right now jenkins sets the BUILD_NUMBER and does this
    #              in an environment variable. Consider migrating this to a
    #              bootstrap.py flag
    if BUILD_ENV not in os.environ:
        # Automatically generate a build number if none is set
        uniq = '%x-%d' % (hash(node()), os.getpid())
        autogen = time.strftime('%Y%m%d-%H%M%S-' + uniq, time.gmtime(started))
        os.environ[BUILD_ENV] = autogen
    return os.environ[BUILD_ENV]
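

# When Jenkins has not set BUILD_NUMBER, build_name() autogenerates one such as
# "20171006-011030-3fa9c2b1-12345" (UTC timestamp, hashed node name, pid).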


def setup_credentials(call, robot, upload):
    """Activate the service account unless robot is None."""
    # TODO(fejta): stop activating inside the image
    # TODO(fejta): allow use of existing gcloud auth
    if robot:
        os.environ[SERVICE_ACCOUNT_ENV] = robot
    if not os.getenv(SERVICE_ACCOUNT_ENV) and upload:
        logging.warning('Cannot --upload=%s, no active gcloud account.', upload)
        raise ValueError('--upload requires --service-account')
    if not os.getenv(SERVICE_ACCOUNT_ENV) and not upload:
        logging.info('Will not upload results.')
        return
    if not os.path.isfile(os.environ[SERVICE_ACCOUNT_ENV]):
        raise IOError(
            'Cannot find service account credentials',
            os.environ[SERVICE_ACCOUNT_ENV],
            'Create service account and then create key at '
            'https://console.developers.google.com/iam-admin/serviceaccounts/project',  # pylint: disable=line-too-long
        )
    call([
        'gcloud',
        'auth',
        'activate-service-account',
        '--key-file=%s' % os.environ[SERVICE_ACCOUNT_ENV],
    ])
    try:  # Old versions of gcloud may not support this value
        account = call(
            ['gcloud', 'config', 'get-value', 'account'], output=True).strip()
    except subprocess.CalledProcessError:
        account = 'unknown'
    logging.info('Will upload results to %s using %s', upload, account)


def setup_logging(path):
    """Initialize logging to screen and path."""
    # See https://docs.python.org/2/library/logging.html#logrecord-attributes
    # [IWEF]mmdd HH:MM:SS.mmm] msg
    fmt = '%(levelname).1s%(asctime)s.%(msecs)03d] %(message)s'  # pylint: disable=line-too-long
    datefmt = '%m%d %H:%M:%S'
    logging.basicConfig(
        level=logging.INFO,
        format=fmt,
        datefmt=datefmt,
    )
    build_log = logging.FileHandler(filename=path, mode='w')
    build_log.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt, datefmt=datefmt)
    build_log.setFormatter(formatter)
    logging.getLogger('').addHandler(build_log)
    return build_log


def setup_magic_environment(job):
    """Set magic environment variables scripts currently expect."""
    home = os.environ[HOME_ENV]
    # TODO(fejta): jenkins sets these values. Consider migrating to using
    #              a secret volume instead and passing the path to this volume
    #              into bootstrap.py as a flag.
    os.environ.setdefault(
        GCE_KEY_ENV,
        os.path.join(home, '.ssh/google_compute_engine'),
    )
    os.environ.setdefault(
        'JENKINS_GCE_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/google_compute_engine.pub'),
    )
    os.environ.setdefault(
        'JENKINS_AWS_SSH_PRIVATE_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa'),
    )
    os.environ.setdefault(
        'JENKINS_AWS_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa.pub'),
    )

    cwd = os.getcwd()
    # TODO(fejta): jenkins sets WORKSPACE and pieces of our infra expect this
    #              value. Consider doing something else in the future.
    # Furthermore, in the Jenkins and Prow environments, this is already set
    # to something reasonable, but using cwd will likely cause all sorts of
    # problems. Thus, only set this if we really need to.
    if WORKSPACE_ENV not in os.environ:
        os.environ[WORKSPACE_ENV] = cwd
    # By default, Jenkins sets HOME to JENKINS_HOME, which is shared by all
    # jobs. To avoid collisions, set it to the cwd instead, but only when
    # running on Jenkins.
    if os.environ.get(HOME_ENV, None) == os.environ.get(JENKINS_HOME_ENV, None):
        os.environ[HOME_ENV] = cwd
    # TODO(fejta): jenkins sets JOB_ENV and pieces of our infra expect this
    #              value. Consider making everything below here agnostic to the
    #              job name.
    if JOB_ENV not in os.environ:
        os.environ[JOB_ENV] = job
    elif os.environ[JOB_ENV] != job:
        logging.warning('%s=%s (overrides %s)', JOB_ENV, job, os.environ[JOB_ENV])
        os.environ[JOB_ENV] = job
    # TODO(fejta): Magic value to tell our test code not to upload started.json
    # TODO(fejta): delete upload-to-gcs.sh and then this value.
    os.environ[BOOTSTRAP_ENV] = 'yes'
    # This helps prevent reuse of cloudsdk configuration. It also reduces the
    # risk that running a job on a workstation corrupts the user's config.
    os.environ[CLOUDSDK_ENV] = '%s/.config/gcloud' % cwd
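

# jobs/config.json (read by job_script below) maps each job name to a scenario
# and its arguments; a hypothetical entry looks like:
#   {"ci-foo-build": {"scenario": "execute", "args": ["--env=FOO=${FOO}"]}}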


def job_args(args):
    """Converts 'a ${FOO} $bar' into 'a wildly different string'."""
    return [os.path.expandvars(a) for a in args]


def job_script(job):
    """Return path to script for job."""
    with open(test_infra('jobs/config.json')) as fp:
        config = json.loads(fp.read())
    job_config = config[job]
    cmd = test_infra('scenarios/%s.py' % job_config['scenario'])
    return [cmd] + job_args(job_config.get('args', []))


def gubernator_uri(paths):
    """Return a gubernator link for this build."""
    job = os.path.dirname(paths.build_log)
    if job.startswith('gs:/'):
        return job.replace('gs:/', GUBERNATOR, 1)
    return job


@contextlib.contextmanager
def choose_ssh_key(ssh):
    """Creates a script for GIT_SSH that uses -i ssh if set."""
    if not ssh:  # Nothing to do
        yield
        return

    # Create a script for use with GIT_SSH, which defines the program git uses
    # during git fetch. In the future change this to GIT_SSH_COMMAND
    # https://superuser.com/questions/232373/how-to-tell-git-which-private-key-to-use
    with tempfile.NamedTemporaryFile(prefix='ssh', delete=False) as fp:
        fp.write('#!/bin/sh\nssh -o StrictHostKeyChecking=no -i \'%s\' -F /dev/null "${@}"\n' % ssh)
    try:
        os.chmod(fp.name, 0500)
        had = 'GIT_SSH' in os.environ
        old = os.getenv('GIT_SSH')
        os.environ['GIT_SSH'] = fp.name

        yield

        del os.environ['GIT_SSH']
        if had:
            os.environ['GIT_SSH'] = old
    finally:
        os.unlink(fp.name)


def setup_root(call, root, repos, ssh, git_cache, clean):
    """Create root dir, checkout repo and cd into resulting dir."""
    if not os.path.exists(root):
        os.makedirs(root)
    root_dir = os.path.realpath(root)
    logging.info('Root: %s', root_dir)
    os.chdir(root_dir)
    logging.info('cd to %s', root_dir)

    with choose_ssh_key(ssh):
        for repo, (branch, pull) in repos.items():
            os.chdir(root_dir)
            logging.info(
                'Checkout: %s %s',
                os.path.join(root_dir, repo),
                pull and pull or branch)
            checkout(call, repo, branch, pull, ssh, git_cache, clean)
    if len(repos) > 1:  # cd back into the primary repo
        os.chdir(root_dir)
        os.chdir(repos.main)


class Repos(dict):
    """{"repo": (branch, pull)} dict with a .main attribute."""
    main = ''

    def __setitem__(self, k, v):
        if not self:
            self.main = k
        return super(Repos, self).__setitem__(k, v)
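

# parse_repos() examples, derived from the regex below:
#   --repo=k8s.io/kubernetes              -> {'k8s.io/kubernetes': ('master', '')}
#   --repo=k8s.io/kubernetes=release-1.8  -> {'k8s.io/kubernetes': ('release-1.8', '')}
#   --repo=k8s.io/kubernetes=master:deadbeef,1234:cafe
#       -> {'k8s.io/kubernetes': ('', 'master:deadbeef,1234:cafe')}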


def parse_repos(args):
    """Convert --repo=foo=this,123:abc,555:ddd into a Repos()."""
    repos = args.repo or {}
    if not repos and not args.bare:
        raise ValueError('--bare or --repo required')
    ret = Repos()
    if len(repos) != 1:
        if args.pull:
            raise ValueError('Multi --repo does not support --pull, use --repo=R=branch,p1,p2')
        if args.branch:
            raise ValueError('Multi --repo does not support --branch, use --repo=R=branch')
    elif len(repos) == 1 and (args.branch or args.pull):
        repo = repos[0]
        if '=' in repo or ':' in repo:
            raise ValueError('--repo cannot contain = or : with --branch or --pull')
        ret[repo] = (args.branch, args.pull)
        return ret
    for repo in repos:
        mat = re.match(r'([^=]+)(=([^:,~^\s]+(:[0-9a-fA-F]+)?(,|$))+)?$', repo)
        if not mat:
            raise ValueError('bad repo', repo, repos)
        this_repo = mat.group(1)
        if not mat.group(2):
            ret[this_repo] = ('master', '')
            continue
        commits = mat.group(2)[1:].split(',')
        if len(commits) == 1:
            # Checking out a branch, possibly at a specific commit
            ret[this_repo] = (commits[0], '')
            continue
        # Checking out one or more PRs
        ret[this_repo] = ('', ','.join(commits))
    return ret


def bootstrap(args):
    """Clone repo at pull/branch into root and run job script."""
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    job = args.job
    repos = parse_repos(args)
    upload = args.upload

    build_log_path = os.path.abspath('build-log.txt')
    build_log = setup_logging(build_log_path)
    started = time.time()
    if args.timeout:
        end = started + args.timeout * 60
    else:
        end = 0
    call = lambda *a, **kw: _call(end, *a, **kw)
    gsutil = GSUtil(call)

    logging.info('Bootstrap %s...', job)
    build = build_name(started)

    if upload:
        if repos and repos[repos.main][1]:  # merging commits, a pr
            paths = pr_paths(upload, repos, job, build)
        else:
            paths = ci_paths(upload, job, build)
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        # TODO(fejta): Replace env var below with a flag eventually.
        os.environ[GCS_ARTIFACTS_ENV] = paths.artifacts

    version = 'unknown'
    exc_type = None
    setup_creds = False

    try:
        setup_root(call, args.root, repos, args.ssh, args.git_cache, args.clean)
        logging.info('Configure environment...')
        if repos:
            version = find_version(call)
        else:
            version = ''
        setup_magic_environment(job)
        setup_credentials(call, args.service_account, upload)
        setup_creds = True
        logging.info('Start %s at %s...', build, version)
        if upload:
            start(gsutil, paths, started, node(), version, repos)
        success = False
        try:
            call(job_script(job))
            logging.info('PASS: %s', job)
            success = True
        except subprocess.CalledProcessError:
            logging.error('FAIL: %s', job)
    except Exception:  # pylint: disable=broad-except
        exc_type, exc_value, exc_traceback = sys.exc_info()
        logging.exception('unexpected error')
        success = False
    if not setup_creds:
        setup_credentials(call, args.service_account, upload)
    if upload:
        logging.info('Upload result and artifacts...')
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        try:
            finish(
                gsutil, paths, success,
                os.path.join(os.getenv(WORKSPACE_ENV, os.getcwd()), '_artifacts'),
                build, version, repos, call
            )
        except subprocess.CalledProcessError:  # Still try to upload build log
            success = False
    logging.getLogger('').removeHandler(build_log)
    build_log.close()
    if upload:
        gsutil.copy_file(paths.build_log, build_log_path)
    if exc_type:
        raise exc_type, exc_value, exc_traceback  # pylint: disable=raising-bad-type
    if not success:
        # TODO(fejta/spxtr): we should distinguish infra and non-infra problems
        # by exit code and automatically retrigger after an infra-problem.
        sys.exit(1)
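

# bootstrap() failure semantics: a non-zero exit from the job script logs FAIL
# and leads to sys.exit(1); any unexpected exception is re-raised only after
# the build log has been uploaded.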


def parse_args(arguments=None):
    """Parse arguments or sys.argv[1:]."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--root', default='.', help='Root dir to work with')
    parser.add_argument(
        '--timeout', type=float, default=0, help='Timeout in minutes if set')
    parser.add_argument(
        '--repo',
        action='append',
        help='Fetch the specified repositories, with the first one considered primary')
    parser.add_argument(
        '--bare',
        action='store_true',
        help='Do not check out a repository')
    parser.add_argument('--job', required=True, help='Name of the job to run')
    parser.add_argument(
        '--upload',
        help='Upload results here if set, requires --service-account')
    parser.add_argument(
        '--service-account',
        help='Activate and use path/to/service-account.json if set.')
    parser.add_argument(
        '--ssh',
        help='Use the ssh key to fetch the repository instead of https if set.')
    parser.add_argument(
        '--git-cache',
        help='Location of the git cache.')
    parser.add_argument(
        '--clean',
        action='store_true',
        help='Clean the git repo before running tests.')
    args = parser.parse_args(arguments)
    # --pull is deprecated, use --repo=k8s.io/foo=master:abcd,12:ef12,45:ff65
    setattr(args, 'pull', None)
    # --branch is deprecated, use --repo=k8s.io/foo=master
    setattr(args, 'branch', None)
    if bool(args.repo) == bool(args.bare):
        raise argparse.ArgumentTypeError(
            'Expected --repo xor --bare:', args.repo, args.bare)
    return args


if __name__ == '__main__':
    ARGS = parse_args()
    bootstrap(ARGS)
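
# Example invocation (job, repo and bucket are hypothetical):
#   ./jenkins/bootstrap.py --job=ci-foo-build --repo=k8s.io/foo=master \
#       --upload=gs://bucket/logs --service-account=/etc/sa/key.json --timeout=60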