github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/jenkins/bootstrap.py

#!/usr/bin/env python

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Need to figure out why this only fails on travis
# pylint: disable=bad-continuation

"""Bootstraps starting a test job.

The following should already be done:
  git checkout http://k8s.io/test-infra
  cd $WORKSPACE
  test-infra/jenkins/bootstrap.py <--repo=R || --bare> <--job=J> <--pull=P || --branch=B>

The bootstrapper now does the following:
  # Note start time
  # check out repos defined in --repo
  # note job started
  # call runner defined in $JOB.json
  # upload artifacts (this will change later)
  # upload build-log.txt
  # note job ended

The contract with the runner is as follows:
  * Runner must exit non-zero if job fails for any reason.
"""


import argparse
import contextlib
import json
import logging
import os
import pipes
import random
import re
import select
import signal
import socket
import subprocess
import sys
import tempfile
import time
import urllib2

ORIG_CWD = os.getcwd()  # Checkout changes cwd


def read_all(end, stream, append):
    """Read all buffered lines from a stream."""
    while not end or time.time() < end:
        line = stream.readline()
        if not line:
            return True  # Read everything
        # Strip \n at the end if any. Last line of file may not have one.
        append(line.rstrip('\n'))
        # Is there more on the buffer?
        ret = select.select([stream.fileno()], [], [], 0.1)
        if not ret[0]:
            return False  # Cleared buffer but not at the end
    return False  # Time expired


def elapsed(since):
    """Return the number of minutes elapsed since a time."""
    return (time.time() - since) / 60


def terminate(end, proc, kill):
    """Terminate or kill the process after end."""
    if not end or time.time() <= end:
        return False
    if kill:  # Process will not die, kill everything
        pgid = os.getpgid(proc.pid)
        logging.info(
            'Kill %d and process group %d', proc.pid, pgid)
        os.killpg(pgid, signal.SIGKILL)
        proc.kill()
        return True
    logging.info(
        'Terminate %d on timeout', proc.pid)
    proc.terminate()
    return True


def _call(end, cmd, stdin=None, check=True, output=None, log_failures=True, env=None):  # pylint: disable=too-many-locals
    """Start a subprocess."""
    logging.info('Call: %s', ' '.join(pipes.quote(c) for c in cmd))
    begin = time.time()
    if end:
        end = max(end, time.time() + 60)  # Allow at least 60s per command
    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE if stdin is not None else None,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        preexec_fn=os.setsid,
        env=env,
    )
    if stdin:
        proc.stdin.write(stdin)
        proc.stdin.close()
    out = []
    code = None
    timeout = False
    reads = {
        proc.stderr.fileno(): (proc.stderr, logging.warning),
        proc.stdout.fileno(): (
            proc.stdout, (out.append if output else logging.info)),
    }
    while reads:
        if terminate(end, proc, timeout):
            if timeout:  # We killed everything
                break
            # Give subprocess some cleanup time before killing.
            end = time.time() + 15 * 60
            timeout = True
        ret = select.select(reads, [], [], 0.1)
        for fdesc in ret[0]:
            if read_all(end, *reads[fdesc]):
                reads.pop(fdesc)
        if not ret[0] and proc.poll() is not None:
            break  # process exited without closing pipes (timeout?)

    code = proc.wait()
    if timeout:
        code = code or 124
        logging.error('Build timed out')
    if code and log_failures:
        logging.error('Command failed')
    logging.info(
        'process %d exited with code %d after %.1fm',
        proc.pid, code, elapsed(begin))
    out.append('')
    lines = output and '\n'.join(out)
    if check and code:
        raise subprocess.CalledProcessError(code, cmd, lines)
    return lines
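

# For illustration only (hypothetical call site): with end=0 there is no
# deadline, output=True captures stdout as a string, and check=True (the
# default) raises subprocess.CalledProcessError on a non-zero exit:
#
#   head = _call(0, ['git', 'rev-parse', 'HEAD'], output=True)
#
# With output left unset, stdout/stderr are streamed to logging.info and
# logging.warning instead and the return value is falsy.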


def ref_has_shas(ref):
    """Determine if a reference specifies shas (contains ':')"""
    return isinstance(ref, basestring) and ':' in ref


def pull_numbers(pull):
    """Turn a pull reference list into a list of PR numbers to merge."""
    if ref_has_shas(pull):
        return [r.split(':')[0] for r in pull.split(',')][1:]
    return [str(pull)]


def pull_ref(pull):
    """Turn a PR number or list of refs into specific refs to fetch and check out."""
    if isinstance(pull, int) or ',' not in pull:
        return ['+refs/pull/%d/merge' % int(pull)], ['FETCH_HEAD']
    pulls = pull.split(',')
    refs = []
    checkouts = []
    for ref in pulls:
        change_ref = None
        if ':' in ref:  # master:abcd or 1234:abcd or 1234:abcd:ref/for/pr
            res = ref.split(':')
            name = res[0]
            sha = res[1]
            if len(res) > 2:
                change_ref = res[2]
        elif not refs:  # master
            name, sha = ref, 'FETCH_HEAD'
        else:
            name = ref
            sha = 'refs/pr/%s' % ref

        checkouts.append(sha)
        if not refs:  # First ref should be branch to merge into
            refs.append(name)
        elif change_ref:  # explicit change refs
            refs.append(change_ref)
        else:  # PR numbers
            num = int(name)
            refs.append('+refs/pull/%d/head:refs/pr/%d' % (num, num))
    return refs, checkouts


def branch_ref(branch):
    """Split branch:sha if necessary."""
    if ref_has_shas(branch):
        split_refs = branch.split(':')
        return [split_refs[0]], [split_refs[1]]
    return [branch], ['FETCH_HEAD']


def repository(repo, ssh):
    """Return the url associated with the repo."""
    if repo.startswith('k8s.io/'):
        repo = 'github.com/kubernetes/%s' % (repo[len('k8s.io/'):])
    elif repo.startswith('sigs.k8s.io/'):
        repo = 'github.com/kubernetes-sigs/%s' % (repo[len('sigs.k8s.io/'):])
    elif repo.startswith('istio.io/'):
        repo = 'github.com/istio/%s' % (repo[len('istio.io/'):])
    if ssh:
        if ":" not in repo:
            parts = repo.split('/', 1)
            repo = '%s:%s' % (parts[0], parts[1])
        return 'git@%s' % repo
    return 'https://%s' % repo
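

# Illustrative examples (values are hypothetical, traced from the logic above):
#
#   pull_ref(1234)
#     -> (['+refs/pull/1234/merge'], ['FETCH_HEAD'])
#   pull_ref('master:deadbeef,1234:cafe12')
#     -> (['master', '+refs/pull/1234/head:refs/pr/1234'], ['deadbeef', 'cafe12'])
#
#   repository('k8s.io/test-infra', ssh=False)
#     -> 'https://github.com/kubernetes/test-infra'
#   repository('k8s.io/test-infra', ssh=True)
#     -> 'git@github.com:kubernetes/test-infra'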


def random_sleep(attempt):
    """Sleep attempt**2 seconds plus a random fraction of a second."""
    time.sleep(random.random() + attempt ** 2)


def auth_google_gerrit(git, call):
    """Authenticate to foo.googlesource.com."""
    call([git, 'clone', 'https://gerrit.googlesource.com/gcompute-tools'])
    call(['./gcompute-tools/git-cookie-authdaemon'])


def commit_date(git, commit, call):
    """Return the unix commit timestamp of a commit, or None on failure."""
    try:
        return call([git, 'show', '-s', '--format=format:%ct', commit],
                    output=True, log_failures=False)
    except subprocess.CalledProcessError:
        logging.warning('Unable to print commit date for %s', commit)
        return None


def checkout(call, repo, repo_path, branch, pull, ssh='', git_cache='', clean=False):
    """Fetch and checkout the repository at the specified branch/pull.

    Note that repo and repo_path should usually be the same, but repo_path can
    be set to a different relative path where repo should be checked out."""
    # pylint: disable=too-many-locals,too-many-branches
    if bool(branch) == bool(pull):
        raise ValueError('Must specify one of --branch or --pull')

    if pull:
        refs, checkouts = pull_ref(pull)
    else:
        refs, checkouts = branch_ref(branch)

    git = 'git'

    # auth to google gerrit instance
    # TODO(krzyzacy): when we migrate to an init container we'll make a gerrit
    # checkout image and move this logic there
    if '.googlesource.com' in repo:
        auth_google_gerrit(git, call)

    if git_cache:
        cache_dir = '%s/%s' % (git_cache, repo)
        try:
            os.makedirs(cache_dir)
        except OSError:
            pass
        call([git, 'init', repo_path, '--separate-git-dir=%s' % cache_dir])
        call(['rm', '-f', '%s/index.lock' % cache_dir])
    else:
        call([git, 'init', repo_path])
    os.chdir(repo_path)

    if clean:
        call([git, 'clean', '-dfx'])
        call([git, 'reset', '--hard'])

    # To make a merge commit, a user needs to be set. It's okay to use a dummy
    # user here, since we're not exporting the history.
    call([git, 'config', '--local', 'user.name', 'K8S Bootstrap'])
    call([git, 'config', '--local', 'user.email', 'k8s_bootstrap@localhost'])
    retries = 3
    for attempt in range(retries):
        try:
            call([git, 'fetch', '--quiet', '--tags', repository(repo, ssh)] + refs)
            break
        except subprocess.CalledProcessError as cpe:
            if attempt >= retries - 1:
                raise
            if cpe.returncode != 128:
                raise
            logging.warning('git fetch failed')
            random_sleep(attempt)
    call([git, 'checkout', '-B', 'test', checkouts[0]])

    # Lie about the date in merge commits: use sequential seconds after the
    # commit date of the tip of the parent branch we're merging into.
    merge_date = int(commit_date(git, 'HEAD', call) or time.time())

    git_merge_env = os.environ.copy()
    for ref, head in zip(refs, checkouts)[1:]:
        merge_date += 1
        git_merge_env[GIT_AUTHOR_DATE_ENV] = str(merge_date)
        git_merge_env[GIT_COMMITTER_DATE_ENV] = str(merge_date)
        call(['git', 'merge', '--no-ff', '-m', 'Merge %s' % ref, head],
             env=git_merge_env)
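

# For illustration (hypothetical repo and PR numbers), a checkout driven by
# --repo=k8s.io/kubernetes=master:deadbeef,1234:cafe12 roughly issues, besides
# the git config calls above:
#
#   git init k8s.io/kubernetes
#   git fetch --quiet --tags https://github.com/kubernetes/kubernetes \
#       master +refs/pull/1234/head:refs/pr/1234
#   git checkout -B test deadbeef
#   git merge --no-ff -m 'Merge +refs/pull/1234/head:refs/pr/1234' cafe12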


def repos_dict(repos):
    """Returns {"repo1": "branch", "repo2": "pull"}."""
    return {r: b or p for (r, (b, p)) in repos.items()}


def start(gsutil, paths, stamp, node_name, version, repos):
    """Construct and upload started.json."""
    data = {
        'timestamp': int(stamp),
        'node': node_name,
    }
    if version:
        data['repo-version'] = version
        data['version'] = version  # TODO(fejta): retire
    if repos:
        pull = repos[repos.main]
        if ref_has_shas(pull[1]):
            data['pull'] = pull[1]
        data['repos'] = repos_dict(repos)
    if POD_ENV in os.environ:
        data['metadata'] = {'pod': os.environ[POD_ENV]}

    gsutil.upload_json(paths.started, data)
    # Upload a link to the build path in the directory
    if paths.pr_build_link:
        gsutil.upload_text(
            paths.pr_build_link,
            paths.pr_path,
            additional_headers=['-h', 'x-goog-meta-link: %s' % paths.pr_path]
        )
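

# For illustration, a started.json produced by start() looks roughly like the
# following (all values hypothetical; some keys only appear when available):
#
#   {
#     "timestamp": 1546981325,
#     "node": "some-build-node",
#     "repo-version": "v1.14.0-alpha.1.1234+deadbeef",
#     "version": "v1.14.0-alpha.1.1234+deadbeef",
#     "pull": "master:deadbeef,1234:cafe12",
#     "repos": {"k8s.io/kubernetes": "master:deadbeef,1234:cafe12"},
#     "metadata": {"pod": "some-pod-name"}
#   }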


class GSUtil(object):
    """A helper class for making gsutil commands."""
    gsutil = 'gsutil'

    def __init__(self, call):
        self.call = call

    def stat(self, path):
        """Return metadata about the object, such as generation."""
        cmd = [self.gsutil, 'stat', path]
        return self.call(cmd, output=True, log_failures=False)

    def ls(self, path):
        """List a bucket or subdir."""
        cmd = [self.gsutil, 'ls', path]
        return self.call(cmd, output=True)

    def upload_json(self, path, jdict, generation=None):
        """Upload the dictionary object to path."""
        if generation is not None:  # generation==0 means object does not exist
            gen = ['-h', 'x-goog-if-generation-match:%s' % generation]
        else:
            gen = []
        with tempfile.NamedTemporaryFile(prefix='gsutil_') as fp:
            fp.write(json.dumps(jdict, indent=2))
            fp.flush()
            cmd = [
                self.gsutil, '-q',
                '-h', 'Content-Type:application/json'] + gen + [
                'cp', fp.name, path]
            self.call(cmd)

    def copy_file(self, dest, orig):
        """Copy the file to the specified path using compressed encoding."""
        cmd = [self.gsutil, '-q', 'cp', '-Z', orig, dest]
        self.call(cmd)

    def upload_text(self, path, txt, additional_headers=None, cached=True):
        """Copy the text to path, optionally disabling caching."""
        headers = ['-h', 'Content-Type:text/plain']
        if not cached:
            headers += ['-h', 'Cache-Control:private, max-age=0, no-transform']
        if additional_headers:
            headers += additional_headers
        with tempfile.NamedTemporaryFile(prefix='gsutil_') as fp:
            fp.write(txt)
            fp.flush()
            cmd = [self.gsutil, '-q'] + headers + ['cp', fp.name, path]
            self.call(cmd)

    def cat(self, path, generation):
        """Return contents of path#generation"""
        cmd = [self.gsutil, '-q', 'cat', '%s#%s' % (path, generation)]
        return self.call(cmd, output=True)

    def upload_artifacts(self, gsutil, path, artifacts):
        """Upload artifacts to the specified path."""
        # Upload artifacts
        if not os.path.isdir(artifacts):
            logging.warning('Artifacts dir %s is missing.', artifacts)
            return
        original_artifacts = artifacts
        try:
            # If the remote path already exists, gsutil will create an
            # .../_artifacts subdir under it instead, so check first.
            gsutil.ls(path)
            # Success means remote path exists
            remote_base = os.path.basename(path)
            local_base = os.path.basename(artifacts)
            if remote_base != local_base:
                # if the basenames differ, rename the local dir to match first.
                localpath = artifacts.replace(local_base, remote_base)
                os.rename(artifacts, localpath)
                artifacts = localpath
                path = path[:-len(remote_base + '/')]
        except subprocess.CalledProcessError:
            logging.warning('Remote dir %s does not exist yet', path)
        cmd = [
            self.gsutil, '-m', '-q',
            '-o', 'GSUtil:use_magicfile=True',
            'cp', '-r', '-c', '-z', 'log,txt,xml',
            artifacts, path,
        ]
        self.call(cmd)

        # Rename the artifacts dir back; other places still reference the
        # original artifacts dir.
        if original_artifacts != artifacts:
            os.rename(artifacts, original_artifacts)


def append_result(gsutil, path, build, version, passed):
    """Download a json list and append metadata about this build to it."""
    # TODO(fejta): delete the clone of this logic in upload-to-gcs.sh
    # (this is update_job_result_cache)
    end = time.time() + 300  # try for up to five minutes
    errors = 0
    while time.time() < end:
        if errors:
            random_sleep(min(errors, 3))
        try:
            out = gsutil.stat(path)
            gen = re.search(r'Generation:\s+(\d+)', out).group(1)
        except subprocess.CalledProcessError:
            gen = 0
        if gen:
            try:
                cache = json.loads(gsutil.cat(path, gen))
                if not isinstance(cache, list):
                    raise ValueError(cache)
            except ValueError as exc:
                logging.warning('Failed to decode JSON: %s', exc)
                cache = []
            except subprocess.CalledProcessError:  # gen doesn't exist
                errors += 1
                continue
        else:
            cache = []
        cache.append({
            'version': version,  # TODO(fejta): retire
            'job-version': version,
            'buildnumber': build,
            'passed': bool(passed),
            'result': 'SUCCESS' if passed else 'FAILURE',
        })
        cache = cache[-300:]
        try:
            gsutil.upload_json(path, cache, generation=gen)
            return
        except subprocess.CalledProcessError:
            logging.warning('Failed to append to %s#%s', path, gen)
            errors += 1
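

# For illustration, each entry appended to jobResultsCache.json by
# append_result() has this shape (values hypothetical):
#
#   {
#     "version": "v1.14.0-alpha.1.1234+deadbeef",
#     "job-version": "v1.14.0-alpha.1.1234+deadbeef",
#     "buildnumber": "42",
#     "passed": true,
#     "result": "SUCCESS"
#   }
#
# The cache is truncated to the most recent 300 entries and re-uploaded with
# an x-goog-if-generation-match header so concurrent writers do not clobber
# each other.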


def metadata(repos, artifacts, call):
    """Return metadata associated with the build, including inside artifacts."""
    path = os.path.join(artifacts or '', 'metadata.json')
    meta = None
    if os.path.isfile(path):
        try:
            with open(path) as fp:
                meta = json.loads(fp.read())
        except (IOError, ValueError):
            logging.warning('Failed to open %s', path)
    else:
        logging.warning('metadata path %s does not exist', path)

    if not meta or not isinstance(meta, dict):
        logging.warning(
            'metadata not found or invalid, init with empty metadata')
        meta = {}
    if repos:
        meta['repo'] = repos.main
        meta['repos'] = repos_dict(repos)

    if POD_ENV in os.environ:
        # HARDEN against metadata only being read from finished.
        meta['pod'] = os.environ[POD_ENV]

    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['repo-commit'] = commit.strip()
    except subprocess.CalledProcessError:
        pass

    cwd = os.getcwd()
    os.chdir(test_infra('.'))
    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['infra-commit'] = commit.strip()[:9]
    except subprocess.CalledProcessError:
        pass
    os.chdir(cwd)

    return meta


def finish(gsutil, paths, success, artifacts, build, version, repos, call):
    """Upload build results: artifacts, result caches, and finished.json.

    Args:
        paths: a Paths instance.
        success: the build passed if true.
        artifacts: a dir containing artifacts to upload.
        build: identifier of this build.
        version: identifies what version of the code the build tested.
        repos: the target repos
    """

    if os.path.isdir(artifacts) and any(f for _, _, f in os.walk(artifacts)):
        try:
            gsutil.upload_artifacts(gsutil, paths.artifacts, artifacts)
        except subprocess.CalledProcessError:
            logging.warning('Failed to upload artifacts')
    else:
        logging.warning('Missing local artifacts : %s', artifacts)

    meta = metadata(repos, artifacts, call)
    if not version:
        version = meta.get('job-version')
    if not version:  # TODO(fejta): retire
        version = meta.get('version')
    # github.com/kubernetes/release/find_green_build depends on append_result()
    # TODO(fejta): reconsider whether this is how we want to solve this problem.
    append_result(gsutil, paths.result_cache, build, version, success)
    if paths.pr_result_cache:
        append_result(gsutil, paths.pr_result_cache, build, version, success)

    data = {
        # TODO(fejta): update utils.go in contrib to accept a float
        'timestamp': int(time.time()),
        'result': 'SUCCESS' if success else 'FAILURE',
        'passed': bool(success),
        'metadata': meta,
    }
    if version:
        data['job-version'] = version
        data['version'] = version  # TODO(fejta): retire
    gsutil.upload_json(paths.finished, data)

    # Upload the latest build for the job.
    # Do this last, since other tools expect the rest of the data to be
    # published when this file is created.
    for path in {paths.latest, paths.pr_latest}:
        if path:
            try:
                gsutil.upload_text(path, str(build), cached=False)
            except subprocess.CalledProcessError:
                logging.warning('Failed to update %s', path)
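

# For illustration, the finished.json uploaded above looks roughly like this
# (all values hypothetical):
#
#   {
#     "timestamp": 1546984925,
#     "result": "SUCCESS",
#     "passed": true,
#     "job-version": "v1.14.0-alpha.1.1234+deadbeef",
#     "version": "v1.14.0-alpha.1.1234+deadbeef",
#     "metadata": {
#       "repo": "k8s.io/kubernetes",
#       "repos": {"k8s.io/kubernetes": "master:deadbeef,1234:cafe12"},
#       "repo-commit": "<sha of HEAD in the main repo>",
#       "infra-commit": "<9-char test-infra commit>",
#       "pod": "some-pod-name"
#     }
#   }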


def test_infra(*paths):
    """Return path relative to root of test-infra repo."""
    return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)


def node():
    """Return the name of the node running the build."""
    # TODO(fejta): jenkins sets the node name and our infra expects this value.
    # TODO(fejta): Consider doing something different here.
    if NODE_ENV not in os.environ:
        host = socket.gethostname().split('.')[0]
        try:
            # Try reading the name of the VM we're running on, using the
            # metadata server.
            os.environ[NODE_ENV] = urllib2.urlopen(urllib2.Request(
                'http://169.254.169.254/computeMetadata/v1/instance/name',
                headers={'Metadata-Flavor': 'Google'})).read()
            os.environ[POD_ENV] = host  # We also want to log this.
        except IOError:  # Fallback.
            os.environ[NODE_ENV] = host
    return os.environ[NODE_ENV]


def find_version(call):
    """Determine and return the version of the build."""
    # TODO(fejta): once job-version is functional switch this to
    # git rev-parse [--short=N] HEAD^{commit}
    version_file = 'version'
    if os.path.isfile(version_file):
        # e2e tests which download kubernetes use this path:
        with open(version_file) as fp:
            return fp.read().strip()

    version_script = 'hack/lib/version.sh'
    if os.path.isfile(version_script):
        cmd = [
            'bash', '-c', (
                """
                set -o errexit
                set -o nounset
                export KUBE_ROOT=.
                source %s
                kube::version::get_version_vars
                echo $KUBE_GIT_VERSION
                """ % version_script)
        ]
        return call(cmd, output=True).strip()

    return 'unknown'


class Paths(object):  # pylint: disable=too-many-instance-attributes,too-few-public-methods
    """Links to remote gcs-paths for uploading results."""

    def __init__(  # pylint: disable=too-many-arguments
        self,
        artifacts,  # artifacts folder (in build)
        build_log,  # build-log.txt (in build)
        pr_path,  # path to build
        finished,  # finished.json (metadata from end of build)
        latest,  # latest-build.txt (in job)
        pr_build_link,  # file containing pr_path (in job directory)
        pr_latest,  # latest-build.txt (in pr job)
        pr_result_cache,  # jobResultsCache.json (in pr job)
        result_cache,  # jobResultsCache.json (cache of latest results in job)
        started,  # started.json (metadata from start of build)
    ):
        self.artifacts = artifacts
        self.build_log = build_log
        self.pr_path = pr_path
        self.finished = finished
        self.latest = latest
        self.pr_build_link = pr_build_link
        self.pr_latest = pr_latest
        self.pr_result_cache = pr_result_cache
        self.result_cache = result_cache
        self.started = started


def ci_paths(base, job, build):
    """Return a Paths() instance for a continuous build."""
    latest = os.path.join(base, job, 'latest-build.txt')
    return Paths(
        artifacts=os.path.join(base, job, build, 'artifacts'),
        build_log=os.path.join(base, job, build, 'build-log.txt'),
        pr_path=None,
        finished=os.path.join(base, job, build, 'finished.json'),
        latest=latest,
        pr_build_link=None,
        pr_latest=None,
        pr_result_cache=None,
        result_cache=os.path.join(base, job, 'jobResultsCache.json'),
        started=os.path.join(base, job, build, 'started.json'),
    )


def pr_paths(base, repos, job, build):
    """Return a Paths() instance for a PR."""
    if not repos:
        raise ValueError('repos is empty')
    repo = repos.main
    pull = str(repos[repo][1])
    if repo in ['k8s.io/kubernetes', 'kubernetes/kubernetes']:
        prefix = ''
    elif repo.startswith('k8s.io/'):
        prefix = repo[len('k8s.io/'):]
    elif repo.startswith('kubernetes/'):
        prefix = repo[len('kubernetes/'):]
    elif repo.startswith('github.com/'):
        prefix = repo[len('github.com/'):].replace('/', '_')
    else:
        prefix = repo.replace('/', '_')
    # Batch merges are those with more than one PR specified.
    pr_nums = pull_numbers(pull)
    if len(pr_nums) > 1:
        pull = os.path.join(prefix, 'batch')
    else:
        pull = os.path.join(prefix, pr_nums[0])
    pr_path = os.path.join(base, 'pull', pull, job, build)
    result_cache = os.path.join(
        base, 'directory', job, 'jobResultsCache.json')
    pr_result_cache = os.path.join(
        base, 'pull', pull, job, 'jobResultsCache.json')
    return Paths(
        artifacts=os.path.join(pr_path, 'artifacts'),
        build_log=os.path.join(pr_path, 'build-log.txt'),
        pr_path=pr_path,
        finished=os.path.join(pr_path, 'finished.json'),
        latest=os.path.join(base, 'directory', job, 'latest-build.txt'),
        pr_build_link=os.path.join(base, 'directory', job, '%s.txt' % build),
        pr_latest=os.path.join(base, 'pull', pull, job, 'latest-build.txt'),
        pr_result_cache=pr_result_cache,
        result_cache=result_cache,
        started=os.path.join(pr_path, 'started.json'),
    )
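

# For illustration (hypothetical bucket, job, and PR):
#
#   pr_paths('gs://some-bucket/pr-logs', repos, 'pull-test-infra-foo', '42')
#
# with repos.main == 'k8s.io/test-infra' and pull 'master:deadbeef,1234:cafe12'
# places the per-build files under
#
#   gs://some-bucket/pr-logs/pull/test-infra/1234/pull-test-infra-foo/42/
#     started.json, finished.json, build-log.txt, artifacts/
#
# while latest-build.txt and jobResultsCache.json live one level up in the
# PR's job directory, with cross-PR copies under
# gs://some-bucket/pr-logs/directory/pull-test-infra-foo/.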


BUILD_ENV = 'BUILD_ID'
BOOTSTRAP_ENV = 'BOOTSTRAP_MIGRATION'
CLOUDSDK_ENV = 'CLOUDSDK_CONFIG'
GCE_KEY_ENV = 'JENKINS_GCE_SSH_PRIVATE_KEY_FILE'
GUBERNATOR = 'https://gubernator.k8s.io/build'
HOME_ENV = 'HOME'
JENKINS_HOME_ENV = 'JENKINS_HOME'
K8S_ENV = 'KUBERNETES_SERVICE_HOST'
JOB_ENV = 'JOB_NAME'
NODE_ENV = 'NODE_NAME'
POD_ENV = 'POD_NAME'
SERVICE_ACCOUNT_ENV = 'GOOGLE_APPLICATION_CREDENTIALS'
WORKSPACE_ENV = 'WORKSPACE'
GCS_ARTIFACTS_ENV = 'GCS_ARTIFACTS_DIR'
IMAGE_NAME_ENV = 'IMAGE'
GIT_AUTHOR_DATE_ENV = 'GIT_AUTHOR_DATE'
GIT_COMMITTER_DATE_ENV = 'GIT_COMMITTER_DATE'
SOURCE_DATE_EPOCH_ENV = 'SOURCE_DATE_EPOCH'
JOB_ARTIFACTS_ENV = 'ARTIFACTS'


def build_name(started):
    """Return the unique(ish) string representing this build."""
    # TODO(fejta): right now jenkins sets the BUILD_ID and does this
    # in an environment variable. Consider migrating this to a
    # bootstrap.py flag
    if BUILD_ENV not in os.environ:
        # Automatically generate a build number if none is set
        uniq = '%x-%d' % (hash(node()), os.getpid())
        autogen = time.strftime('%Y%m%d-%H%M%S-' + uniq, time.gmtime(started))
        os.environ[BUILD_ENV] = autogen
    return os.environ[BUILD_ENV]


def setup_credentials(call, robot, upload):
    """Activate the service account unless robot is None."""
    # TODO(fejta): stop activating inside the image
    # TODO(fejta): allow use of existing gcloud auth
    if robot:
        os.environ[SERVICE_ACCOUNT_ENV] = robot
    if not os.getenv(SERVICE_ACCOUNT_ENV) and upload:
        logging.warning(
            'Cannot --upload=%s, no active gcloud account.', upload)
        raise ValueError('--upload requires --service-account')
    if not os.getenv(SERVICE_ACCOUNT_ENV) and not upload:
        logging.info('Will not upload results.')
        return
    if not os.path.isfile(os.environ[SERVICE_ACCOUNT_ENV]):
        raise IOError(
            'Cannot find service account credentials',
            os.environ[SERVICE_ACCOUNT_ENV],
            'Create service account and then create key at '
            'https://console.developers.google.com/iam-admin/serviceaccounts/project',  # pylint: disable=line-too-long
        )
    # this sometimes fails spuriously due to DNS flakiness, so we retry it
    for _ in range(5):
        try:
            call([
                'gcloud',
                'auth',
                'activate-service-account',
                '--key-file=%s' % os.environ[SERVICE_ACCOUNT_ENV],
            ])
            break
        except subprocess.CalledProcessError:
            pass
        sleep_for = 1
        logging.info(
            'Retrying service account activation in %.2fs ...', sleep_for)
        time.sleep(sleep_for)
    else:
        raise Exception(
            "Failed to activate service account, exhausted retries")
    try:  # Old versions of gcloud may not support this value
        account = call(
            ['gcloud', 'config', 'get-value', 'account'], output=True).strip()
    except subprocess.CalledProcessError:
        account = 'unknown'
    logging.info('Will upload results to %s using %s', upload, account)


def setup_logging(path):
    """Initialize logging to screen and path."""
    # See https://docs.python.org/2/library/logging.html#logrecord-attributes
    # [IWEF]mmdd HH:MM:SS.mmm] msg
    fmt = '%(levelname).1s%(asctime)s.%(msecs)03d] %(message)s'  # pylint: disable=line-too-long
    datefmt = '%m%d %H:%M:%S'
    logging.basicConfig(
        level=logging.INFO,
        format=fmt,
        datefmt=datefmt,
    )
    build_log = logging.FileHandler(filename=path, mode='w')
    build_log.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt, datefmt=datefmt)
    build_log.setFormatter(formatter)
    logging.getLogger('').addHandler(build_log)
    return build_log


def get_artifacts_dir():
    """Return the local directory where job artifacts should be written."""
    return os.getenv(
        JOB_ARTIFACTS_ENV,
        os.path.join(os.getenv(WORKSPACE_ENV, os.getcwd()), '_artifacts'))
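

# For illustration, the format configured above renders log lines like the
# following (hypothetical timestamp and message):
#
#   I0108 21:02:05.123] Call: git fetch --quiet --tags ...
#
# i.e. one-letter level, month/day, time with milliseconds, then the message.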


def setup_magic_environment(job, call):
    """Set magic environment variables scripts currently expect."""
    home = os.environ[HOME_ENV]
    # TODO(fejta): jenkins sets these values. Consider migrating to using
    # a secret volume instead and passing the path to this volume
    # into bootstrap.py as a flag.
    os.environ.setdefault(
        GCE_KEY_ENV,
        os.path.join(home, '.ssh/google_compute_engine'),
    )
    os.environ.setdefault(
        'JENKINS_GCE_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/google_compute_engine.pub'),
    )
    os.environ.setdefault(
        'JENKINS_AWS_SSH_PRIVATE_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa'),
    )
    os.environ.setdefault(
        'JENKINS_AWS_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa.pub'),
    )

    cwd = os.getcwd()
    # TODO(fejta): jenkins sets WORKSPACE and pieces of our infra expect this
    # value. Consider doing something else in the future.
    # Furthermore, in the Jenkins and Prow environments, this is already set
    # to something reasonable, but using cwd will likely cause all sorts of
    # problems. Thus, only set this if we really need to.
    if WORKSPACE_ENV not in os.environ:
        os.environ[WORKSPACE_ENV] = cwd
    # By default, Jenkins sets HOME to JENKINS_HOME, which is shared by all
    # jobs. To avoid collisions, set it to the cwd instead, but only when
    # running on Jenkins.
    if os.getenv(HOME_ENV) and os.getenv(HOME_ENV) == os.getenv(JENKINS_HOME_ENV):
        os.environ[HOME_ENV] = cwd
    # TODO(fejta): jenkins sets JOB_ENV and pieces of our infra expect this
    # value. Consider making everything below here agnostic to the
    # job name.
    if JOB_ENV not in os.environ:
        os.environ[JOB_ENV] = job
    elif os.environ[JOB_ENV] != job:
        logging.warning('%s=%s (overrides %s)', JOB_ENV,
                        job, os.environ[JOB_ENV])
        os.environ[JOB_ENV] = job
    # TODO(fejta): Magic value to tell our test code not to upload started.json
    # TODO(fejta): delete upload-to-gcs.sh and then this value.
    os.environ[BOOTSTRAP_ENV] = 'yes'
    # This helps prevent reuse of cloudsdk configuration. It also reduces the
    # risk that running a job on a workstation corrupts the user's config.
    os.environ[CLOUDSDK_ENV] = '%s/.config/gcloud' % cwd

    # Set $ARTIFACTS to help migrate to podutils
    os.environ[JOB_ARTIFACTS_ENV] = os.path.join(
        os.getenv(WORKSPACE_ENV, os.getcwd()), '_artifacts')

    # also make the artifacts dir if it doesn't exist yet
    if not os.path.isdir(get_artifacts_dir()):
        try:
            os.makedirs(get_artifacts_dir())
        except OSError as exc:
            logging.info(
                'cannot create %s, continue : %s', get_artifacts_dir(), exc)

    # Try to set SOURCE_DATE_EPOCH based on the commit date of the tip of the
    # tree. This improves cacheability of stamped binaries.
    head_commit_date = commit_date('git', 'HEAD', call)
    if head_commit_date:
        os.environ[SOURCE_DATE_EPOCH_ENV] = head_commit_date.strip()
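

# For illustration (hypothetical job name and paths), after
# setup_magic_environment('ci-some-job', call) runs in /workspace on a node,
# the job script roughly sees:
#
#   WORKSPACE=/workspace            (only if it was previously unset)
#   JOB_NAME=ci-some-job
#   BOOTSTRAP_MIGRATION=yes
#   CLOUDSDK_CONFIG=/workspace/.config/gcloud
#   ARTIFACTS=/workspace/_artifacts
#   SOURCE_DATE_EPOCH=<commit timestamp of HEAD, when available>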


def job_args(args):
    """Converts 'a ${FOO} $bar' into 'a wildly different string'."""
    return [os.path.expandvars(a) for a in args]


def job_script(job, scenario, extra_job_args):
    """Return path to script for job."""
    with open(test_infra('jobs/config.json')) as fp:
        config = json.loads(fp.read())
    if job.startswith('pull-security-kubernetes-'):
        job = job.replace('pull-security-kubernetes-', 'pull-kubernetes-', 1)
    config_json_args = []
    if job in config:
        job_config = config[job]
        if not scenario:
            scenario = job_config['scenario']
        config_json_args = job_config.get('args', [])
    elif not scenario:
        raise ValueError('cannot find scenario for job', job)
    cmd = test_infra('scenarios/%s.py' % scenario)
    return [cmd] + job_args(config_json_args + extra_job_args)


def gubernator_uri(paths):
    """Return a gubernator link for this build."""
    job = os.path.dirname(paths.build_log)
    if job.startswith('gs:/'):
        return job.replace('gs:/', GUBERNATOR, 1)
    return job


@contextlib.contextmanager
def configure_ssh_key(ssh):
    """Creates a script for GIT_SSH that uses -i ssh if set."""
    if not ssh:  # Nothing to do
        yield
        return

    try:
        os.makedirs(os.path.join(os.environ[HOME_ENV], '.ssh'))
    except OSError as exc:
        logging.info('cannot create $HOME/.ssh, continue : %s', exc)
    except KeyError as exc:
        logging.info('$%s does not exist, continue : %s', HOME_ENV, exc)

    # Create a script for use with GIT_SSH, which defines the program git uses
    # during git fetch. In the future change this to GIT_SSH_COMMAND
    # https://superuser.com/questions/232373/how-to-tell-git-which-private-key-to-use
    with tempfile.NamedTemporaryFile(prefix='ssh', delete=False) as fp:
        fp.write(
            '#!/bin/sh\nssh -o StrictHostKeyChecking=no -i \'%s\' -F /dev/null "${@}"\n' % ssh)
    try:
        os.chmod(fp.name, 0500)
        had = 'GIT_SSH' in os.environ
        old = os.getenv('GIT_SSH')
        os.environ['GIT_SSH'] = fp.name

        yield

        del os.environ['GIT_SSH']
        if had:
            os.environ['GIT_SSH'] = old
    finally:
        os.unlink(fp.name)


def maybe_upload_podspec(call, artifacts, gsutil, getenv):
    """Attempt to read our own podspec and upload it to the artifacts dir."""
    if not getenv(K8S_ENV):
        return  # we don't appear to be a pod
    hostname = getenv('HOSTNAME')
    if not hostname:
        return
    spec = call(['kubectl', 'get', '-oyaml', 'pods/' + hostname], output=True)
    gsutil.upload_text(
        os.path.join(artifacts, 'prow_podspec.yaml'), spec)
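

# For illustration, with a hypothetical jobs/config.json entry
#
#   "ci-some-job": {"scenario": "execute", "args": ["--timeout=20", "--", "make", "test"]}
#
# job_script('ci-some-job', None, []) resolves to
#   [<test-infra>/scenarios/execute.py, '--timeout=20', '--', 'make', 'test']
# after environment variables in the args have been expanded.
#
# gubernator_uri() simply rewrites the gs:// build directory, e.g.
#   gs://some-bucket/logs/ci-some-job/42/build-log.txt
#     -> https://gubernator.k8s.io/build/some-bucket/logs/ci-some-job/42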


def setup_root(call, root, repos, ssh, git_cache, clean):
    """Create root dir, checkout repo and cd into resulting dir."""
    if not os.path.exists(root):
        os.makedirs(root)
    root_dir = os.path.realpath(root)
    logging.info('Root: %s', root_dir)
    os.chdir(root_dir)
    logging.info('cd to %s', root_dir)

    # we want to checkout the correct repo for k-s/k but *everything*
    # under the sun assumes $GOPATH/src/k8s.io/kubernetes so... :(
    # after this method is called we've already computed the upload paths
    # etc. so we can just swap it out for the desired path on disk
    for repo, (branch, pull) in repos.items():
        os.chdir(root_dir)
        # for k-s/k these are different, for the rest they are the same
        # TODO(bentheelder,cjwagner,stevekuznetsov): in the integrated
        # prow checkout support remapping checkouts and kill this monstrosity
        repo_path = repo
        if repo == "github.com/kubernetes-security/kubernetes":
            repo_path = "k8s.io/kubernetes"
        logging.info(
            'Checkout: %s %s to %s',
            os.path.join(root_dir, repo),
            pull and pull or branch,
            os.path.join(root_dir, repo_path))
        checkout(call, repo, repo_path, branch, pull, ssh, git_cache, clean)
    # switch out the main repo for the actual path on disk if we are k-s/k
    # from this point forward this is the path we want to use for everything
    if repos.main == "github.com/kubernetes-security/kubernetes":
        repos["k8s.io/kubernetes"], repos.main = repos[repos.main], "k8s.io/kubernetes"
    if len(repos) > 1:  # cd back into the primary repo
        os.chdir(root_dir)
        os.chdir(repos.main)


class Repos(dict):
    """{"repo": (branch, pull)} dict with a .main attribute."""
    main = ''

    def __setitem__(self, k, v):
        if not self:
            self.main = k
        return super(Repos, self).__setitem__(k, v)


def parse_repos(args):
    """Convert --repo=foo=this,123:abc,555:ddd into a Repos()."""
    repos = args.repo or {}
    if not repos and not args.bare:
        raise ValueError('--bare or --repo required')
    ret = Repos()
    if len(repos) != 1:
        if args.pull:
            raise ValueError(
                'Multi --repo does not support --pull, use --repo=R=branch,p1,p2')
        if args.branch:
            raise ValueError(
                'Multi --repo does not support --branch, use --repo=R=branch')
    elif len(repos) == 1 and (args.branch or args.pull):
        repo = repos[0]
        if '=' in repo or ':' in repo:
            raise ValueError(
                '--repo cannot contain = or : with --branch or --pull')
        ret[repo] = (args.branch, args.pull)
        return ret
    for repo in repos:
        mat = re.match(
            r'([^=]+)(=([^:,~^\s]+(:[0-9a-fA-F]+)?(:refs/changes/[0-9/]+)?(,|$))+)?$', repo)
        if not mat:
            raise ValueError('bad repo', repo, repos)
        this_repo = mat.group(1)
        if not mat.group(2):
            ret[this_repo] = ('master', '')
            continue
        commits = mat.group(2)[1:].split(',')
        if len(commits) == 1:
            # Checking out a branch, possibly at a specific commit
            ret[this_repo] = (commits[0], '')
            continue
        # Checking out one or more PRs
        ret[this_repo] = ('', ','.join(commits))
    return ret
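

# For illustration (hypothetical flags), parse_repos maps --repo values to
# (branch, pull) tuples keyed by repo, with the first --repo becoming
# Repos.main:
#
#   --repo=k8s.io/test-infra                  -> ('master', '')
#   --repo=k8s.io/kubernetes=release-1.13     -> ('release-1.13', '')
#   --repo=k8s.io/kubernetes=master:deadbeef,1234:cafe12
#                                             -> ('', 'master:deadbeef,1234:cafe12')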


def bootstrap(args):
    """Clone repo at pull/branch into root and run job script."""
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    job = args.job
    repos = parse_repos(args)
    upload = args.upload

    build_log_path = os.path.abspath('build-log.txt')
    build_log = setup_logging(build_log_path)
    started = time.time()
    if args.timeout:
        end = started + args.timeout * 60
    else:
        end = 0
    call = lambda *a, **kw: _call(end, *a, **kw)
    gsutil = GSUtil(call)

    logging.warning('bootstrap.py is deprecated!\n'
                    'Please migrate your job to podutils!\n'
                    'https://github.com/kubernetes/test-infra/blob/master/prow/pod-utilities.md'
                    )

    if len(sys.argv) > 1:
        logging.info('Args: %s', ' '.join(pipes.quote(a)
                                          for a in sys.argv[1:]))
    logging.info('Bootstrap %s...', job)
    logging.info('Builder: %s', node())
    if IMAGE_NAME_ENV in os.environ:
        logging.info('Image: %s', os.environ[IMAGE_NAME_ENV])
    build = build_name(started)

    if upload:
        # TODO(bentheelder, cjwagner, stevekuznetsov): support the workspace
        # repo not matching the upload repo in the shiny new init container
        pull_ref_repos = [repo for repo in repos if repos[repo][1]]
        if pull_ref_repos:
            workspace_main, repos.main = repos.main, pull_ref_repos[0]
            paths = pr_paths(upload, repos, job, build)
            repos.main = workspace_main
        else:
            paths = ci_paths(upload, job, build)
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        # TODO(fejta): Replace env var below with a flag eventually.
        os.environ[GCS_ARTIFACTS_ENV] = paths.artifacts

    version = 'unknown'
    exc_type = None

    try:
        with configure_ssh_key(args.ssh):
            setup_credentials(call, args.service_account, upload)
            if upload:
                try:
                    maybe_upload_podspec(
                        call, paths.artifacts, gsutil, os.getenv)
                except (OSError, subprocess.CalledProcessError), exc:
                    logging.error("unable to upload podspecs: %s", exc)
            setup_root(call, args.root, repos, args.ssh,
                       args.git_cache, args.clean)
            logging.info('Configure environment...')
            setup_magic_environment(job, call)
            setup_credentials(call, args.service_account, upload)
            version = find_version(call) if repos else ''
            logging.info('Start %s at %s...', build, version)
            if upload:
                start(gsutil, paths, started, node(), version, repos)
            success = False
            try:
                call(job_script(job, args.scenario, args.extra_job_args))
                logging.info('PASS: %s', job)
                success = True
            except subprocess.CalledProcessError:
                logging.error('FAIL: %s', job)
    except Exception:  # pylint: disable=broad-except
        exc_type, exc_value, exc_traceback = sys.exc_info()
        logging.exception('unexpected error')
        success = False

    # jobs can change service account, always set it back before we upload logs
    setup_credentials(call, args.service_account, upload)
    if upload:
        logging.info('Upload result and artifacts...')
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        try:
            finish(
                gsutil, paths, success, get_artifacts_dir(),
                build, version, repos, call
            )
        except subprocess.CalledProcessError:  # Still try to upload build log
            success = False
    logging.getLogger('').removeHandler(build_log)
    build_log.close()
    if upload:
        gsutil.copy_file(paths.build_log, build_log_path)
    if exc_type:
        raise exc_type, exc_value, exc_traceback  # pylint: disable=raising-bad-type
    if not success:
        # TODO(fejta/spxtr): we should distinguish infra and non-infra problems
        # by exit code and automatically retrigger after an infra-problem.
        sys.exit(1)


def parse_args(arguments=None):
    """Parse arguments or sys.argv[1:]."""
    if arguments is None:
        arguments = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument('--root', default='.', help='Root dir to work with')
    parser.add_argument(
        '--timeout', type=float, default=0, help='Timeout in minutes if set')
    parser.add_argument(
        '--repo',
        action='append',
        help='Fetch the specified repositories, with the first one considered primary')
    parser.add_argument(
        '--bare',
        action='store_true',
        help='Do not check out a repository')
    parser.add_argument('--job', required=True, help='Name of the job to run')
    parser.add_argument(
        '--upload',
        help='Upload results here if set, requires --service-account')
    parser.add_argument(
        '--service-account',
        help='Activate and use path/to/service-account.json if set.')
    parser.add_argument(
        '--ssh',
        help='Use the ssh key to fetch the repository instead of https if set.')
    parser.add_argument(
        '--git-cache',
        help='Location of the git cache.')
    parser.add_argument(
        '--clean',
        action='store_true',
        help='Clean the git repo before running tests.')
    # TODO(krzyzacy): later we should merge prow+config.json
    # and utilize this flag
    parser.add_argument(
        '--scenario',
        help='Scenario to use, if not specified in config.json')
    # split out args after `--` as job arguments
    extra_job_args = []
    if '--' in arguments:
        index = arguments.index('--')
        arguments, extra_job_args = arguments[:index], arguments[index+1:]
    args = parser.parse_args(arguments)
    setattr(args, 'extra_job_args', extra_job_args)
    # --pull is deprecated, use --repo=k8s.io/foo=master:abcd,12:ef12,45:ff65
    setattr(args, 'pull', None)
    # --branch is deprecated, use --repo=k8s.io/foo=master
    setattr(args, 'branch', None)
    if bool(args.repo) == bool(args.bare):
        raise argparse.ArgumentTypeError(
            'Expected --repo xor --bare:', args.repo, args.bare)
    return args


if __name__ == '__main__':
    ARGS = parse_args()
    bootstrap(ARGS)
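

# For illustration, a typical invocation (job name, bucket, and key path are
# hypothetical):
#
#   test-infra/jenkins/bootstrap.py \
#       --job=pull-kubernetes-unit \
#       --repo=k8s.io/kubernetes=master:deadbeef,1234:cafe12 \
#       --root=/workspace \
#       --upload=gs://some-bucket/pr-logs \
#       --service-account=/etc/service-account/service-account.json \
#       --timeout=75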