k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/jenkins/bootstrap.py

#!/usr/bin/env python3

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Need to figure out why this only fails on travis
# pylint: disable=bad-continuation

"""Bootstraps starting a test job.

The following should already be done:
  git checkout http://k8s.io/test-infra
  cd $WORKSPACE
  test-infra/jenkins/bootstrap.py <--repo=R || --bare> <--job=J> <--pull=P || --branch=B>

The bootstrapper now does the following:
  # Note start time
  # check out repos defined in --repo
  # note job started
  # call runner defined in $JOB.json
  # upload artifacts (this will change later)
  # upload build-log.txt
  # note job ended

The contract with the runner is as follows:
  * Runner must exit non-zero if job fails for any reason.
"""


import argparse
import contextlib
import json
import logging
import os
import random
import re
import select
import shlex
import signal
import socket
import subprocess
import sys
import tempfile
import time
import urllib.request, urllib.error, urllib.parse

ORIG_CWD = os.getcwd()  # Checkout changes cwd


def read_all(end, stream, append):
    """Read all buffered lines from a stream."""
    while not end or time.time() < end:
        line = stream.readline()
        if not line:
            return True  # Read everything
        # Strip \n at the end if any. Last line of file may not have one.
        # decode bytes to string
        line = line.decode()
        append(line.rstrip('\n'))
        # Is there more on the buffer?
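        # select() with a 0.1s timeout checks for more buffered data without
        # blocking, so a quiet stream hands control back to the caller.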
        ret = select.select([stream.fileno()], [], [], 0.1)
        if not ret[0]:
            return False  # Cleared buffer but not at the end
    return False  # Time expired


def elapsed(since):
    """Return the number of minutes elapsed since a time."""
    return (time.time() - since) / 60


def terminate(end, proc, kill):
    """Terminate or kill the process after end."""
    if not end or time.time() <= end:
        return False
    if kill:  # Process will not die, kill everything
        pgid = os.getpgid(proc.pid)
        logging.info(
            'Kill %d and process group %d', proc.pid, pgid)
        os.killpg(pgid, signal.SIGKILL)
        proc.kill()
        return True
    logging.info(
        'Terminate %d on timeout', proc.pid)
    proc.terminate()
    return True


def _call(end, cmd, stdin=None, check=True, output=None, log_failures=True, env=None):  # pylint: disable=too-many-locals
    """Start a subprocess."""
    logging.info('Call: %s', ' '.join(shlex.quote(c) for c in cmd))
    begin = time.time()
    if end:
        end = max(end, time.time() + 60)  # Allow at least 60s per command
    proc = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE if stdin is not None else None,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        preexec_fn=os.setsid,
        env=env,
    )
    if stdin:
        proc.stdin.write(stdin)
        proc.stdin.close()
    out = []
    code = None
    timeout = False
    reads = {
        proc.stderr.fileno(): (proc.stderr, logging.warning),
        proc.stdout.fileno(): (
            proc.stdout, (out.append if output else logging.info)),
    }
    while reads:
        if terminate(end, proc, timeout):
            if timeout:  # We killed everything
                break
            # Give subprocess some cleanup time before killing.
            end = time.time() + 15 * 60
            timeout = True
        ret = select.select(reads, [], [], 0.1)
        for fdesc in ret[0]:
            if read_all(end, *reads[fdesc]):
                reads.pop(fdesc)
        if not ret[0] and proc.poll() is not None:
            break  # process exited without closing pipes (timeout?)
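
    # The loop above exits once every pipe is drained or the child has gone
    # away; reap it below, normalizing a timeout to exit code 124 (the same
    # convention coreutils timeout(1) uses).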
    code = proc.wait()
    if timeout:
        code = code or 124
        logging.error('Build timed out')
    if code and log_failures:
        logging.error('Command failed')
    logging.info(
        'process %d exited with code %d after %.1fm',
        proc.pid, code, elapsed(begin))
    out.append('')
    lines = output and '\n'.join(out)
    if check and code:
        raise subprocess.CalledProcessError(code, cmd, lines)
    return lines


def ref_has_shas(ref):
    """Determine if a reference specifies shas (contains ':')"""
    return isinstance(ref, str) and ':' in ref


def pull_numbers(pull):
    """Turn a pull reference list into a list of PR numbers to merge."""
    if ref_has_shas(pull):
        return [r.split(':')[0] for r in pull.split(',')][1:]
    return [str(pull)]


def pull_ref(pull):
    """Turn a PR number or a list of refs into specific refs to fetch and check out."""
    if isinstance(pull, int) or ',' not in pull:
        return ['+refs/pull/%d/merge' % int(pull)], ['FETCH_HEAD']
    pulls = pull.split(',')
    refs = []
    checkouts = []
    for ref in pulls:
        change_ref = None
        if ':' in ref:  # master:abcd or 1234:abcd or 1234:abcd:ref/for/pr
            res = ref.split(':')
            name = res[0]
            sha = res[1]
            if len(res) > 2:
                change_ref = res[2]
        elif not refs:  # master
            name, sha = ref, 'FETCH_HEAD'
        else:
            name = ref
            sha = 'refs/pr/%s' % ref

        checkouts.append(sha)
        if not refs:  # First ref should be branch to merge into
            refs.append(name)
        elif change_ref:  # explicit change refs
            refs.append(change_ref)
        else:  # PR numbers
            num = int(name)
            refs.append('+refs/pull/%d/head:refs/pr/%d' % (num, num))
    return refs, checkouts


def branch_ref(branch):
    """Split branch:sha if necessary."""
    if ref_has_shas(branch):
        split_refs = branch.split(':')
        return [split_refs[0]], [split_refs[1]]
    return [branch], ['FETCH_HEAD']


def repository(repo, ssh):
    """Return the url associated with the repo."""
    if repo.startswith('k8s.io/'):
        repo = 'github.com/kubernetes/%s' % (repo[len('k8s.io/'):])
    elif repo.startswith('sigs.k8s.io/'):
        repo = 'github.com/kubernetes-sigs/%s' % (repo[len('sigs.k8s.io/'):])
    elif repo.startswith('istio.io/'):
        repo = 'github.com/istio/%s' % (repo[len('istio.io/'):])
    if ssh:
        if ":" not in repo:
            parts = repo.split('/', 1)
            repo = '%s:%s' % (parts[0], parts[1])
        return 'git@%s' % repo
    return 'https://%s' % repo


def random_sleep(attempt):
    """Sleep attempt**2 seconds plus a random fractional offset."""
    time.sleep(random.random() + attempt ** 2)


def auth_google_gerrit(git, call):
    """authenticate to foo.googlesource.com"""
    call([git, 'clone', 'https://gerrit.googlesource.com/gcompute-tools'])
    call(['./gcompute-tools/git-cookie-authdaemon'])


def commit_date(git, commit, call):
    """Return the unix commit timestamp of commit, or None on failure."""
    try:
        return call([git, 'show', '-s', '--format=format:%ct', commit],
                    output=True, log_failures=False)
    except subprocess.CalledProcessError:
        logging.warning('Unable to print commit date for %s', commit)
        return None


def checkout(call, repo, repo_path, branch, pull, ssh='', git_cache='', clean=False):
    """Fetch and checkout the repository at the specified branch/pull.

    Note that repo and repo_path should usually be the same, but repo_path can
    be set to a different relative path where repo should be checked out."""
    # pylint: disable=too-many-locals,too-many-branches
    if bool(branch) == bool(pull):
        raise ValueError('Must specify one of --branch or --pull')

    if pull:
        refs, checkouts = pull_ref(pull)
    else:
        refs, checkouts = branch_ref(branch)

    git = 'git'

    # auth to google gerrit instance
    # TODO(krzyzacy): when we migrate to an init container we'll make a gerrit
    # checkout image and move this logic there
    if '.googlesource.com' in repo:
        auth_google_gerrit(git, call)

    if git_cache:
        cache_dir = '%s/%s' % (git_cache, repo)
        try:
            os.makedirs(cache_dir)
        except OSError:
            pass
        call([git, 'init', repo_path, '--separate-git-dir=%s' % cache_dir])
        call(['rm', '-f', '%s/index.lock' % cache_dir])
    else:
        call([git, 'init', repo_path])
    os.chdir(repo_path)

    if clean:
        call([git, 'clean', '-dfx'])
        call([git, 'reset', '--hard'])

    # To make a merge commit, a user needs to be set. It's okay to use a dummy
    # user here, since we're not exporting the history.
    call([git, 'config', '--local', 'user.name', 'K8S Bootstrap'])
    call([git, 'config', '--local', 'user.email', 'k8s_bootstrap@localhost'])
    retries = 3
    for attempt in range(retries):
        try:
            call([git, 'fetch', '--filter=blob:none', '--quiet', '--tags',
                  repository(repo, ssh)] + refs)
            break
        except subprocess.CalledProcessError as cpe:
            if attempt >= retries - 1:
                raise
            if cpe.returncode != 128:
                raise
            logging.warning('git fetch failed')
            random_sleep(attempt)
    call([git, 'checkout', '-B', 'test', checkouts[0]])

    # Lie about the date in merge commits: use sequential seconds after the
    # commit date of the tip of the parent branch we're checking into.
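    # Starting from HEAD's commit date keeps repeated checkouts of the same
    # refs deterministic: identical inputs produce identical merge commits.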
    merge_date = int(commit_date(git, 'HEAD', call) or time.time())

    git_merge_env = os.environ.copy()
    for ref, head in list(zip(refs, checkouts))[1:]:
        merge_date += 1
        git_merge_env[GIT_AUTHOR_DATE_ENV] = str(merge_date)
        git_merge_env[GIT_COMMITTER_DATE_ENV] = str(merge_date)
        call(['git', 'merge', '--no-ff', '-m', 'Merge %s' % ref, head],
             env=git_merge_env)


def repos_dict(repos):
    """Returns {"repo1": "branch", "repo2": "pull"}."""
    return {r: b or p for (r, (b, p)) in list(repos.items())}


def start(gsutil, paths, stamp, node_name, version, repos):
    """Construct and upload started.json."""
    data = {
        'timestamp': int(stamp),
        'node': node_name,
    }
    if version:
        data['repo-version'] = version
        data['version'] = version  # TODO(fejta): retire
    if repos:
        pull = repos[repos.main]
        if ref_has_shas(pull[1]):
            data['pull'] = pull[1]
        data['repos'] = repos_dict(repos)
    if POD_ENV in os.environ:
        data['metadata'] = {'pod': os.environ[POD_ENV]}

    gsutil.upload_json(paths.started, data)
    # Upload a link to the build path in the directory
    if paths.pr_build_link:
        gsutil.upload_text(
            paths.pr_build_link,
            paths.pr_path,
            additional_headers=['-h', 'x-goog-meta-link: %s' % paths.pr_path]
        )


class GSUtil(object):
    """A helper class for making gsutil commands."""
    gsutil = 'gsutil'

    def __init__(self, call):
        self.call = call

    def stat(self, path):
        """Return metadata about the object, such as generation."""
        cmd = [self.gsutil, 'stat', path]
        return self.call(cmd, output=True, log_failures=False)

    def ls(self, path):
        """List a bucket or subdir."""
        cmd = [self.gsutil, 'ls', path]
        return self.call(cmd, output=True)

    def upload_json(self, path, jdict, generation=None):
        """Upload the dictionary object to path."""
        if generation is not None:  # generation==0 means object does not exist
            gen = ['-h', 'x-goog-if-generation-match:%s' % generation]
        else:
            gen = []
        with tempfile.NamedTemporaryFile(mode='wt', encoding='utf-8', prefix='gsutil_') as fp:
            json.dump(jdict, fp, indent=2)
            fp.flush()
            cmd = [
                self.gsutil, '-q',
                '-h', 'Content-Type:application/json'] + gen + [
                'cp', fp.name, path]
            self.call(cmd)

    def copy_file(self, dest, orig, compress):
        """Copy the file to the specified path using compressed encoding."""
        compress = ['-Z'] if compress else []
        cmd = [self.gsutil, '-q', 'cp'] + compress + [orig, dest]
        self.call(cmd)

    def upload_text(self, path, txt, additional_headers=None, cached=True):
        """Copy the text to path, optionally disabling caching."""
        headers = ['-h', 'Content-Type:text/plain']
        if not cached:
            headers += ['-h', 'Cache-Control:private, max-age=0, no-transform']
        if additional_headers:
            headers += additional_headers
        with tempfile.NamedTemporaryFile(mode='wt', encoding='utf-8', prefix='gsutil_') as fp:
            fp.write(txt)
            fp.flush()
            cmd = [self.gsutil, '-q'] + headers + ['cp', fp.name, path]
            self.call(cmd)

    def cat(self, path, generation):
        """Return contents of path#generation"""
        cmd = [self.gsutil, '-q', 'cat', '%s#%s' % (path, generation)]
        return self.call(cmd, output=True)

    def upload_artifacts(self, gsutil, path, artifacts):
        """Upload artifacts to the specified path."""
        # Upload artifacts
        if not os.path.isdir(artifacts):
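            # Nothing to upload: warn and return rather than fail the job.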
            logging.warning('Artifacts dir %s is missing.', artifacts)
            return
        original_artifacts = artifacts
        try:
            # If the remote path exists, cp -r will create an .../_artifacts
            # subdir instead
            gsutil.ls(path)
            # Success means remote path exists
            remote_base = os.path.basename(path)
            local_base = os.path.basename(artifacts)
            if remote_base != local_base:
                # if the basenames are different, need to copy things over first.
                localpath = artifacts.replace(local_base, remote_base)
                os.rename(artifacts, localpath)
                artifacts = localpath
                path = path[:-len(remote_base + '/')]
        except subprocess.CalledProcessError:
            logging.warning('Remote dir %s does not exist yet', path)
        cmd = [
            self.gsutil, '-m', '-q',
            '-o', 'GSUtil:use_magicfile=True',
            'cp', '-r', '-c', '-z', 'log,txt,xml',
            artifacts, path,
        ]
        self.call(cmd)

        # rename the artifacts dir back
        # other places still reference the original artifacts dir
        if original_artifacts != artifacts:
            os.rename(artifacts, original_artifacts)


def append_result(gsutil, path, build, version, passed):
    """Download a json list and append metadata about this build to it."""
    # TODO(fejta): delete the clone of this logic in upload-to-gcs.sh
    # (this is update_job_result_cache)
    end = time.time() + 300  # try for up to five minutes
    errors = 0
    while time.time() < end:
        if errors:
            random_sleep(min(errors, 3))
        try:
            out = gsutil.stat(path)
            gen = re.search(r'Generation:\s+(\d+)', out).group(1)
        except subprocess.CalledProcessError:
            gen = 0
        if gen:
            try:
                cache = json.loads(gsutil.cat(path, gen))
                if not isinstance(cache, list):
                    raise ValueError(cache)
            except ValueError as exc:
                logging.warning('Failed to decode JSON: %s', exc)
                cache = []
            except subprocess.CalledProcessError:  # gen doesn't exist
                errors += 1
                continue
        else:
            cache = []
        cache.append({
            'version': version,  # TODO(fejta): retire
            'job-version': version,
            'buildnumber': build,
            'passed': bool(passed),
            'result': 'SUCCESS' if passed else 'FAILURE',
        })
        cache = cache[-300:]
        try:
            gsutil.upload_json(path, cache, generation=gen)
            return
        except subprocess.CalledProcessError:
            logging.warning('Failed to append to %s#%s', path, gen)
            errors += 1


def metadata(repos, artifacts, call):
    """Return metadata associated with the build, including inside artifacts."""
    path = os.path.join(artifacts or '', 'metadata.json')
    meta = None
    if os.path.isfile(path):
        try:
            with open(path) as fp:
                meta = json.loads(fp.read())
        except (IOError, ValueError):
            logging.warning('Failed to open %s', path)
    else:
        logging.warning('metadata path %s does not exist', path)

    if not meta or not isinstance(meta, dict):
        logging.warning(
            'metadata not found or invalid, init with empty metadata')
        meta = {}
    if repos:
        meta['repo'] = repos.main
        meta['repos'] = repos_dict(repos)

    if POD_ENV in os.environ:
        # HARDEN against metadata only being read from finished.
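        # Record the pod name here as well as in started.json, so it survives
        # consumers that only read finished.json.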
        meta['pod'] = os.environ[POD_ENV]

    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['repo-commit'] = commit.strip()
    except subprocess.CalledProcessError:
        pass

    cwd = os.getcwd()
    os.chdir(test_infra('.'))
    try:
        commit = call(['git', 'rev-parse', 'HEAD'], output=True)
        if commit:
            meta['infra-commit'] = commit.strip()[:9]
    except subprocess.CalledProcessError:
        pass
    os.chdir(cwd)

    return meta


def finish(gsutil, paths, success, artifacts, build, version, repos, call):
    """
    Args:
        gsutil: a GSUtil instance.
        paths: a Paths instance.
        success: the build passed if true.
        artifacts: a dir containing artifacts to upload.
        build: identifier of this build.
        version: identifies what version of the code the build tested.
        repos: the target repos.
        call: a function used to execute subprocesses.
    """

    if os.path.isdir(artifacts) and any(f for _, _, f in os.walk(artifacts)):
        try:
            gsutil.upload_artifacts(gsutil, paths.artifacts, artifacts)
        except subprocess.CalledProcessError:
            logging.warning('Failed to upload artifacts')
    else:
        logging.warning('Missing local artifacts : %s', artifacts)

    meta = metadata(repos, artifacts, call)
    if not version:
        version = meta.get('job-version')
    if not version:  # TODO(fejta): retire
        version = meta.get('version')
    # github.com/kubernetes/release/find_green_build depends on append_result()
    # TODO(fejta): reconsider whether this is how we want to solve this problem.
    append_result(gsutil, paths.result_cache, build, version, success)
    if paths.pr_result_cache:
        append_result(gsutil, paths.pr_result_cache, build, version, success)

    data = {
        # TODO(fejta): update utils.go in contrib to accept a float
        'timestamp': int(time.time()),
        'result': 'SUCCESS' if success else 'FAILURE',
        'passed': bool(success),
        'metadata': meta,
    }
    if version:
        data['job-version'] = version
        data['version'] = version  # TODO(fejta): retire
    gsutil.upload_json(paths.finished, data)

    # Upload the latest build for the job.
    # Do this last, since other tools expect the rest of the data to be
    # published when this file is created.
    for path in {paths.latest, paths.pr_latest}:
        if path:
            try:
                gsutil.upload_text(path, str(build), cached=False)
            except subprocess.CalledProcessError:
                logging.warning('Failed to update %s', path)


def test_infra(*paths):
    """Return path relative to root of test-infra repo."""
    return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)


def node():
    """Return the name of the node running the build."""
    # TODO(fejta): jenkins sets the node name and our infra expects this value.
    # TODO(fejta): Consider doing something different here.
    if NODE_ENV not in os.environ:
        host = socket.gethostname().split('.')[0]
        try:
            # Try reading the name of the VM we're running on, using the
            # metadata server.
            os.environ[NODE_ENV] = urllib.request.urlopen(urllib.request.Request(
                'http://169.254.169.254/computeMetadata/v1/instance/name',
                headers={'Metadata-Flavor': 'Google'})).read().decode('utf-8')
            os.environ[POD_ENV] = host  # We also want to log this.
        except IOError:  # Fallback.
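            # Not on GCE (or the metadata server is unreachable), so fall
            # back to the local hostname.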
            os.environ[NODE_ENV] = host
    return os.environ[NODE_ENV]


def find_version(call):
    """Determine and return the version of the build."""
    # TODO(fejta): once job-version is functional switch this to
    # git rev-parse [--short=N] HEAD^{commit}
    version_file = 'version'
    if os.path.isfile(version_file):
        # e2e tests which download kubernetes use this path:
        with open(version_file) as fp:
            return fp.read().strip()

    version_script = 'hack/lib/version.sh'
    if os.path.isfile(version_script):
        cmd = [
            'bash', '-c', (
"""
set -o errexit
set -o nounset
export KUBE_ROOT=.
source %s
kube::version::get_version_vars
echo $KUBE_GIT_VERSION
""" % version_script)
        ]
        return call(cmd, output=True).strip()

    return 'unknown'


class Paths(object):  # pylint: disable=too-many-instance-attributes,too-few-public-methods
    """Links to remote gcs-paths for uploading results."""

    def __init__(  # pylint: disable=too-many-arguments
            self,
            artifacts,  # artifacts folder (in build)
            build_log,  # build-log.txt (in build)
            pr_path,  # path to build
            finished,  # finished.json (metadata from end of build)
            latest,  # latest-build.txt (in job)
            pr_build_link,  # file containing pr_path (in job directory)
            pr_latest,  # latest-build.txt (in pr job)
            pr_result_cache,  # jobResultsCache.json (in pr job)
            result_cache,  # jobResultsCache.json (cache of latest results in job)
            started,  # started.json (metadata from start of build)
    ):
        self.artifacts = artifacts
        self.build_log = build_log
        self.pr_path = pr_path
        self.finished = finished
        self.latest = latest
        self.pr_build_link = pr_build_link
        self.pr_latest = pr_latest
        self.pr_result_cache = pr_result_cache
        self.result_cache = result_cache
        self.started = started


def ci_paths(base, job, build):
    """Return a Paths() instance for a continuous build."""
    latest = os.path.join(base, job, 'latest-build.txt')
    return Paths(
        artifacts=os.path.join(base, job, build, 'artifacts'),
        build_log=os.path.join(base, job, build, 'build-log.txt'),
        pr_path=None,
        finished=os.path.join(base, job, build, 'finished.json'),
        latest=latest,
        pr_build_link=None,
        pr_latest=None,
        pr_result_cache=None,
        result_cache=os.path.join(base, job, 'jobResultsCache.json'),
        started=os.path.join(base, job, build, 'started.json'),
    )


def pr_paths(base, repos, job, build):
    """Return a Paths() instance for a PR."""
    if not repos:
        raise ValueError('repos is empty')
    repo = repos.main
    pull = str(repos[repo][1])
    if repo in ['k8s.io/kubernetes', 'kubernetes/kubernetes']:
        prefix = ''
    elif repo.startswith('k8s.io/'):
        prefix = repo[len('k8s.io/'):]
    elif repo.startswith('kubernetes/'):
        prefix = repo[len('kubernetes/'):]
    elif repo.startswith('github.com/'):
        prefix = repo[len('github.com/'):].replace('/', '_')
    else:
        prefix = repo.replace('/', '_')
    # Batch merges are those with more than one PR specified.
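    # A batch lands under .../pull/<prefix>/batch/<job>/..., while a single
    # PR lands under .../pull/<prefix>/<number>/<job>/...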
    pr_nums = pull_numbers(pull)
    if len(pr_nums) > 1:
        pull = os.path.join(prefix, 'batch')
    else:
        pull = os.path.join(prefix, pr_nums[0])
    pr_path = os.path.join(base, 'pull', pull, job, build)
    result_cache = os.path.join(
        base, 'directory', job, 'jobResultsCache.json')
    pr_result_cache = os.path.join(
        base, 'pull', pull, job, 'jobResultsCache.json')
    return Paths(
        artifacts=os.path.join(pr_path, 'artifacts'),
        build_log=os.path.join(pr_path, 'build-log.txt'),
        pr_path=pr_path,
        finished=os.path.join(pr_path, 'finished.json'),
        latest=os.path.join(base, 'directory', job, 'latest-build.txt'),
        pr_build_link=os.path.join(base, 'directory', job, '%s.txt' % build),
        pr_latest=os.path.join(base, 'pull', pull, job, 'latest-build.txt'),
        pr_result_cache=pr_result_cache,
        result_cache=result_cache,
        started=os.path.join(pr_path, 'started.json'),
    )


BUILD_ENV = 'BUILD_ID'
BOOTSTRAP_ENV = 'BOOTSTRAP_MIGRATION'
CLOUDSDK_ENV = 'CLOUDSDK_CONFIG'
GCE_KEY_ENV = 'JENKINS_GCE_SSH_PRIVATE_KEY_FILE'
GUBERNATOR = 'https://gubernator.k8s.io/build'
HOME_ENV = 'HOME'
JENKINS_HOME_ENV = 'JENKINS_HOME'
K8S_ENV = 'KUBERNETES_SERVICE_HOST'
JOB_ENV = 'JOB_NAME'
NODE_ENV = 'NODE_NAME'
POD_ENV = 'POD_NAME'
SERVICE_ACCOUNT_ENV = 'GOOGLE_APPLICATION_CREDENTIALS'
WORKSPACE_ENV = 'WORKSPACE'
GCS_ARTIFACTS_ENV = 'GCS_ARTIFACTS_DIR'
IMAGE_NAME_ENV = 'IMAGE'
GIT_AUTHOR_DATE_ENV = 'GIT_AUTHOR_DATE'
GIT_COMMITTER_DATE_ENV = 'GIT_COMMITTER_DATE'
SOURCE_DATE_EPOCH_ENV = 'SOURCE_DATE_EPOCH'
JOB_ARTIFACTS_ENV = 'ARTIFACTS'


def build_name(started):
    """Return the unique(ish) string representing this build."""
    # TODO(fejta): right now jenkins sets the BUILD_ID and does this in an
    # environment variable. Consider migrating this to a bootstrap.py flag.
    if BUILD_ENV not in os.environ:
        # Automatically generate a build number if none is set
        uniq = '%x-%d' % (hash(node()), os.getpid())
        autogen = time.strftime('%Y%m%d-%H%M%S-' + uniq, time.gmtime(started))
        os.environ[BUILD_ENV] = autogen
    return os.environ[BUILD_ENV]


def setup_credentials(call, robot, upload):
    """Activate the service account unless robot is none."""
    # TODO(fejta): stop activating inside the image
    # TODO(fejta): allow use of existing gcloud auth
    if robot:
        os.environ[SERVICE_ACCOUNT_ENV] = robot
    if not os.getenv(SERVICE_ACCOUNT_ENV) and upload:
        logging.warning(
            'Cannot --upload=%s, no active gcloud account.', upload)
        raise ValueError('--upload requires --service-account')
    if not os.getenv(SERVICE_ACCOUNT_ENV) and not upload:
        logging.info('Will not upload results.')
        return
    if not os.path.isfile(os.environ[SERVICE_ACCOUNT_ENV]):
        raise IOError(
            'Cannot find service account credentials',
            os.environ[SERVICE_ACCOUNT_ENV],
            'Create service account and then create key at '
            'https://console.developers.google.com/iam-admin/serviceaccounts/project',  # pylint: disable=line-too-long
        )
    # this sometimes fails spuriously due to DNS flakiness, so we retry it
    for _ in range(5):
        try:
            call([
                'gcloud',
                'auth',
                'activate-service-account',
                '--key-file=%s' % os.environ[SERVICE_ACCOUNT_ENV],
            ])
            break
        except subprocess.CalledProcessError:
            pass
        sleep_for = 1
        logging.info(
            'Retrying service account activation in %.2fs ...', sleep_for)
        time.sleep(sleep_for)
    else:
        raise Exception(
            "Failed to activate service account, exhausted retries")
    try:  # Old versions of gcloud may not support this value
        account = call(
            ['gcloud', 'config', 'get-value', 'account'], output=True).strip()
    except subprocess.CalledProcessError:
        account = 'unknown'
    logging.info('Will upload results to %s using %s', upload, account)


def setup_logging(path):
    """Initialize logging to screen and path."""
    # See https://docs.python.org/2/library/logging.html#logrecord-attributes
    # [IWEF]mmdd HH:MM:SS.mmm] msg
    fmt = '%(levelname).1s%(asctime)s.%(msecs)03d] %(message)s'  # pylint: disable=line-too-long
    datefmt = '%m%d %H:%M:%S'
    logging.basicConfig(
        level=logging.INFO,
        format=fmt,
        datefmt=datefmt,
    )
    build_log = logging.FileHandler(filename=path, mode='w')
    build_log.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt, datefmt=datefmt)
    build_log.setFormatter(formatter)
    logging.getLogger('').addHandler(build_log)
    return build_log


def get_artifacts_dir():
    """Return the local directory where artifacts should be written."""
    return os.getenv(
        JOB_ARTIFACTS_ENV,
        os.path.join(os.getenv(WORKSPACE_ENV, os.getcwd()), '_artifacts'))


def setup_magic_environment(job, call):
    """Set magic environment variables scripts currently expect."""
    home = os.environ[HOME_ENV]
    # TODO(fejta): jenkins sets these values. Consider migrating to using
    # a secret volume instead and passing the path to this volume
    # into bootstrap.py as a flag.
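    # Default the ssh key locations job scripts expect; setdefault() keeps
    # any paths the environment already provides.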
    os.environ.setdefault(
        GCE_KEY_ENV,
        os.path.join(home, '.ssh/google_compute_engine'),
    )
    os.environ.setdefault(
        'JENKINS_GCE_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/google_compute_engine.pub'),
    )
    os.environ.setdefault(
        'AWS_SSH_PRIVATE_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa'),
    )
    os.environ.setdefault(
        'AWS_SSH_PUBLIC_KEY_FILE',
        os.path.join(home, '.ssh/kube_aws_rsa.pub'),
    )

    cwd = os.getcwd()
    # TODO(fejta): jenkins sets WORKSPACE and pieces of our infra expect this
    # value. Consider doing something else in the future.
    # Furthermore, in the Jenkins and Prow environments, this is already set
    # to something reasonable, but using cwd will likely cause all sorts of
    # problems. Thus, only set this if we really need to.
    if WORKSPACE_ENV not in os.environ:
        os.environ[WORKSPACE_ENV] = cwd
    # By default, Jenkins sets HOME to JENKINS_HOME, which is shared by all
    # jobs. To avoid collisions, set it to the cwd instead, but only when
    # running on Jenkins.
    if os.getenv(HOME_ENV) and os.getenv(HOME_ENV) == os.getenv(JENKINS_HOME_ENV):
        os.environ[HOME_ENV] = cwd
    # TODO(fejta): jenkins sets JOB_ENV and pieces of our infra expect this
    # value. Consider making everything below here agnostic to the
    # job name.
    if JOB_ENV not in os.environ:
        os.environ[JOB_ENV] = job
    elif os.environ[JOB_ENV] != job:
        logging.warning('%s=%s (overrides %s)', JOB_ENV,
                        job, os.environ[JOB_ENV])
        os.environ[JOB_ENV] = job
    # TODO(fejta): Magic value to tell our test code not to upload started.json
    # TODO(fejta): delete upload-to-gcs.sh and then this value.
    os.environ[BOOTSTRAP_ENV] = 'yes'
    # This helps prevent reuse of cloudsdk configuration. It also reduces the
    # risk that running a job on a workstation corrupts the user's config.
    os.environ[CLOUDSDK_ENV] = '%s/.config/gcloud' % cwd

    # Set $ARTIFACTS to help migrate to podutils
    os.environ[JOB_ARTIFACTS_ENV] = os.path.join(
        os.getenv(WORKSPACE_ENV, os.getcwd()), '_artifacts')

    # also make the artifacts dir if it doesn't exist yet
    if not os.path.isdir(get_artifacts_dir()):
        try:
            os.makedirs(get_artifacts_dir())
        except OSError as exc:
            logging.info(
                'cannot create %s, continue : %s', get_artifacts_dir(), exc)

    # Try to set SOURCE_DATE_EPOCH based on the commit date of the tip of the
    # tree.
    # This improves cacheability of stamped binaries.
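    # SOURCE_DATE_EPOCH is the cross-tool convention for reproducible build
    # timestamps: https://reproducible-builds.org/docs/source-date-epoch/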
    head_commit_date = commit_date('git', 'HEAD', call)
    if head_commit_date:
        os.environ[SOURCE_DATE_EPOCH_ENV] = head_commit_date.strip()


def job_args(args):
    """Converts 'a ${FOO} $bar' into 'a wildly different string'."""
    return [os.path.expandvars(a) for a in args]


def job_script(job, scenario, extra_job_args):
    """Return path to script for job."""
    with open(test_infra('jobs/config.json')) as fp:
        config = json.loads(fp.read())
    if job.startswith('pull-security-kubernetes-'):
        job = job.replace('pull-security-kubernetes-', 'pull-kubernetes-', 1)
    config_json_args = []
    if job in config:
        job_config = config[job]
        if not scenario:
            scenario = job_config['scenario']
        config_json_args = job_config.get('args', [])
    elif not scenario:
        raise ValueError('cannot find scenario for job', job)
    cmd = test_infra('scenarios/%s.py' % scenario)
    return [cmd] + job_args(config_json_args + extra_job_args)


def gubernator_uri(paths):
    """Return a gubernator link for this build."""
    job = os.path.dirname(paths.build_log)
    if job.startswith('gs:/'):
        return job.replace('gs:/', GUBERNATOR, 1)
    return job


@contextlib.contextmanager
def configure_ssh_key(ssh):
    """Creates a script for GIT_SSH that uses -i ssh if set."""
    if not ssh:  # Nothing to do
        yield
        return

    try:
        os.makedirs(os.path.join(os.environ[HOME_ENV], '.ssh'))
    except OSError as exc:
        logging.info('cannot create $HOME/.ssh, continue : %s', exc)
    except KeyError as exc:
        logging.info('$%s does not exist, continue : %s', HOME_ENV, exc)

    # Create a script for use with GIT_SSH, which defines the program git uses
    # during git fetch. In the future change this to GIT_SSH_COMMAND
    # https://superuser.com/questions/232373/how-to-tell-git-which-private-key-to-use
    # mode='wt' so the str payload can be written under python3
    with tempfile.NamedTemporaryFile(mode='wt', prefix='ssh', delete=False) as fp:
        fp.write(
            '#!/bin/sh\nssh -o StrictHostKeyChecking=no -i \'%s\' -F /dev/null "${@}"\n' % ssh)
    try:
        os.chmod(fp.name, 0o500)
        had = 'GIT_SSH' in os.environ
        old = os.getenv('GIT_SSH')
        os.environ['GIT_SSH'] = fp.name

        yield

        del os.environ['GIT_SSH']
        if had:
            os.environ['GIT_SSH'] = old
    finally:
        os.unlink(fp.name)


def setup_root(call, root, repos, ssh, git_cache, clean):
    """Create root dir, checkout repo and cd into resulting dir."""
    if not os.path.exists(root):
        os.makedirs(root)
    root_dir = os.path.realpath(root)
    logging.info('Root: %s', root_dir)
    os.chdir(root_dir)
    logging.info('cd to %s', root_dir)

    # we want to checkout the correct repo for k-s/k but *everything*
    # under the sun assumes $GOPATH/src/k8s.io/kubernetes so... :(
    # after this method is called we've already computed the upload paths
    # etc. so we can just swap it out for the desired path on disk
    for repo, (branch, pull) in list(repos.items()):
        os.chdir(root_dir)
        # for k-s/k these are different, for the rest they are the same
        # TODO(cjwagner,stevekuznetsov): in the integrated
        # prow checkout support remapping checkouts and kill this monstrosity
        repo_path = repo
        if repo == "github.com/kubernetes-security/kubernetes":
            repo_path = "k8s.io/kubernetes"
        logging.info(
            'Checkout: %s %s to %s',
            os.path.join(root_dir, repo),
            pull and pull or branch,
            os.path.join(root_dir, repo_path))
        checkout(call, repo, repo_path, branch, pull, ssh, git_cache, clean)
    # switch out the main repo for the actual path on disk if we are k-s/k
    # from this point forward this is the path we want to use for everything
    if repos.main == "github.com/kubernetes-security/kubernetes":
        repos["k8s.io/kubernetes"], repos.main = repos[repos.main], "k8s.io/kubernetes"
    if len(repos) > 1:  # cd back into the primary repo
        os.chdir(root_dir)
        os.chdir(repos.main)


class Repos(dict):
    """{"repo": (branch, pull)} dict with a .main attribute."""
    main = ''

    def __setitem__(self, k, v):
        if not self:
            self.main = k
        return super(Repos, self).__setitem__(k, v)


def parse_repos(args):
    """Convert --repo=foo=this,123:abc,555:ddd into a Repos()."""
    repos = args.repo or {}
    if not repos and not args.bare:
        raise ValueError('--bare or --repo required')
    ret = Repos()
    if len(repos) != 1:
        if args.pull:
            raise ValueError(
                'Multi --repo does not support --pull, use --repo=R=branch,p1,p2')
        if args.branch:
            raise ValueError(
                'Multi --repo does not support --branch, use --repo=R=branch')
    elif len(repos) == 1 and (args.branch or args.pull):
        repo = repos[0]
        if '=' in repo or ':' in repo:
            raise ValueError(
                '--repo cannot contain = or : with --branch or --pull')
        ret[repo] = (args.branch, args.pull)
        return ret
    for repo in repos:
        mat = re.match(
            r'([^=]+)(=([^:,~^\s]+(:[0-9a-fA-F]+)?(:refs/changes/[0-9/]+)?(,|$))+)?$', repo)
        if not mat:
            raise ValueError('bad repo', repo, repos)
        this_repo = mat.group(1)
        if not mat.group(2):
            ret[this_repo] = ('master', '')
            continue
        commits = mat.group(2)[1:].split(',')
        if len(commits) == 1:
            # Checking out a branch, possibly at a specific commit
            ret[this_repo] = (commits[0], '')
            continue
        # Checking out one or more PRs
        ret[this_repo] = ('', ','.join(commits))
    return ret


def bootstrap(args):
    """Clone repo at pull/branch into root and run job script."""
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    job = args.job
    repos = parse_repos(args)
    upload = args.upload

    build_log_path = os.path.abspath('build-log.txt')
    build_log = setup_logging(build_log_path)
    started = time.time()
    if args.timeout:
        end = started + args.timeout * 60
    else:
        end = 0
    call = lambda *a, **kw: _call(end, *a, **kw)
    gsutil = GSUtil(call)

    logging.warning(
        '**************************************************************************\n'
        'bootstrap.py is deprecated!\n'
        'test-infra oncall does not support any job still using bootstrap.py.\n'
        'Please migrate your job to podutils!\n'
        'https://github.com/kubernetes/test-infra/blob/master/prow/pod-utilities.md\n'
        '**************************************************************************'
    )

    if len(sys.argv) > 1:
        logging.info('Args: %s', ' '.join(shlex.quote(a) for a in sys.argv[1:]))
    logging.info('Bootstrap %s...', job)
    logging.info('Builder: %s', node())
    if IMAGE_NAME_ENV in os.environ:
        logging.info('Image: %s', os.environ[IMAGE_NAME_ENV])
    build = build_name(started)

    if upload:
        # TODO(cjwagner, stevekuznetsov): support the workspace
        # repo not matching the upload repo in the shiny new init container
        pull_ref_repos = [repo for repo in repos if repos[repo][1]]
        if pull_ref_repos:
            workspace_main, repos.main = repos.main, pull_ref_repos[0]
            paths = pr_paths(upload, repos, job, build)
            repos.main = workspace_main
        else:
            paths = ci_paths(upload, job, build)
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        # TODO(fejta): Replace env var below with a flag eventually.
        os.environ[GCS_ARTIFACTS_ENV] = paths.artifacts

    version = 'unknown'
    exc_type = None

    try:
        with configure_ssh_key(args.ssh):
            setup_credentials(call, args.service_account, upload)
            setup_root(call, args.root, repos, args.ssh,
                       args.git_cache, args.clean)
            logging.info('Configure environment...')
            setup_magic_environment(job, call)
            setup_credentials(call, args.service_account, upload)
            version = find_version(call) if repos else ''
            logging.info('Start %s at %s...', build, version)
            if upload:
                start(gsutil, paths, started, node(), version, repos)
            success = False
            try:
                call(job_script(job, args.scenario, args.extra_job_args))
                logging.info('PASS: %s', job)
                success = True
            except subprocess.CalledProcessError:
                logging.error('FAIL: %s', job)
    except Exception:  # pylint: disable=broad-except
        exc_type, exc_value, exc_traceback = sys.exc_info()
        logging.exception('unexpected error')
        success = False

    # jobs can change service account, always set it back before we upload logs
    setup_credentials(call, args.service_account, upload)
    if upload:
        logging.info('Upload result and artifacts...')
        logging.info('Gubernator results at %s', gubernator_uri(paths))
        try:
            finish(
                gsutil, paths, success, get_artifacts_dir(),
                build, version, repos, call
            )
        except subprocess.CalledProcessError:  # Still try to upload build log
            success = False
    logging.getLogger('').removeHandler(build_log)
    build_log.close()
    if upload:
        gsutil.copy_file(paths.build_log, build_log_path, args.compress)
    if exc_type:
        # re-raise the original exception with its original traceback
        raise exc_value.with_traceback(exc_traceback)
    if not success:
        # TODO(fejta/spxtr): we should distinguish infra and non-infra problems
        # by exit code and automatically retrigger after an infra-problem.
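        # Exit non-zero so callers mark this run as failed.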
        sys.exit(1)


def parse_args(arguments=None):
    """Parse arguments or sys.argv[1:]."""
    if arguments is None:
        arguments = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument('--root', default='.', help='Root dir to work with')
    parser.add_argument(
        '--timeout', type=float, default=0, help='Timeout in minutes if set')
    parser.add_argument(
        '--compress',
        action='store_true',
        help='Compress build-log.txt when set',
    )
    parser.add_argument(
        '--repo',
        action='append',
        help='Fetch the specified repositories, with the first one considered primary')
    parser.add_argument(
        '--bare',
        action='store_true',
        help='Do not check out a repository')
    parser.add_argument('--job', required=True, help='Name of the job to run')
    parser.add_argument(
        '--upload',
        help='Upload results here if set, requires --service-account')
    parser.add_argument(
        '--service-account',
        help='Activate and use path/to/service-account.json if set.')
    parser.add_argument(
        '--ssh',
        help='Use the ssh key to fetch the repository instead of https if set.')
    parser.add_argument(
        '--git-cache',
        help='Location of the git cache.')
    parser.add_argument(
        '--clean',
        action='store_true',
        help='Clean the git repo before running tests.')
    # TODO(krzyzacy): later we should merge prow+config.json
    # and utilize this flag
    parser.add_argument(
        '--scenario',
        help='Scenario to use, if not specified in config.json')
    # split out args after `--` as job arguments
    extra_job_args = []
    if '--' in arguments:
        index = arguments.index('--')
        arguments, extra_job_args = arguments[:index], arguments[index+1:]
    args = parser.parse_args(arguments)
    setattr(args, 'extra_job_args', extra_job_args)
    # --pull is deprecated, use --repo=k8s.io/foo=master:abcd,12:ef12,45:ff65
    setattr(args, 'pull', None)
    # --branch is deprecated, use --repo=k8s.io/foo=master
    setattr(args, 'branch', None)
    if bool(args.repo) == bool(args.bare):
        raise argparse.ArgumentTypeError(
            'Expected --repo xor --bare:', args.repo, args.bare)
    return args


if __name__ == '__main__':
    ARGS = parse_args()
    bootstrap(ARGS)