github.com/jenkins-x/test-infra@v0.0.7/scenarios/kubernetes_janitor.py

#!/usr/bin/env python

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Need to figure out why this only fails on Travis
# pylint: disable=bad-continuation

"""Dig through jobs/FOO.env, and execute a janitor pass for each project."""

import argparse
import json
import os
import re
import subprocess
import sys

ORIG_CWD = os.getcwd()  # Checkout changes cwd


def test_infra(*paths):
    """Return path relative to root of test-infra repo."""
    return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)


def check(*cmd):
    """Log and run the command, raising on errors."""
    print >>sys.stderr, 'Run:', cmd
    subprocess.check_call(cmd)


def parse_project(path):
    """Parse target env file and return GCP project name."""
    with open(path, 'r') as fp:
        env = fp.read()
    match = re.search(r'PROJECT=([^\n"]+)', env)
    if match:
        project = match.group(1)
        return project
    return None


def clean_project(project, hours=24, dryrun=False, ratelimit=None):
    """Execute janitor for target GCP project."""
    # Multiple jobs can share the same project, so only clean each one once.
    if project in CHECKED:
        return
    CHECKED.add(project)

    cmd = ['python', test_infra('boskos/janitor/janitor.py'), '--project=%s' % project]
    cmd.append('--hour=%d' % hours)
    if dryrun:
        cmd.append('--dryrun')
    if ratelimit:
        cmd.append('--ratelimit=%d' % ratelimit)

    try:
        check(*cmd)
    except subprocess.CalledProcessError:
        FAILED.append(project)


BLACKLIST = [
    'kubernetes-scale',  # Let its up/down job handle the resources
    'k8s-scale-testing',  # It can be running some manual experiments
    'k8s-jkns-e2e-gce-f8n-1-7',  # Federation projects should use fedtidy to clean up
    'k8s-jkns-e2e-gce-f8n-1-8',  # Federation projects should use fedtidy to clean up
]

PR_PROJECTS = {
    # k8s-jkns-pr-bldr-e2e-gce-fdrtn
    # k8s-jkns-pr-cnry-e2e-gce-fdrtn
    # Clean up resources older than 3h,
    # which is more than enough for presubmit jobs to finish.
    'k8s-jkns-pr-gce': 3,
    'k8s-jkns-pr-gce-bazel': 3,
    'k8s-jkns-pr-gce-etcd3': 3,
    'k8s-jkns-pr-gci-gce': 3,
    'k8s-jkns-pr-gci-gke': 3,
    'k8s-jkns-pr-gci-kubemark': 3,
    'k8s-jkns-pr-gke': 3,
    'k8s-jkns-pr-kubeadm': 3,
    'k8s-jkns-pr-kubemark': 3,
    'k8s-jkns-pr-node-e2e': 3,
    'k8s-jkns-pr-gce-gpus': 3,
    'k8s-gke-gpu-pr': 3,
}

SCALE_PROJECT = {
    'k8s-presubmit-scale': 3,
}


def check_predefine_jobs(jobs, ratelimit):
    """Handle predefined jobs."""
    for project, expire in jobs.iteritems():
        clean_project(project, hours=expire, ratelimit=ratelimit)


def check_ci_jobs():
    """Handle CI jobs."""
    with open(test_infra('jobs/config.json')) as fp:
        config = json.load(fp)

    match_re = re.compile(r'--gcp-project=(.+)')
    for value in config.values():
        clean_hours = 24
        found = None
        for arg in value.get('args', []):
            # Lifetime for a soak cluster should be 7 days;
            # clean up everything older than 10 days to prevent leaks.
            if '--soak' in arg:
                clean_hours = 24 * 10
            mat = match_re.match(arg)
            if not mat:
                continue
            project = mat.group(1)
            if any(b in project for b in BLACKLIST):
                print >>sys.stderr, 'Project %r is blacklisted in ci-janitor' % project
                continue
            if project in PR_PROJECTS or project in SCALE_PROJECT:
                continue  # CI janitor skips all PR jobs
            found = project
        if found:
            clean_project(found, clean_hours)

    # Hard-code the node-ci project here
    clean_project('k8s-jkns-ci-node-e2e')


def main(mode, ratelimit, projects, age):
    """Run janitor for each project."""
    if mode == 'pr':
        check_predefine_jobs(PR_PROJECTS, ratelimit)
    elif mode == 'scale':
        check_predefine_jobs(SCALE_PROJECT, ratelimit)
    elif mode == 'custom':
        projs = str.split(projects, ',')
        for proj in projs:
            clean_project(proj.strip(), hours=age, ratelimit=ratelimit)
    else:
        check_ci_jobs()

    # Summary
    print 'Janitor checked %d projects, %d failed to clean up.' % (len(CHECKED), len(FAILED))
    if FAILED:
        print >>sys.stderr, 'Failed projects: %r' % FAILED
        exit(1)


if __name__ == '__main__':
    # Keep some metrics
    CHECKED = set()
    FAILED = []
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument(
        '--mode', default='ci', choices=['ci', 'pr', 'scale', 'custom'],
        help='Which type of projects to clear')
    PARSER.add_argument(
        '--ratelimit', type=int,
        help='Max number of resources to clear in one gcloud delete call (passed into janitor.py)')
    PARSER.add_argument(
        '--projects', type=str,
        help='Comma separated list of projects to clean up. Only applicable in custom mode.')
    PARSER.add_argument(
        '--age', type=int,
        help='Expiry age for projects, in hours. Only applicable in custom mode.')
    ARGS = PARSER.parse_args()
    main(ARGS.mode, ARGS.ratelimit, ARGS.projects, ARGS.age)
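
For orientation, a hedged sketch of how this script might be invoked; the flags come from the argparse definitions above, while the ratelimit value, project names, and age in the custom-mode example are hypothetical:

    # Default 'ci' mode: walk jobs/config.json and clean each --gcp-project found there.
    python scenarios/kubernetes_janitor.py

    # PR mode: clean the predefined PR_PROJECTS with a 3h expiry.
    python scenarios/kubernetes_janitor.py --mode=pr --ratelimit=50

    # Custom mode: clean an explicit, comma-separated list of projects.
    python scenarios/kubernetes_janitor.py --mode=custom --projects=my-proj-a,my-proj-b --age=6

In every mode, each project is ultimately handed to boskos/janitor/janitor.py by clean_project(), which forwards --project and --hour, plus --dryrun and --ratelimit when set.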