github.com/jenkins-x/test-infra@v0.0.7/scenarios/kubernetes_janitor.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2017 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Need to figure out why this only fails on travis
    18  # pylint: disable=bad-continuation
    19  
"""Dig through jobs/FOO.env, and execute a janitor pass for each of the projects"""
    21  
    22  import argparse
    23  import json
    24  import os
    25  import re
    26  import subprocess
    27  import sys
    28  
# Working directory captured once at import time; test_infra() resolves
# repo-relative paths against this value rather than the current cwd.
ORIG_CWD = os.getcwd()  # Checkout changes cwd
    30  
    31  def test_infra(*paths):
    32      """Return path relative to root of test-infra repo."""
    33      return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)
    34  
    35  
    36  def check(*cmd):
    37      """Log and run the command, raising on errors."""
    38      print >>sys.stderr, 'Run:', cmd
    39      subprocess.check_call(cmd)
    40  
    41  
    42  def parse_project(path):
    43      """Parse target env file and return GCP project name."""
    44      with open(path, 'r') as fp:
    45          env = fp.read()
    46      match = re.search(r'PROJECT=([^\n"]+)', env)
    47      if match:
    48          project = match.group(1)
    49          return project
    50      return None
    51  
    52  
    53  def clean_project(project, hours=24, dryrun=False, ratelimit=None):
    54      """Execute janitor for target GCP project """
    55      # Multiple jobs can share the same project, woooo
    56      if project in CHECKED:
    57          return
    58      CHECKED.add(project)
    59  
    60      cmd = ['python', test_infra('boskos/janitor/janitor.py'), '--project=%s' % project]
    61      cmd.append('--hour=%d' % hours)
    62      if dryrun:
    63          cmd.append('--dryrun')
    64      if ratelimit:
    65          cmd.append('--ratelimit=%d' % ratelimit)
    66  
    67      try:
    68          check(*cmd)
    69      except subprocess.CalledProcessError:
    70          FAILED.append(project)
    71  
    72  
# Projects the CI janitor must leave alone. check_ci_jobs tests each entry
# as a substring of the --gcp-project value, not as an exact project name.
BLACKLIST = [
    'kubernetes-scale', # Let its up/down job handle the resources
    'k8s-scale-testing', # As it can be running some manual experiments
    'k8s-jkns-e2e-gce-f8n-1-7', # federation projects should use fedtidy to clean up
    'k8s-jkns-e2e-gce-f8n-1-8', # federation projects should use fedtidy to clean up
]
    79  
# Projects cleaned in 'pr' mode, mapped to the value passed to clean_project
# as hours. check_ci_jobs skips every project listed here.
PR_PROJECTS = {
    # k8s-jkns-pr-bldr-e2e-gce-fdrtn
    # k8s-jkns-pr-cnry-e2e-gce-fdrtn
    # cleans up resources older than 3h
    # which is more than enough for presubmit jobs to finish.
    'k8s-jkns-pr-gce': 3,
    'k8s-jkns-pr-gce-bazel': 3,
    'k8s-jkns-pr-gce-etcd3': 3,
    'k8s-jkns-pr-gci-gce': 3,
    'k8s-jkns-pr-gci-gke': 3,
    'k8s-jkns-pr-gci-kubemark': 3,
    'k8s-jkns-pr-gke': 3,
    'k8s-jkns-pr-kubeadm': 3,
    'k8s-jkns-pr-kubemark': 3,
    'k8s-jkns-pr-node-e2e': 3,
    'k8s-jkns-pr-gce-gpus': 3,
    'k8s-gke-gpu-pr': 3,
}
    98  
# Projects cleaned in 'scale' mode, mapped to the value passed to
# clean_project as hours. check_ci_jobs skips every project listed here.
SCALE_PROJECT = {
    'k8s-presubmit-scale': 3,
}
   102  
   103  def check_predefine_jobs(jobs, ratelimit):
   104      """Handle predefined jobs"""
   105      for project, expire in jobs.iteritems():
   106          clean_project(project, hours=expire, ratelimit=ratelimit)
   107  
   108  def check_ci_jobs():
   109      """Handle CI jobs"""
   110      with open(test_infra('jobs/config.json')) as fp:
   111          config = json.load(fp)
   112  
   113      match_re = re.compile(r'--gcp-project=(.+)')
   114      for value in config.values():
   115          clean_hours = 24
   116          found = None
   117          for arg in value.get('args', []):
   118              # lifetime for soak cluster should be 7 days
   119              # clean up everything older than 10 days to prevent leak
   120              if '--soak' in arg:
   121                  clean_hours = 24 * 10
   122              mat = match_re.match(arg)
   123              if not mat:
   124                  continue
   125              project = mat.group(1)
   126              if any(b in project for b in BLACKLIST):
   127                  print >>sys.stderr, 'Project %r is blacklisted in ci-janitor' % project
   128                  continue
   129              if project in PR_PROJECTS or project in SCALE_PROJECT:
   130                  continue # CI janitor skips all PR jobs
   131              found = project
   132          if found:
   133              clean_project(found, clean_hours)
   134  
   135      # Hard code node-ci project here
   136      clean_project('k8s-jkns-ci-node-e2e')
   137  
   138  
   139  def main(mode, ratelimit, projects, age):
   140      """Run janitor for each project."""
   141      if mode == 'pr':
   142          check_predefine_jobs(PR_PROJECTS, ratelimit)
   143      elif mode == 'scale':
   144          check_predefine_jobs(SCALE_PROJECT, ratelimit)
   145      elif mode == 'custom':
   146          projs = str.split(projects, ',')
   147          for proj in projs:
   148              clean_project(proj.strip(), hours=age, ratelimit=ratelimit)
   149      else:
   150          check_ci_jobs()
   151  
   152      # Summary
   153      print 'Janitor checked %d project, %d failed to clean up.' % (len(CHECKED), len(FAILED))
   154      if FAILED:
   155          print >>sys.stderr, 'Failed projects: %r' % FAILED
   156          exit(1)
   157  
   158  
   159  if __name__ == '__main__':
   160      # keep some metric
   161      CHECKED = set()
   162      FAILED = []
   163      PARSER = argparse.ArgumentParser()
   164      PARSER.add_argument(
   165          '--mode', default='ci', choices=['ci', 'pr', 'scale', 'custom'],
   166          help='Which type of projects to clear')
   167      PARSER.add_argument(
   168          '--ratelimit', type=int,
   169          help='Max number of resources to clear in one gcloud delete call (passed into janitor.py)')
   170      PARSER.add_argument(
   171          '--projects', type=str,
   172          help='Comma separated list of projects to clean up. Only applicable in custom mode.')
   173      PARSER.add_argument(
   174          '--age', type=int,
   175          help='Expiry age for projects, in hours. Only applicable in custom mode.')
   176      ARGS = PARSER.parse_args()
   177      main(ARGS.mode, ARGS.ratelimit, ARGS.projects, ARGS.age)