k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/scenarios/kubernetes_janitor.py (about)

     1  #!/usr/bin/env python3
     2  
     3  # Copyright 2017 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Need to figure out why this only fails on travis
    18  # pylint: disable=bad-continuation
    19  
"""Execute a janitor pass for each GCP project from jobs/config.json, PR_PROJECTS or --projects."""
    21  
    22  import argparse
    23  import json
    24  import os
    25  import re
    26  import subprocess
    27  import sys
    28  
    29  try:
    30      from junit_xml import TestSuite, TestCase
    31      HAS_JUNIT = True
    32  except ImportError:
    33      HAS_JUNIT = False
    34  
    35  ORIG_CWD = os.getcwd()  # Checkout changes cwd
    36  
    37  def test_infra(*paths):
    38      """Return path relative to root of test-infra repo."""
    39      return os.path.join(ORIG_CWD, os.path.dirname(__file__), '..', *paths)
    40  
    41  
    42  def check(*cmd):
    43      """Log and run the command, raising on errors."""
    44      print('Run:', cmd, file=sys.stderr)
    45      subprocess.check_call(cmd)
    46  
    47  
    48  def parse_project(path):
    49      """Parse target env file and return GCP project name."""
    50      with open(path, 'r') as fp:
    51          env = fp.read()
    52      match = re.search(r'PROJECT=([^\n"]+)', env)
    53      if match:
    54          project = match.group(1)
    55          return project
    56      return None
    57  
    58  
    59  def clean_project(project, hours=24, dryrun=False, ratelimit=None, filt=None, python='python3'):
    60      """Execute janitor for target GCP project """
    61      # Multiple jobs can share the same project, woooo
    62      if project in CHECKED:
    63          return
    64      CHECKED.add(project)
    65  
    66      cmd = [python, test_infra('boskos/cmd/janitor/gcp_janitor.py'), '--project=%s' % project]
    67      cmd.append('--hour=%d' % hours)
    68      if dryrun:
    69          cmd.append('--dryrun')
    70      if ratelimit:
    71          cmd.append('--ratelimit=%d' % ratelimit)
    72      if VERBOSE:
    73          cmd.append('--verbose')
    74      if filt:
    75          cmd.append('--filter=%s' % filt)
    76  
    77      try:
    78          check(*cmd)
    79      except subprocess.CalledProcessError:
    80          FAILED.append(project)
    81  
    82  
    83  EXEMPT_PROJECTS = [
    84      'kubernetes-scale', # Let it's up/down job handle the resources
    85      'k8s-scale-testing', # As it can be running some manual experiments
    86      'k8s-jkns-e2e-gce-f8n-1-7', # federation projects should use fedtidy to clean up
    87      'k8s-jkns-e2e-gce-f8n-1-8', # federation projects should use fedtidy to clean up
    88  ]
    89  
    90  PR_PROJECTS = {
    91      # k8s-jkns-pr-bldr-e2e-gce-fdrtn
    92      # k8s-jkns-pr-cnry-e2e-gce-fdrtn
    93      # cleans up resources older than 3h
    94      # which is more than enough for presubmit jobs to finish.
    95      'k8s-jkns-pr-gci-gke': 3,
    96      'k8s-jkns-pr-gke': 3,
    97      'k8s-jkns-pr-kubeadm': 3,
    98      'k8s-jkns-pr-node-e2e': 3,
    99      'k8s-jkns-pr-gce-gpus': 3,
   100      'cri-c8d-pr-node-e2e': 3,
   101  }
   102  
   103  def check_predefine_jobs(jobs, ratelimit):
   104      """Handle predefined jobs"""
   105      for project, expire in jobs.items():
   106          clean_project(project, hours=expire, ratelimit=ratelimit)
   107  
   108  def check_ci_jobs():
   109      """Handle CI jobs"""
   110      with open(test_infra('jobs/config.json')) as fp:
   111          config = json.load(fp)
   112  
   113      match_re = re.compile(r'--gcp-project=(.+)')
   114      for value in list(config.values()):
   115          clean_hours = 24
   116          found = None
   117          for arg in value.get('args', []):
   118              # lifetime for soak cluster should be 7 days
   119              # clean up everything older than 10 days to prevent leak
   120              if '--soak' in arg:
   121                  clean_hours = 24 * 10
   122              mat = match_re.match(arg)
   123              if not mat:
   124                  continue
   125              project = mat.group(1)
   126              if any(b in project for b in EXEMPT_PROJECTS):
   127                  print('Project %r is exempted in ci-janitor' % project, file=sys.stderr)
   128                  continue
   129              found = project
   130          if found:
   131              clean_project(found, clean_hours)
   132  
   133  
   134  def main(mode, ratelimit, projects, age, artifacts, filt):
   135      """Run janitor for each project."""
   136      if mode == 'pr':
   137          check_predefine_jobs(PR_PROJECTS, ratelimit)
   138      elif mode == 'custom':
   139          projs = str.split(projects, ',')
   140          for proj in projs:
   141              clean_project(proj.strip(), hours=age, ratelimit=ratelimit, filt=filt)
   142      else:
   143          check_ci_jobs()
   144  
   145      # Summary
   146      print('Janitor checked %d project, %d failed to clean up.' % (len(CHECKED), len(FAILED)))
   147      print(HAS_JUNIT)
   148      if artifacts:
   149          output = os.path.join(artifacts, 'junit_janitor.xml')
   150          if not HAS_JUNIT:
   151              print('Please install junit-xml (https://pypi.org/project/junit-xml/)')
   152          else:
   153              print('Generating junit output:')
   154              tcs = []
   155              for project in CHECKED:
   156                  tc = TestCase(project, 'kubernetes_janitor')
   157                  if project in FAILED:
   158                      # TODO(krzyzacy): pipe down stdout here as well
   159                      tc.add_failure_info('failed to clean up gcp project')
   160                  tcs.append(tc)
   161  
   162              ts = TestSuite('janitor', tcs)
   163              with open(output, 'w') as f:
   164                  TestSuite.to_file(f, [ts])
   165      if FAILED:
   166          print('Failed projects: %r' % FAILED, file=sys.stderr)
   167          exit(1)
   168  
   169  
   170  if __name__ == '__main__':
   171      # keep some metric
   172      CHECKED = set()
   173      FAILED = []
   174      VERBOSE = False
   175      PARSER = argparse.ArgumentParser()
   176      PARSER.add_argument(
   177          '--mode', default='ci', choices=['ci', 'pr', 'custom'],
   178          help='Which type of projects to clear')
   179      PARSER.add_argument(
   180          '--ratelimit', type=int,
   181          help='Max number of resources to clear in one gcloud delete call '
   182               '(passed into gcp_janitor.py)')
   183      PARSER.add_argument(
   184          '--projects', type=str,
   185          help='Comma separated list of projects to clean up. Only applicable in custom mode.')
   186      PARSER.add_argument(
   187          '--age', type=int,
   188          help='Expiry age for projects, in hours. Only applicable in custom mode.')
   189      PARSER.add_argument(
   190          '--verbose', action='store_true',
   191          help='If want more detailed logs from the janitor script.')
   192      PARSER.add_argument(
   193          '--artifacts',
   194          help='generate junit style xml to target path',
   195          default=os.environ.get('ARTIFACTS', None))
   196      PARSER.add_argument(
   197          '--filter',
   198          default=None,
   199          help='Filter down to these instances(passed into gcp_janitor.py)')
   200      ARGS = PARSER.parse_args()
   201      VERBOSE = ARGS.verbose
   202      main(ARGS.mode, ARGS.ratelimit, ARGS.projects, ARGS.age, ARGS.artifacts, ARGS.filter)