k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/hack/boilerplate/boilerplate.py

k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/hack/boilerplate/boilerplate.py (about)

     1  #!/usr/bin/env python3
     2  
     3  # Copyright 2015 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  import argparse
    18  import datetime
    19  import difflib
    20  import glob
    21  import os
    22  import re
    23  import sys
    24  
    25  parser = argparse.ArgumentParser()
    26  parser.add_argument(
    27      "filenames", help="list of files to check, all files if unspecified", nargs="*"
    28  )
    29  
    30  rootdir = os.path.dirname(__file__) + "/../../"
    31  rootdir = os.path.abspath(rootdir)
    32  parser.add_argument("--rootdir", default=rootdir, help="root directory to examine")
    33  
    34  default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    35  parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    36  
    37  parser.add_argument(
    38      "-v",
    39      "--verbose",
    40      help="give verbose output regarding why a file does not pass",
    41      action="store_true",
    42  )
    43  
    44  args = parser.parse_args()
    45  
    46  verbose_out = sys.stderr if args.verbose else open("/dev/null", "w")
    47  
    48  
    49  def get_refs():
    50      refs = {}
    51  
    52      for path in glob.glob(os.path.join(args.boilerplate_dir, "boilerplate.*.txt")):
    53          extension = os.path.basename(path).split(".")[1]
    54  
    55          with open(path, "r") as ref_file:
    56              refs[extension] = ref_file.read().splitlines()
    57  
    58      return refs
    59  
    60  
    61  def is_generated_file(data, regexs):
    62      return regexs["generated"].search(data)
    63  
    64  
    65  def file_passes(filename, refs, regexs):
    66      try:
    67          with open(filename) as stream:
    68              data = stream.read()
    69      except OSError as exc:
    70          print(f"Unable to open {filename}: {exc}", file=verbose_out)
    71          return False
    72  
    73      # determine if the file is automatically generated
    74      generated = is_generated_file(data, regexs)
    75  
    76      basename = os.path.basename(filename)
    77      extension = file_extension(filename)
    78      if generated:
    79          if extension == "go":
    80              extension = "generatego"
    81  
    82      if extension != "":
    83          ref = refs[extension]
    84      else:
    85          ref = refs[basename]
    86  
    87      # remove extra content from the top of files
    88      if extension in ("go", "generatego"):
    89          data, found = regexs["go_build_constraints"].subn("", data, 1)
    90      elif extension in ["sh", "py"]:
    91          data, found = regexs["shebang"].subn("", data, 1)
    92  
    93      data = data.splitlines()
    94  
    95      # if our test file is smaller than the reference it surely fails!
    96      if len(ref) > len(data):
    97          print(
    98              f"File {filename} smaller than reference ({len(data)} < {len(ref)})",
    99              file=verbose_out,
   100          )
   101          return False
   102  
   103      # trim our file to the same number of lines as the reference file
   104      data = data[: len(ref)]
   105  
   106      pattern = regexs["year"]
   107      for line in data:
   108          if pattern.search(line):
   109              if generated:
   110                  print(
   111                      f"File {filename} has the YEAR field, but it should not be in generated file",
   112                      file=verbose_out,
   113                  )
   114              else:
   115                  print(
   116                      "File {filename} has the YEAR field, but missing the year of date",
   117                      file=verbose_out,
   118                  )
   119              return False
   120  
   121      if not generated:
   122          # Replace all occurrences of the regex "2014|2015|2016|2017|2018" with "YEAR"
   123          pattern = regexs["date"]
   124          for i, line in enumerate(data):
   125              data[i], found = pattern.subn("YEAR", line)
   126              if found != 0:
   127                  break
   128  
   129      # if we don't match the reference at this point, fail
   130      if ref != data:
   131          print(f"Header in {filename} does not match reference, diff:", file=verbose_out)
   132          if args.verbose:
   133              print(file=verbose_out)
   134              for line in difflib.unified_diff(
   135                  ref, data, "reference", filename, lineterm=""
   136              ):
   137                  print(line, file=verbose_out)
   138              print(file=verbose_out)
   139          return False
   140  
   141      return True
   142  
   143  
   144  def file_extension(filename):
   145      return os.path.splitext(filename)[1].split(".")[-1].lower()
   146  
   147  
# Path fragments that are excluded from the check. normalize_files() drops any
# path that contains one of these strings as a substring, and get_files() uses
# the same list to prune directories by name while walking the tree (only the
# entries without a "/" can match a bare directory name there).
skipped_names = [
    "third_party",
    "_output",
    ".git",
    "cluster/env.sh",
    "vendor",
    "testdata",
    "test/e2e/generated/bindata.go",
    "hack/boilerplate/test",
    "staging/src/k8s.io/kubectl/pkg/generated/bindata.go",
]
   159  
   160  
   161  def normalize_files(files):
   162      newfiles = []
   163      for pathname in files:
   164          if any(x in pathname for x in skipped_names):
   165              continue
   166          newfiles.append(pathname)
   167      for i, pathname in enumerate(newfiles):
   168          if not os.path.isabs(pathname):
   169              newfiles[i] = os.path.join(args.rootdir, pathname)
   170      return newfiles
   171  
   172  
   173  def get_files(extensions):
   174      files = []
   175      if len(args.filenames) > 0:
   176          files = args.filenames
   177      else:
   178          for root, dirs, walkfiles in os.walk(args.rootdir):
   179              # don't visit certain dirs. This is just a performance improvement
   180              # as we would prune these later in normalize_files(). But doing it
   181              # cuts down the amount of filesystem walking we do and cuts down
   182              # the size of the file list
   183              for dname in skipped_names:
   184                  if dname in dirs:
   185                      dirs.remove(dname)
   186              for dname in dirs:
   187                  # dirs that start with __ are ignored
   188                  if dname.startswith("__"):
   189                      dirs.remove(dname)
   190  
   191              for name in walkfiles:
   192                  pathname = os.path.join(root, name)
   193                  files.append(pathname)
   194  
   195      files = normalize_files(files)
   196      outfiles = []
   197      for pathname in files:
   198          basename = os.path.basename(pathname)
   199          extension = file_extension(pathname)
   200          if extension in extensions or basename in extensions:
   201              outfiles.append(pathname)
   202      return outfiles
   203  
   204  
   205  def get_dates():
   206      years = datetime.datetime.now().year
   207      return "(%s)" % "|".join(str(year) for year in range(2014, years + 1))
   208  
   209  
   210  def get_regexs():
   211      regexs = {}
   212      # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
   213      regexs["year"] = re.compile("YEAR")
   214      # get_dates return 2014, 2015, 2016, 2017, or 2018 until the current year
   215      # as a regex like: "(2014|2015|2016|2017|2018)";
   216      # company holder names can be anything
   217      regexs["date"] = re.compile(get_dates())
   218      # strip the following build constraints/tags:
   219      # //go:build
   220      # // +build \n\n
   221      regexs["go_build_constraints"] = re.compile(
   222          r"^(//(go:build| \+build).*\n)+\n", re.MULTILINE
   223      )
   224      # strip #!.* from scripts
   225      regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
   226      # Search for generated files
   227      regexs["generated"] = re.compile(r"^[/*#]+ +.* DO NOT EDIT\.$", re.MULTILINE)
   228      return regexs
   229  
   230  
   231  def main():
   232      regexs = get_regexs()
   233      refs = get_refs()
   234      filenames = get_files(refs)
   235  
   236      for filename in filenames:
   237          if not file_passes(filename, refs, regexs):
   238              print(filename)
   239  
   240      return 0
   241  
   242  
   243  if __name__ == "__main__":
   244      sys.exit(main())