github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/hack/boilerplate/verify_boilerplate.py (about)

     1  #!/usr/bin/env python3
     2  
     3  # Copyright 2015 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Verifies that all source files contain the necessary copyright boilerplate
    18  # snippet.
    19  
    20  import argparse
    21  import datetime
    22  import glob
    23  import os
    24  import re
    25  import sys
    26  
    27  
    28  def get_args():
    29      parser = argparse.ArgumentParser()
    30      parser.add_argument(
    31          "filenames",
    32          help="list of files to check, all files if unspecified",
    33          nargs='*')
    34  
    35      rootdir = os.path.abspath('.')
    36      parser.add_argument("--rootdir",
    37                          default=rootdir,
    38                          help="root directory to examine")
    39  
    40      default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    41      parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    42  
    43      parser.add_argument(
    44          '--skip',
    45          default=[
    46              '_bin',
    47              'external/bazel_tools',
    48              '.git',
    49              'node_modules',
    50              '_output',
    51              'third_party',
    52              'vendor',
    53              'hack/boilerplate/test',
    54              'verify_boilerplate.py',
    55              '.python_virtual_env',
    56          ],
    57          action='append',
    58          help='Customize paths to avoid',
    59      )
    60      return parser.parse_args()
    61  
    62  
    63  def get_refs():
    64      refs = {}
    65  
    66      template_dir = ARGS.boilerplate_dir
    67      if not os.path.isdir(template_dir):
    68          template_dir = os.path.dirname(template_dir)
    69      for path in glob.glob(os.path.join(template_dir, "boilerplate.*.txt")):
    70          extension = os.path.basename(path).split(".")[1]
    71  
    72          # Pass the encoding parameter to avoid ascii decode error for some
    73          # platform.
    74          ref_file = open(path, 'r', encoding='utf-8')
    75          ref = ref_file.read().splitlines()
    76          ref_file.close()
    77          refs[extension] = ref
    78  
    79      return refs
    80  
    81  # given the file contents, return true if the file appears to be generated
    82  def is_generated(data):
    83      if re.search(r"^// Code generated by .*\. DO NOT EDIT\.$", data, re.MULTILINE):
    84          return True
    85      return False
    86  
    87  
    88  def file_passes(filename, refs, regexs):  # pylint: disable=too-many-locals,too-many-return-statements,too-many-branches
    89      try:
    90          # Pass the encoding parameter to avoid ascii decode error for some
    91          # platform.
    92          with open(filename, 'r', encoding='utf-8') as fp:
    93              file_data = fp.read()
    94      except IOError:
    95          return False
    96  
    97      if not file_data:
    98          return True  # Nothing to copyright in this empty file.
    99  
   100      basename = os.path.basename(filename)
   101      extension = file_extension(filename)
   102      if extension != "":
   103          ref = refs[extension]
   104      else:
   105          ref = refs[basename]
   106  
   107      ref = ref.copy()
   108  
   109      # remove build tags from the top of Go files
   110      if extension == "go":
   111          con = regexs["go_build_constraints"]
   112          (file_data, found) = con.subn("", file_data, 1)
   113  
   114      # remove shebang from the top of shell files
   115      if extension in ("sh", "py"):
   116          she = regexs["shebang"]
   117          (file_data, found) = she.subn("", file_data, 1)
   118  
   119      data = file_data.splitlines()
   120  
   121      # if our test file is smaller than the reference it surely fails!
   122      if len(ref) > len(data):
   123          return False
   124  
   125      # trim our file to the same number of lines as the reference file
   126      data = data[:len(ref)]
   127  
   128      # check if we encounter a 'YEAR' placeholder if the file is generated
   129      if is_generated(file_data):
   130          # pylint: disable=unused-variable
   131          for i, line in enumerate(data):
   132              if "Copyright YEAR" in line:
   133                  return False
   134          return True
   135  
   136      year = regexs["year"]
   137      for datum in data:
   138          if year.search(datum):
   139              return False
   140  
   141      # Replace all occurrences of the regex "2017|2016|2015|2014" with "YEAR"
   142      when = regexs["date"]
   143      for idx, datum in enumerate(data):
   144          (data[idx], found) = when.subn('YEAR', datum)
   145          if found != 0:
   146              break
   147  
   148      # if we don't match the reference at this point, fail
   149      if ref != data:
   150          return False
   151  
   152      return True
   153  
   154  
   155  def file_extension(filename):
   156      return os.path.splitext(filename)[1].split(".")[-1].lower()
   157  
   158  
   159  # even when generated by bazel we will complain about some generated files
   160  # not having the headers. since they're just generated, ignore them
   161  IGNORE_HEADERS = ['// Code generated by go-bindata.']
   162  
   163  
   164  def has_ignored_header(pathname):
   165      # Pass the encoding parameter to avoid ascii decode error for some
   166      # platform.
   167      with open(pathname, 'r', encoding='utf-8') as myfile:
   168          data = myfile.read()
   169          for header in IGNORE_HEADERS:
   170              if data.startswith(header):
   171                  return True
   172      return False
   173  
   174  
   175  def normalize_files(files):
   176      newfiles = []
   177      for pathname in files:
   178          if any(x in pathname for x in ARGS.skip):
   179              continue
   180          newfiles.append(pathname)
   181      for idx, pathname in enumerate(newfiles):
   182          if not os.path.isabs(pathname):
   183              newfiles[idx] = os.path.join(ARGS.rootdir, pathname)
   184      return newfiles
   185  
   186  
   187  def get_files(extensions):
   188      files = []
   189      if ARGS.filenames:
   190          files = ARGS.filenames
   191      else:
   192          for root, dirs, walkfiles in os.walk(ARGS.rootdir):
   193              # don't visit certain dirs. This is just a performance improvement
   194              # as we would prune these later in normalize_files(). But doing it
   195              # cuts down the amount of filesystem walking we do and cuts down
   196              # the size of the file list
   197              for dpath in ARGS.skip:
   198                  if dpath in dirs:
   199                      dirs.remove(dpath)
   200  
   201              for name in walkfiles:
   202                  pathname = os.path.join(root, name)
   203                  files.append(pathname)
   204  
   205      files = normalize_files(files)
   206      outfiles = []
   207      for pathname in files:
   208          basename = os.path.basename(pathname)
   209          extension = file_extension(pathname)
   210          if extension in extensions or basename in extensions:
   211              if not has_ignored_header(pathname):
   212                  outfiles.append(pathname)
   213      return outfiles
   214  
   215  
   216  def get_dates():
   217      years = datetime.datetime.now().year
   218      return '(%s)' % '|'.join((str(year) for year in range(2014, years + 1)))
   219  
   220  
   221  def get_regexs():
   222      regexs = {}
   223      # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
   224      regexs["year"] = re.compile('YEAR')
   225      # dates can be any year between 2014 and the current year, company holder names can be anything
   226      regexs["date"] = re.compile(get_dates())
   227      # strip // +build \n\n build constraints
   228      regexs["go_build_constraints"] = re.compile(r"^(//( \+build|go:build).*\n)+\n",
   229                                                  re.MULTILINE)
   230      # strip #!.* from shell/python scripts
   231      regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
   232      return regexs
   233  
   234  
   235  def nonconforming_lines(files):
   236      yield '%d files have incorrect boilerplate headers:' % len(files)
   237      for fp in files:
   238          yield os.path.relpath(fp, ARGS.rootdir)
   239  
   240  
   241  def main():
   242      regexs = get_regexs()
   243      refs = get_refs()
   244      filenames = get_files(refs.keys())
   245      nonconforming_files = []
   246      for filename in sorted(filenames):
   247          if not file_passes(filename, refs, regexs):
   248              nonconforming_files.append(filename)
   249  
   250      if nonconforming_files:
   251          for line in nonconforming_lines(nonconforming_files):
   252              print(line)
   253          sys.exit(1)
   254  
   255      print("Verified %d files" % (len(filenames), ))
   256  
   257  
if __name__ == "__main__":
    # ARGS is a module-level global read by get_refs(), normalize_files(),
    # get_files() and nonconforming_lines(), so it has to be assigned before
    # main() runs.
    ARGS = get_args()
    main()