github.com/abayer/test-infra@v0.0.5/hack/verify_boilerplate.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2015 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Verifies that all source files contain the necessary copyright boilerplate
    18  # snippet.
    19  
    20  from __future__ import print_function
    21  
    22  import argparse
    23  import glob
    24  import os
    25  import re
    26  import sys
    27  
    28  def get_args():
    29      parser = argparse.ArgumentParser()
    30      parser.add_argument(
    31          "filenames", help="list of files to check, all files if unspecified", nargs='*')
    32  
    33      rootdir = os.path.dirname(__file__) + "/../"
    34      rootdir = os.path.abspath(rootdir)
    35      parser.add_argument("--rootdir", default=rootdir, help="root directory to examine")
    36  
    37      default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    38      parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    39      return parser.parse_args()
    40  
    41  
    42  def get_refs():
    43      refs = {}
    44  
    45      for path in glob.glob(os.path.join(ARGS.boilerplate_dir, "boilerplate.*.txt")):
    46          extension = os.path.basename(path).split(".")[1]
    47  
    48          ref_file = open(path, 'r')
    49          ref = ref_file.read().splitlines()
    50          ref_file.close()
    51          refs[extension] = ref
    52  
    53      return refs
    54  
    55  
    56  def file_passes(filename, refs, regexs):  # pylint: disable=too-many-locals
    57      try:
    58          with open(filename, 'r') as fp:
    59              data = fp.read()
    60      except IOError:
    61          return False
    62  
    63      basename = os.path.basename(filename)
    64      extension = file_extension(filename)
    65      if extension != "":
    66          ref = refs[extension]
    67      else:
    68          ref = refs[basename]
    69  
    70      # remove build tags from the top of Go files
    71      if extension == "go":
    72          con = regexs["go_build_constraints"]
    73          (data, found) = con.subn("", data, 1)
    74  
    75      # remove shebang from the top of shell files
    76      if extension == "sh" or extension == "py":
    77          she = regexs["shebang"]
    78          (data, found) = she.subn("", data, 1)
    79  
    80      data = data.splitlines()
    81  
    82      # if our test file is smaller than the reference it surely fails!
    83      if len(ref) > len(data):
    84          return False
    85  
    86      # trim our file to the same number of lines as the reference file
    87      data = data[:len(ref)]
    88  
    89      year = regexs["year"]
    90      for datum in data:
    91          if year.search(datum):
    92              return False
    93  
    94      # Replace all occurrences of the regex "2017|2016|2015|2014" with "YEAR"
    95      when = regexs["date"]
    96      for idx, datum in enumerate(data):
    97          (data[idx], found) = when.subn('YEAR', datum)
    98          if found != 0:
    99              break
   100  
   101      # if we don't match the reference at this point, fail
   102      if ref != data:
   103          return False
   104  
   105      return True
   106  
   107  def file_extension(filename):
   108      return os.path.splitext(filename)[1].split(".")[-1].lower()
   109  
   110  SKIPPED_DIRS = [
   111      'Godeps', 'third_party', '_gopath', '_output',
   112      '.git', 'vendor', '__init__.py', 'node_modules'
   113  ]
   114  
   115  def normalize_files(files):
   116      newfiles = []
   117      for pathname in files:
   118          if any(x in pathname for x in SKIPPED_DIRS):
   119              continue
   120          newfiles.append(pathname)
   121      for idx, pathname in enumerate(newfiles):
   122          if not os.path.isabs(pathname):
   123              newfiles[idx] = os.path.join(ARGS.rootdir, pathname)
   124      return newfiles
   125  
   126  
   127  def get_files(extensions):
   128      files = []
   129      if ARGS.filenames:
   130          files = ARGS.filenames
   131      else:
   132          for root, dirs, walkfiles in os.walk(ARGS.rootdir):
   133              # don't visit certain dirs. This is just a performance improvement
   134              # as we would prune these later in normalize_files(). But doing it
   135              # cuts down the amount of filesystem walking we do and cuts down
   136              # the size of the file list
   137              for dpath in SKIPPED_DIRS:
   138                  if dpath in dirs:
   139                      dirs.remove(dpath)
   140  
   141              for name in walkfiles:
   142                  pathname = os.path.join(root, name)
   143                  files.append(pathname)
   144  
   145      files = normalize_files(files)
   146      outfiles = []
   147      for pathname in files:
   148          basename = os.path.basename(pathname)
   149          extension = file_extension(pathname)
   150          if extension in extensions or basename in extensions:
   151              outfiles.append(pathname)
   152      return outfiles
   153  
   154  
   155  def get_regexs():
   156      regexs = {}
   157      # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
   158      regexs["year"] = re.compile('YEAR')
   159      # dates can be 2014, 2015, 2016 or 2017, company holder names can be anything
   160      regexs["date"] = re.compile('(2014|2015|2016|2017|2018)')
   161      # strip // +build \n\n build constraints
   162      regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE)
   163      # strip #!.* from shell/python scripts
   164      regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
   165      return regexs
   166  
   167  
   168  def main():
   169      regexs = get_regexs()
   170      refs = get_refs()
   171      filenames = get_files(refs.keys())
   172      nonconforming_files = []
   173      for filename in filenames:
   174          if not file_passes(filename, refs, regexs):
   175              nonconforming_files.append(filename)
   176  
   177      if nonconforming_files:
   178          print('%d files have incorrect boilerplate headers:' %
   179                len(nonconforming_files))
   180          for filename in sorted(nonconforming_files):
   181              print(os.path.relpath(filename, ARGS.rootdir))
   182          sys.exit(1)
   183  
   184  
if __name__ == "__main__":
    # ARGS is bound here as a module-level global. get_refs(),
    # normalize_files(), get_files() and main() all read it, so it has to be
    # set before main() runs.
    ARGS = get_args()
    main()