github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/hack/verify_boilerplate.py

github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/hack/verify_boilerplate.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2015 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Verifies that all source files contain the necessary copyright boilerplate
    18  # snippet.
    19  
    20  from __future__ import print_function
    21  
import argparse
import datetime
import glob
import os
import re
import sys
    27  
    28  
    29  def get_args():
    30      parser = argparse.ArgumentParser()
    31      parser.add_argument(
    32          "filenames", help="list of files to check, all files if unspecified", nargs='*')
    33  
    34      rootdir = os.path.dirname(__file__) + "/../"
    35      rootdir = os.path.abspath(rootdir)
    36      parser.add_argument("--rootdir", default=rootdir,
    37                          help="root directory to examine")
    38  
    39      default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    40      parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    41      return parser.parse_args()
    42  
    43  
    44  def get_refs():
    45      refs = {}
    46  
    47      for path in glob.glob(os.path.join(ARGS.boilerplate_dir, "boilerplate.*.txt")):
    48          extension = os.path.basename(path).split(".")[1]
    49  
    50          ref_file = open(path, 'r')
    51          ref = ref_file.read().splitlines()
    52          ref_file.close()
    53          refs[extension] = ref
    54  
    55      return refs
    56  
    57  
# Marker strings looked for by is_generated(): a file whose contents include
# any one of them is treated as generated.
GENERATED_GO_MARKERS = [
    "// Code generated by client-gen. DO NOT EDIT.",
    "// Code generated by deepcopy-gen. DO NOT EDIT.",
    "// Code generated by informer-gen. DO NOT EDIT.",
    "// Code generated by lister-gen. DO NOT EDIT.",
]
    64  
    65  # given the file contents, return true if the file appears to be generated
    66  
    67  
    68  def is_generated(data):
    69      for marker in GENERATED_GO_MARKERS:
    70          if marker in data:
    71              return True
    72      return False
    73  
    74  
    75  def file_passes(filename, refs, regexs):  # pylint: disable=too-many-locals
    76      try:
    77          with open(filename, 'r') as fp:
    78              data = fp.read()
    79      except IOError:
    80          return False
    81  
    82      basename = os.path.basename(filename)
    83      extension = file_extension(filename)
    84      if extension != "":
    85          ref = refs[extension]
    86      else:
    87          ref = refs[basename]
    88  
    89      # check for and skip generated files
    90      if is_generated(data):
    91          return True
    92  
    93      # remove build tags from the top of Go files
    94      if extension == "go":
    95          con = regexs["go_build_constraints"]
    96          (data, found) = con.subn("", data, 1)
    97  
    98      # remove shebang from the top of shell files
    99      if extension == "sh" or extension == "py":
   100          she = regexs["shebang"]
   101          (data, found) = she.subn("", data, 1)
   102  
   103      data = data.splitlines()
   104  
   105      # if our test file is smaller than the reference it surely fails!
   106      if len(ref) > len(data):
   107          return False
   108  
   109      # trim our file to the same number of lines as the reference file
   110      data = data[:len(ref)]
   111  
   112      year = regexs["year"]
   113      for datum in data:
   114          if year.search(datum):
   115              return False
   116  
   117      # Replace all occurrences of the regex "2017|2016|2015|2014" with "YEAR"
   118      when = regexs["date"]
   119      for idx, datum in enumerate(data):
   120          (data[idx], found) = when.subn('YEAR', datum)
   121          if found != 0:
   122              break
   123  
   124      # if we don't match the reference at this point, fail
   125      if ref != data:
   126          return False
   127  
   128      return True
   129  
   130  
   131  def file_extension(filename):
   132      return os.path.splitext(filename)[1].split(".")[-1].lower()
   133  
   134  
   135  SKIPPED_DIRS = [
   136      'Godeps', 'third_party', '_gopath', '_output',
   137      '.git', 'vendor', '__init__.py', 'node_modules'
   138  ]
   139  
   140  # even when generated by bazel we will complain about some generated files
   141  # not having the headers. since they're just generated, ignore them
   142  IGNORE_HEADERS = [
   143      '// Code generated by go-bindata.'
   144  ]
   145  
   146  
   147  def has_ignored_header(pathname):
   148      with open(pathname, 'r') as myfile:
   149          data = myfile.read()
   150          for header in IGNORE_HEADERS:
   151              if data.startswith(header):
   152                  return True
   153      return False
   154  
   155  
   156  def normalize_files(files):
   157      newfiles = []
   158      for pathname in files:
   159          if any(x in pathname for x in SKIPPED_DIRS):
   160              continue
   161          newfiles.append(pathname)
   162      for idx, pathname in enumerate(newfiles):
   163          if not os.path.isabs(pathname):
   164              newfiles[idx] = os.path.join(ARGS.rootdir, pathname)
   165      return newfiles
   166  
   167  
   168  def get_files(extensions):
   169      files = []
   170      if ARGS.filenames:
   171          files = ARGS.filenames
   172      else:
   173          for root, dirs, walkfiles in os.walk(ARGS.rootdir):
   174              # don't visit certain dirs. This is just a performance improvement
   175              # as we would prune these later in normalize_files(). But doing it
   176              # cuts down the amount of filesystem walking we do and cuts down
   177              # the size of the file list
   178              for dpath in SKIPPED_DIRS:
   179                  if dpath in dirs:
   180                      dirs.remove(dpath)
   181  
   182              for name in walkfiles:
   183                  pathname = os.path.join(root, name)
   184                  files.append(pathname)
   185  
   186      files = normalize_files(files)
   187      outfiles = []
   188      for pathname in files:
   189          basename = os.path.basename(pathname)
   190          extension = file_extension(pathname)
   191          if extension in extensions or basename in extensions:
   192              if not has_ignored_header(pathname):
   193                  outfiles.append(pathname)
   194      return outfiles
   195  
   196  
   197  def get_regexs():
   198      regexs = {}
   199      # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
   200      regexs["year"] = re.compile('YEAR')
   201      # dates can be 2014, 2015, 2016 or 2017, company holder names can be anything
   202      regexs["date"] = re.compile('(2014|2015|2016|2017|2018)')
   203      # strip // +build \n\n build constraints
   204      regexs["go_build_constraints"] = re.compile(
   205          r"^(// \+build.*\n)+\n", re.MULTILINE)
   206      # strip #!.* from shell/python scripts
   207      regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
   208      return regexs
   209  
   210  
   211  def main():
   212      regexs = get_regexs()
   213      refs = get_refs()
   214      filenames = get_files(refs.keys())
   215      nonconforming_files = []
   216      for filename in filenames:
   217          if not file_passes(filename, refs, regexs):
   218              nonconforming_files.append(filename)
   219  
   220      if nonconforming_files:
   221          print('%d files have incorrect boilerplate headers:' %
   222                len(nonconforming_files))
   223          for filename in sorted(nonconforming_files):
   224              print(os.path.relpath(filename, ARGS.rootdir))
   225          sys.exit(1)
   226  
   227  
if __name__ == "__main__":
    # ARGS is a module-level global read by get_refs(), normalize_files(),
    # get_files() and main(). It is only assigned here, when the file is run
    # as a script.
    ARGS = get_args()
    main()