github.com/GoogleCloudPlatform/testgrid@v0.0.174/hack/verify_boilerplate.py (about)

     1  #!/usr/bin/env python3
     2  
     3  # Copyright 2015 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Verifies that all source files contain the necessary copyright boilerplate
    18  # snippet.
    19  
    20  import argparse
    21  import datetime
    22  import glob
    23  import os
    24  import re
    25  import sys
    26  
# Author names allowed in a real header; boilerplate templates use the
# placeholder "AUTHOR" instead, which file_passes() normalizes against.
AUTHORS = r"TestGrid|Kubernetes"
# Placeholder year string used in the boilerplate templates; a real source
# file must contain a concrete year, never the literal "YEAR".
YEAR = r"YEAR"
    29  
    30  def get_args():
    31      parser = argparse.ArgumentParser()
    32      parser.add_argument(
    33          "filenames", help="list of files to check, all files if unspecified", nargs='*')
    34  
    35      rootdir = os.path.dirname(__file__) + "/../"
    36      rootdir = os.path.abspath(rootdir)
    37      parser.add_argument("--rootdir", default=rootdir,
    38                          help="root directory to examine")
    39  
    40      default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    41      parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    42      return parser.parse_args()
    43  
    44  
    45  def get_refs():
    46      refs = {}
    47  
    48      for path in glob.glob(os.path.join(ARGS.boilerplate_dir, "boilerplate.*.txt")):
    49          extension = os.path.basename(path).split(".")[1]
    50  
    51          # Pass the encoding parameter to avoid ascii decode error for some
    52          # platform.
    53          ref_file = open(path, 'r', encoding='utf-8')
    54          ref = ref_file.read().splitlines()
    55          ref_file.close()
    56          refs[extension] = ref
    57  
    58      return refs
    59  
    60  
    61  GENERATED_GO_MARKERS = [
    62      "// Code generated by client-gen. DO NOT EDIT.",
    63      "// Code generated by deepcopy-gen. DO NOT EDIT.",
    64      "// Code generated by informer-gen. DO NOT EDIT.",
    65      "// Code generated by lister-gen. DO NOT EDIT.",
    66      "// Code generated by protoc-gen-go. DO NOT EDIT.",
    67  ]
    68  
    69  # given the file contents, return true if the file appears to be generated
    70  
    71  
    72  def is_generated(data):
    73      for marker in GENERATED_GO_MARKERS:
    74          if marker in data:
    75              return True
    76      return False
    77  
    78  
def file_passes(filename, refs, regexs):  # pylint: disable=too-many-locals
    """Return True if filename's header matches its reference boilerplate.

    Args:
        filename: path of the file to check.
        refs: dict mapping extension/basename -> reference header lines,
            as produced by get_refs().
        regexs: compiled patterns from get_regexs().

    Returns False for unreadable files or mismatched headers; generated
    files always pass.
    """
    try:
        # Pass the encoding parameter to avoid ascii decode error for some
        # platform.
        with open(filename, 'r', encoding='utf-8') as fp:
            data = fp.read()
    except IOError:
        return False

    basename = os.path.basename(filename)
    extension = file_extension(filename)
    # Pick the reference template by extension, falling back to the full
    # basename for extension-less files (get_files() guarantees one of the
    # two keys is present in refs).
    if extension != "":
        ref = refs[extension]
    else:
        ref = refs[basename]

    # check for and skip generated files
    if is_generated(data):
        return True

    # remove build tags from the top of Go files
    if extension == "go":
        con = regexs["go_build_constraints"]
        (data, found) = con.subn("", data, 1)

    # remove shebang from the top of shell files
    if extension in ("sh", "py"):
        she = regexs["shebang"]
        (data, found) = she.subn("", data, 1)

    data = data.splitlines()

    # if our test file is smaller than the reference it surely fails!
    if len(ref) > len(data):
        return False

    # trim our file to the same number of lines as the reference file
    data = data[:len(ref)]

    # The literal placeholder "YEAR" must never appear in a real header.
    year = regexs["year"]
    for datum in data:
        if year.search(datum):
            return False

    # Normalize the first line containing a concrete year (2014..now) to
    # the template's "YEAR" placeholder, then stop.
    when = regexs["date"]
    for idx, datum in enumerate(data):
        (data[idx], found) = when.subn("YEAR", datum)
        if found != 0:
            break

    # Likewise normalize the first line naming an author (TestGrid or
    # Kubernetes) to "AUTHOR", then stop.
    author = regexs["author"]
    for idx, datum in enumerate(data):
        (data[idx], found) = author.subn("AUTHOR", datum)
        if found != 0:
            break

    # if we don't match the reference at this point, fail
    if ref != data:
        return False

    return True
   142  
   143  
   144  def file_extension(filename):
   145      return os.path.splitext(filename)[1].split(".")[-1].lower()
   146  
   147  
# Path components that exclude a file from checking: VCS metadata, vendored
# and generated trees, and build outputs.  Matched as substrings of the
# full path by normalize_files().
SKIPPED_DIRS = [
    'external',
    '.git',
    'Godeps',
    '_gopath',
    '__init__.py',
    'node_modules',
    '_output',
    'third_party',
    'vendor',
]
   159  
   160  # even when generated by bazel we will complain about some generated files
   161  # not having the headers. since they're just generated, ignore them
   162  IGNORE_HEADERS = [
   163      '// Code generated by go-bindata.'
   164  ]
   165  
   166  
   167  def has_ignored_header(pathname):
   168      # Pass the encoding parameter to avoid ascii decode error for some
   169      # platform.
   170      with open(pathname, 'r', encoding='utf-8') as myfile:
   171          data = myfile.read()
   172          for header in IGNORE_HEADERS:
   173              if data.startswith(header):
   174                  return True
   175      return False
   176  
   177  
   178  def normalize_files(files):
   179      newfiles = []
   180      for pathname in files:
   181          if any(x in pathname for x in SKIPPED_DIRS):
   182              continue
   183          newfiles.append(pathname)
   184      for idx, pathname in enumerate(newfiles):
   185          if not os.path.isabs(pathname):
   186              newfiles[idx] = os.path.join(ARGS.rootdir, pathname)
   187      return newfiles
   188  
   189  
   190  def get_files(extensions):
   191      files = []
   192      if ARGS.filenames:
   193          files = ARGS.filenames
   194      else:
   195          for root, dirs, walkfiles in os.walk(ARGS.rootdir):
   196              # don't visit certain dirs. This is just a performance improvement
   197              # as we would prune these later in normalize_files(). But doing it
   198              # cuts down the amount of filesystem walking we do and cuts down
   199              # the size of the file list
   200              for dpath in SKIPPED_DIRS:
   201                  if dpath in dirs:
   202                      dirs.remove(dpath)
   203  
   204              for name in walkfiles:
   205                  pathname = os.path.join(root, name)
   206                  files.append(pathname)
   207  
   208      files = normalize_files(files)
   209      outfiles = []
   210      for pathname in files:
   211          basename = os.path.basename(pathname)
   212          extension = file_extension(pathname)
   213          if extension in extensions or basename in extensions:
   214              if not has_ignored_header(pathname):
   215                  outfiles.append(pathname)
   216      return outfiles
   217  
   218  
   219  def get_dates():
   220      years = datetime.datetime.now().year
   221      return '(%s)' % '|'.join((str(year) for year in range(2014, years + 1)))
   222  
   223  
   224  def get_regexs():
   225      regexs = {}
   226      # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
   227      regexs["year"] = re.compile(YEAR)
   228      # Search for "AUTHOR" which exists in the boilerplate, but shouldn't in the real thing
   229      regexs["author"] = re.compile(AUTHORS)
   230      # dates can be 2014, 2015, 2016 or 2017, company holder names can be anything
   231      regexs["date"] = re.compile(get_dates())
   232      # strip // +build \n\n build constraints
   233      regexs["go_build_constraints"] = re.compile(
   234              r"^(//go:build.*\n|// \+build.*\n)+\n", re.MULTILINE)
   235      # strip #!.* from shell/python scripts
   236      regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
   237      return regexs
   238  
   239  
   240  def main():
   241      regexs = get_regexs()
   242      refs = get_refs()
   243      filenames = get_files(refs.keys())
   244      nonconforming_files = []
   245      for filename in filenames:
   246          if not file_passes(filename, refs, regexs):
   247              nonconforming_files.append(filename)
   248  
   249      if nonconforming_files:
   250          print('%d files have incorrect boilerplate headers:' %
   251                len(nonconforming_files))
   252          for filename in sorted(nonconforming_files):
   253              print(os.path.relpath(filename, ARGS.rootdir))
   254          sys.exit(1)
   255  
   256  
if __name__ == "__main__":
    # ARGS is a module-level global read by get_refs(), normalize_files(),
    # get_files() and main(); it is only set when run as a script.
    ARGS = get_args()
    main()