github.com/abayer/test-infra@v0.0.5/hack/verify_boilerplate.py

#!/usr/bin/env python

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Verifies that all source files contain the necessary copyright boilerplate
# snippet.

from __future__ import print_function

import argparse
import glob
import os
import re
import sys


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "filenames", help="list of files to check, all files if unspecified", nargs='*')

    rootdir = os.path.dirname(__file__) + "/../"
    rootdir = os.path.abspath(rootdir)
    parser.add_argument("--rootdir", default=rootdir, help="root directory to examine")

    default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    return parser.parse_args()


def get_refs():
    refs = {}

    for path in glob.glob(os.path.join(ARGS.boilerplate_dir, "boilerplate.*.txt")):
        extension = os.path.basename(path).split(".")[1]

        ref_file = open(path, 'r')
        ref = ref_file.read().splitlines()
        ref_file.close()
        refs[extension] = ref

    return refs


def file_passes(filename, refs, regexs):  # pylint: disable=too-many-locals
    try:
        with open(filename, 'r') as fp:
            data = fp.read()
    except IOError:
        return False

    basename = os.path.basename(filename)
    extension = file_extension(filename)
    if extension != "":
        ref = refs[extension]
    else:
        ref = refs[basename]

    # remove build tags from the top of Go files
    if extension == "go":
        con = regexs["go_build_constraints"]
        (data, found) = con.subn("", data, 1)

    # remove shebang from the top of shell and python files
    if extension == "sh" or extension == "py":
        she = regexs["shebang"]
        (data, found) = she.subn("", data, 1)

    data = data.splitlines()

    # if our test file is smaller than the reference it surely fails!
    if len(ref) > len(data):
        return False

    # trim our file to the same number of lines as the reference file
    data = data[:len(ref)]

    year = regexs["year"]
    for datum in data:
        if year.search(datum):
            return False

    # Replace all occurrences of the regex "2014|2015|2016|2017|2018" with "YEAR"
    when = regexs["date"]
    for idx, datum in enumerate(data):
        (data[idx], found) = when.subn('YEAR', datum)
        if found != 0:
            break

    # if we don't match the reference at this point, fail
    if ref != data:
        return False

    return True


def file_extension(filename):
    return os.path.splitext(filename)[1].split(".")[-1].lower()


SKIPPED_DIRS = [
    'Godeps', 'third_party', '_gopath', '_output',
    '.git', 'vendor', '__init__.py', 'node_modules'
]


def normalize_files(files):
    newfiles = []
    for pathname in files:
        if any(x in pathname for x in SKIPPED_DIRS):
            continue
        newfiles.append(pathname)
    for idx, pathname in enumerate(newfiles):
        if not os.path.isabs(pathname):
            newfiles[idx] = os.path.join(ARGS.rootdir, pathname)
    return newfiles


def get_files(extensions):
    files = []
    if ARGS.filenames:
        files = ARGS.filenames
    else:
        for root, dirs, walkfiles in os.walk(ARGS.rootdir):
            # don't visit certain dirs. This is just a performance improvement
            # as we would prune these later in normalize_files(). But doing it
            # here cuts down the amount of filesystem walking we do and the
            # size of the file list.
            for dpath in SKIPPED_DIRS:
                if dpath in dirs:
                    dirs.remove(dpath)

            for name in walkfiles:
                pathname = os.path.join(root, name)
                files.append(pathname)

    files = normalize_files(files)
    outfiles = []
    for pathname in files:
        basename = os.path.basename(pathname)
        extension = file_extension(pathname)
        if extension in extensions or basename in extensions:
            outfiles.append(pathname)
    return outfiles


def get_regexs():
    regexs = {}
    # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
    regexs["year"] = re.compile('YEAR')
    # dates can be 2014 through 2018, company holder names can be anything
    regexs["date"] = re.compile('(2014|2015|2016|2017|2018)')
    # strip // +build \n\n build constraints
    regexs["go_build_constraints"] = re.compile(r"^(// \+build.*\n)+\n", re.MULTILINE)
    # strip #!.* from shell/python scripts
    regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
    return regexs


def main():
    regexs = get_regexs()
    refs = get_refs()
    filenames = get_files(refs.keys())
    nonconforming_files = []
    for filename in filenames:
        if not file_passes(filename, refs, regexs):
            nonconforming_files.append(filename)

    if nonconforming_files:
        print('%d files have incorrect boilerplate headers:' %
              len(nonconforming_files))
        for filename in sorted(nonconforming_files):
            print(os.path.relpath(filename, ARGS.rootdir))
        sys.exit(1)


if __name__ == "__main__":
    ARGS = get_args()
    main()
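
A minimal usage sketch (not part of the file above), assuming the script is importable as verify_boilerplate with the repository root as the working directory and the default hack/boilerplate layout in place; the file path passed to file_passes() is illustrative.

    import sys

    import verify_boilerplate as vb

    sys.argv = ["verify_boilerplate.py"]   # parse only the argparse defaults
    vb.ARGS = vb.get_args()                # module-level ARGS is read by get_refs()/get_files()
    regexs = vb.get_regexs()
    refs = vb.get_refs()                   # loads hack/boilerplate/boilerplate.*.txt
    print(vb.file_passes("hack/verify_boilerplate.py", refs, regexs))

Run as a standalone tool, the script checks every non-skipped file under --rootdir and exits non-zero if any file is missing the expected boilerplate header.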