#!/usr/bin/env python

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Verifies that all source files contain the necessary copyright boilerplate
# snippet.

from __future__ import print_function

import argparse
import datetime
import glob
import os
import re
import sys


def get_args():
    """Parse the command line and return the resulting namespace.

    Positional ``filenames`` restrict the check to those files; when none are
    given the whole tree under ``--rootdir`` is examined.

    Note that the default for ``--boilerplate-dir`` is derived from the
    *default* root directory (the parent of this script's directory), not
    from a ``--rootdir`` value supplied on the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "filenames", help="list of files to check, all files if unspecified", nargs='*')

    rootdir = os.path.dirname(__file__) + "/../"
    rootdir = os.path.abspath(rootdir)
    parser.add_argument("--rootdir", default=rootdir,
                        help="root directory to examine")

    default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    return parser.parse_args()


def get_refs():
    """Load the reference boilerplate for every known file type.

    Reads each ``boilerplate.<key>.txt`` in ``ARGS.boilerplate_dir`` and
    returns a dict mapping ``<key>`` (the second dot-separated component of
    the file name) to the reference header as a list of lines.
    """
    refs = {}

    for path in glob.glob(os.path.join(ARGS.boilerplate_dir, "boilerplate.*.txt")):
        extension = os.path.basename(path).split(".")[1]

        # The context manager closes the handle even if read() raises.
        with open(path, 'r') as ref_file:
            refs[extension] = ref_file.read().splitlines()

    return refs


GENERATED_GO_MARKERS = [
    "// Code generated by client-gen. DO NOT EDIT.",
    "// Code generated by deepcopy-gen. DO NOT EDIT.",
    "// Code generated by informer-gen. DO NOT EDIT.",
    "// Code generated by lister-gen. DO NOT EDIT.",
]


def is_generated(data):
    """Return True if the file contents appear to be generated code.

    ``data`` is the full text of a file; it counts as generated when any of
    GENERATED_GO_MARKERS occurs anywhere in it.
    """
    return any(marker in data for marker in GENERATED_GO_MARKERS)


def file_passes(filename, refs, regexs):  # pylint: disable=too-many-locals
    """Return True if ``filename`` starts with the expected boilerplate.

    Args:
        filename: path of the file to check.
        refs: dict as returned by get_refs(), keyed by extension or basename.
        regexs: dict as returned by get_regexs().

    Returns:
        False if the file cannot be read, is shorter than the reference,
        still contains the literal "YEAR" placeholder in its header, or
        differs from the reference header; True otherwise. Files recognised
        by is_generated() always pass.

    Raises:
        KeyError: if ``refs`` has no entry for the file's extension (or for
            its basename when it has no extension).
    """
    try:
        with open(filename, 'r') as fp:
            data = fp.read()
    except IOError:
        return False

    basename = os.path.basename(filename)
    extension = file_extension(filename)
    if extension != "":
        ref = refs[extension]
    else:
        ref = refs[basename]

    # check for and skip generated files
    if is_generated(data):
        return True

    # remove build tags from the top of Go files
    if extension == "go":
        data = regexs["go_build_constraints"].sub("", data, 1)

    # remove shebang from the top of shell and python files
    if extension in ("sh", "py"):
        data = regexs["shebang"].sub("", data, 1)

    data = data.splitlines()

    # if our test file is smaller than the reference it surely fails!
    if len(ref) > len(data):
        return False

    # trim our file to the same number of lines as the reference file
    data = data[:len(ref)]

    # The "YEAR" placeholder belongs in the reference only; a real header
    # must carry an actual year.
    year = regexs["year"]
    if any(year.search(datum) for datum in data):
        return False

    # Replace the year(s) with "YEAR" on the first line that contains one,
    # then stop: only that single line is normalised before comparing.
    when = regexs["date"]
    for idx, datum in enumerate(data):
        (data[idx], found) = when.subn('YEAR', datum)
        if found != 0:
            break

    # if we don't match the reference at this point, fail
    return ref == data


def file_extension(filename):
    """Return the lower-cased extension of ``filename`` without the dot.

    Returns "" when the name has no extension.
    """
    return os.path.splitext(filename)[1].split(".")[-1].lower()


SKIPPED_DIRS = [
    'Godeps', 'third_party', '_gopath', '_output',
    '.git', 'vendor', '__init__.py', 'node_modules'
]

# even when generated by bazel we will complain about some generated files
# not having the headers. since they're just generated, ignore them
IGNORE_HEADERS = [
    '// Code generated by go-bindata.'
]


def has_ignored_header(pathname):
    """Return True if the file starts with one of IGNORE_HEADERS."""
    with open(pathname, 'r') as myfile:
        data = myfile.read()
    return any(data.startswith(header) for header in IGNORE_HEADERS)


def normalize_files(files):
    """Drop skipped paths and make the remaining ones absolute.

    A path is dropped when any SKIPPED_DIRS entry occurs in it as a plain
    substring (not only as a whole path component). Relative paths are
    resolved against ``ARGS.rootdir``.
    """
    kept = [
        pathname for pathname in files
        if not any(skipped in pathname for skipped in SKIPPED_DIRS)
    ]
    return [
        pathname if os.path.isabs(pathname)
        else os.path.join(ARGS.rootdir, pathname)
        for pathname in kept
    ]


def get_files(extensions):
    """Return the absolute paths of all files that should be checked.

    Args:
        extensions: container of known boilerplate keys; a file is selected
            when its extension or its basename is in it.

    Uses ``ARGS.filenames`` when given, otherwise walks ``ARGS.rootdir``.
    Files beginning with one of IGNORE_HEADERS are left out.
    """
    files = []
    if ARGS.filenames:
        files = ARGS.filenames
    else:
        for root, dirs, walkfiles in os.walk(ARGS.rootdir):
            # don't visit certain dirs. This is just a performance improvement
            # as we would prune these later in normalize_files(). But doing it
            # cuts down the amount of filesystem walking we do and cuts down
            # the size of the file list. os.walk only honours the pruning
            # when the list is modified in place.
            dirs[:] = [dname for dname in dirs if dname not in SKIPPED_DIRS]

            for name in walkfiles:
                files.append(os.path.join(root, name))

    outfiles = []
    for pathname in normalize_files(files):
        basename = os.path.basename(pathname)
        extension = file_extension(pathname)
        if extension in extensions or basename in extensions:
            if not has_ignored_header(pathname):
                outfiles.append(pathname)
    return outfiles


def _year_pattern(first_year=2014):
    """Return a regex alternation group of every acceptable copyright year.

    Covers ``first_year`` through the current calendar year, and never ends
    before 2018 so that every year accepted historically stays accepted even
    on a machine with a badly set clock.
    """
    last_year = max(datetime.datetime.now().year, 2018)
    years = [str(year) for year in range(first_year, last_year + 1)]
    return '(%s)' % '|'.join(years)


def get_regexs():
    """Return the compiled regular expressions used by file_passes()."""
    regexs = {}
    # Search for "YEAR" which exists in the boilerplate, but shouldn't in the real thing
    regexs["year"] = re.compile('YEAR')
    # dates can be any year from 2014 up to the current one, company holder
    # names can be anything
    regexs["date"] = re.compile(_year_pattern())
    # strip // +build \n\n build constraints
    regexs["go_build_constraints"] = re.compile(
        r"^(// \+build.*\n)+\n", re.MULTILINE)
    # strip #!.* from shell/python scripts
    regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
    return regexs


def main():
    """Check every selected file and exit with status 1 if any of them fail.

    Non-conforming files are printed sorted, relative to ``ARGS.rootdir``.
    """
    regexs = get_regexs()
    refs = get_refs()
    filenames = get_files(refs.keys())
    nonconforming_files = []
    for filename in filenames:
        if not file_passes(filename, refs, regexs):
            nonconforming_files.append(filename)

    if nonconforming_files:
        print('%d files have incorrect boilerplate headers:' %
              len(nonconforming_files))
        for filename in sorted(nonconforming_files):
            print(os.path.relpath(filename, ARGS.rootdir))
        sys.exit(1)


if __name__ == "__main__":
    ARGS = get_args()
    main()