#!/usr/bin/env python3

# Copyright 2015 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Verifies that all source files contain the necessary copyright boilerplate
snippet.
"""

import argparse
import datetime
import glob
import os
import re
import sys

# Author names that may appear in a real header; they are normalized to
# "AUTHOR" before comparing against the reference boilerplate.
AUTHORS = r"TestGrid|Kubernetes"
# Placeholder that must only appear in the reference boilerplate, never in a
# real file header.
YEAR = r"YEAR"


def get_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with `filenames`, `rootdir` and `boilerplate_dir`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "filenames",
        help="list of files to check, all files if unspecified",
        nargs='*')

    rootdir = os.path.dirname(__file__) + "/../"
    rootdir = os.path.abspath(rootdir)
    parser.add_argument("--rootdir", default=rootdir,
                        help="root directory to examine")

    default_boilerplate_dir = os.path.join(rootdir, "hack/boilerplate")
    parser.add_argument("--boilerplate-dir", default=default_boilerplate_dir)
    return parser.parse_args()


def get_refs():
    """Load the reference boilerplate templates.

    Returns:
        dict mapping file extension or basename (e.g. "py", "go") to the
        reference header as a list of lines.
    """
    refs = {}

    for path in glob.glob(os.path.join(ARGS.boilerplate_dir,
                                       "boilerplate.*.txt")):
        # "boilerplate.py.txt" -> "py"
        extension = os.path.basename(path).split(".")[1]

        # Pass the encoding parameter to avoid ascii decode errors on some
        # platforms; the context manager guarantees the handle is closed even
        # if reading raises.
        with open(path, 'r', encoding='utf-8') as ref_file:
            refs[extension] = ref_file.read().splitlines()

    return refs


# Markers emitted by the Kubernetes code generators; files containing any of
# these are machine-generated and exempt from the boilerplate check.
GENERATED_GO_MARKERS = [
    "// Code generated by client-gen. DO NOT EDIT.",
    "// Code generated by deepcopy-gen. DO NOT EDIT.",
    "// Code generated by informer-gen. DO NOT EDIT.",
    "// Code generated by lister-gen. DO NOT EDIT.",
    "// Code generated by protoc-gen-go. DO NOT EDIT.",
]


def is_generated(data):
    """Return True if the file contents appear to be generated Go code."""
    return any(marker in data for marker in GENERATED_GO_MARKERS)


def file_passes(filename, refs, regexs):
    """Return True if `filename` carries the expected boilerplate header.

    Generated files always pass.  Go build constraints and shell/python
    shebang lines are stripped before comparing the top of the file against
    the reference template, and concrete years/author names are normalized
    back to the "YEAR"/"AUTHOR" placeholders first.
    """
    try:
        # Pass the encoding parameter to avoid ascii decode errors on some
        # platforms.
        with open(filename, 'r', encoding='utf-8') as fp:
            data = fp.read()
    except IOError:
        # An unreadable file is reported as nonconforming rather than
        # crashing the whole run.
        return False

    basename = os.path.basename(filename)
    extension = file_extension(filename)
    # get_files() only yields paths whose extension or basename has a
    # reference template, so this lookup is expected to succeed.
    ref = refs[extension] if extension != "" else refs[basename]

    # Check for and skip generated files.
    if is_generated(data):
        return True

    # Remove build tags from the top of Go files.
    if extension == "go":
        data = regexs["go_build_constraints"].sub("", data, 1)

    # Remove shebang from the top of shell/python files.
    if extension in ("sh", "py"):
        data = regexs["shebang"].sub("", data, 1)

    data = data.splitlines()

    # If our test file is smaller than the reference it surely fails!
    if len(ref) > len(data):
        return False

    # Trim our file to the same number of lines as the reference file.
    data = data[:len(ref)]

    # The literal placeholder "YEAR" must never appear in a real header.
    if any(regexs["year"].search(line) for line in data):
        return False

    # Replace the first line containing a concrete year (2014..now) with the
    # "YEAR" placeholder; only one line is normalized, hence the break.
    when = regexs["date"]
    for idx, line in enumerate(data):
        (data[idx], found) = when.subn("YEAR", line)
        if found != 0:
            break

    # Likewise replace the first line naming an author with "AUTHOR".
    author = regexs["author"]
    for idx, line in enumerate(data):
        (data[idx], found) = author.subn("AUTHOR", line)
        if found != 0:
            break

    # After normalization the header must match the reference exactly.
    return ref == data


def file_extension(filename):
    """Return the lower-cased extension of `filename` without the dot."""
    return os.path.splitext(filename)[1].split(".")[-1].lower()


# Path fragments that are never checked (third-party, generated, VCS, ...).
SKIPPED_DIRS = [
    'external',
    '.git',
    'Godeps',
    '_gopath',
    '__init__.py',
    'node_modules',
    '_output',
    'third_party',
    'vendor',
]

# Even when generated by bazel we will complain about some generated files
# not having the headers. Since they're just generated, ignore them.
IGNORE_HEADERS = [
    '// Code generated by go-bindata.'
]


def has_ignored_header(pathname):
    """Return True if the file starts with a header that exempts it."""
    # Pass the encoding parameter to avoid ascii decode errors on some
    # platforms.
    with open(pathname, 'r', encoding='utf-8') as myfile:
        data = myfile.read()
    return any(data.startswith(header) for header in IGNORE_HEADERS)


def normalize_files(files):
    """Drop skipped paths and make the remaining paths absolute."""
    newfiles = [
        pathname for pathname in files
        if not any(x in pathname for x in SKIPPED_DIRS)
    ]
    return [
        pathname if os.path.isabs(pathname)
        else os.path.join(ARGS.rootdir, pathname)
        for pathname in newfiles
    ]


def get_files(extensions):
    """Collect the files to check.

    Args:
        extensions: iterable of extensions/basenames that have a reference
            boilerplate template.

    Returns:
        list of absolute paths whose extension or basename has a template and
        which do not start with an ignored header.
    """
    if ARGS.filenames:
        files = ARGS.filenames
    else:
        files = []
        for root, dirs, walkfiles in os.walk(ARGS.rootdir):
            # Don't visit certain dirs. This is just a performance
            # improvement as we would prune these later in normalize_files(),
            # but doing it here cuts down the amount of filesystem walking we
            # do and the size of the file list.
            for dpath in SKIPPED_DIRS:
                if dpath in dirs:
                    dirs.remove(dpath)

            for name in walkfiles:
                files.append(os.path.join(root, name))

    files = normalize_files(files)
    outfiles = []
    for pathname in files:
        basename = os.path.basename(pathname)
        extension = file_extension(pathname)
        if extension in extensions or basename in extensions:
            if not has_ignored_header(pathname):
                outfiles.append(pathname)
    return outfiles


def get_dates():
    """Return a regex alternation matching every year from 2014 to now."""
    years = datetime.datetime.now().year
    return '(%s)' % '|'.join(str(year) for year in range(2014, years + 1))


def get_regexs():
    """Build the regexes used to normalize and validate file headers."""
    regexs = {}
    # Search for "YEAR" which exists in the boilerplate, but shouldn't in
    # the real thing.
    regexs["year"] = re.compile(YEAR)
    # Search for "AUTHOR" placeholders: real headers name TestGrid or
    # Kubernetes instead.
    regexs["author"] = re.compile(AUTHORS)
    # Dates can be 2014 through the current year; holder names can be
    # anything.
    regexs["date"] = re.compile(get_dates())
    # Strip "//go:build ..." / "// +build ..." constraint blocks (and the
    # blank line after them) from the top of Go files.
    regexs["go_build_constraints"] = re.compile(
        r"^(//go:build.*\n|// \+build.*\n)+\n", re.MULTILINE)
    # Strip "#!..." shebang lines from shell/python scripts.
    regexs["shebang"] = re.compile(r"^(#!.*\n)\n*", re.MULTILINE)
    return regexs


def main():
    """Check every candidate file; exit 1 if any header is nonconforming."""
    regexs = get_regexs()
    refs = get_refs()
    filenames = get_files(refs.keys())
    nonconforming_files = [
        filename for filename in filenames
        if not file_passes(filename, refs, regexs)
    ]

    if nonconforming_files:
        print('%d files have incorrect boilerplate headers:' %
              len(nonconforming_files))
        for filename in sorted(nonconforming_files):
            print(os.path.relpath(filename, ARGS.rootdir))
        sys.exit(1)


if __name__ == "__main__":
    ARGS = get_args()
    main()