#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# This script is copied from
# https://github.com/DataDog/datadog-agent/blob/main/tasks/libs/copyright.py
# with some adjustments for the needs of this repository.

"""Check (and optionally fix) the copyright header of every Go file in the repo."""

import re
import subprocess
import sys
import argparse
from pathlib import Path, PurePosixPath

GLOB_PATTERN = "**/*.go"

COPYRIGHT_HEADER = """
// Unless explicitly stated otherwise all files in this repository are licensed
// under the MIT License.
// This product includes software developed at Guance Cloud (https://www.guance.com/).
// Copyright 2021-present Guance, Inc.
""".strip()

# One pattern per header line; line N of a file must match pattern N.
COPYRIGHT_REGEX = [
    r'^// Unless explicitly stated otherwise all files in this repository are licensed$',
    r'^// under the MIT License\.$',
    r'^// This product includes software developed at Guance Cloud \(https://www\.guance\.com/\)\.$',
    r'^// Copyright 20[1-3][0-9]-([Pp]resent|20[1-3][0-9]) Guance, (Inc|Inmetrics)\.$',
]

# These path patterns are excluded from checks
PATH_EXCLUSION_REGEX = [
    # These are auto-generated files but without headers to indicate it
    '/vendor',
    'datakit/plugins/inputs/skywalking/compiled',
    '/plugins/externals/ebpf',
    '/plugins/inputs/skywalking/v3',
    '/internal/win_utils/pdh',
    '/internal/obfuscate',
    '/internal/msgpack',
    '/internal/obfuscate',
    '/pipeline/grok',
    '/pipeline/core/parser/lex_test.go',
    '/pipeline/core/parser/strutil.go',
    '/io/cachedata.pb.go',
    '/.git/',
    '/git/',
]

# These header matchers skip enforcement of the rules if found in the first
# line of the file
HEADER_EXCLUSION_REGEX = [
    '^// Code generated ',
    '^//go:generate ',
    '^// AUTOGENERATED FILE: ',
    '^// Copyright.* OpenTelemetry Authors',
    '^// Copyright.* The Go Authors',
    '^// This file includes software developed at CoreOS',
    '^// Copyright 2017 Kinvolk',
]


COMPILED_COPYRIGHT_REGEX = [re.compile(regex, re.UNICODE) for regex in COPYRIGHT_REGEX]
COMPILED_PATH_EXCLUSION_REGEX = [re.compile(regex, re.UNICODE) for regex in PATH_EXCLUSION_REGEX]
COMPILED_HEADER_EXCLUSION_REGEX = [re.compile(regex, re.UNICODE) for regex in HEADER_EXCLUSION_REGEX]


class CopyrightLinter:
    """
    This class is used to enforce copyright headers on specified file patterns
    """

    def __init__(self, debug=False):
        """Create a linter; ``debug`` enables the per-file ``[ OK ]``/``[DEBG]`` output."""
        self._debug = debug

    @staticmethod
    def _get_repo_dir():
        """Return the git top-level directory, asking git from the script's directory.

        Raises:
            subprocess.CalledProcessError: if ``git rev-parse`` exits non-zero.
        """
        script_dir = PurePosixPath(__file__).parent

        repo_dir = (
            subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                cwd=script_dir,
            )
            .decode(sys.stdout.encoding)
            .strip()
        )

        return PurePosixPath(repo_dir)

    @staticmethod
    def _is_excluded_path(filepath, exclude_matchers):
        """Return True if any matcher is found anywhere in the POSIX form of ``filepath``."""
        posix_path = filepath.as_posix()
        return any(re.search(matcher, posix_path) for matcher in exclude_matchers)

    @staticmethod
    def _get_matching_files(root_dir, glob_pattern, exclude=None):
        """Return the sorted files under ``root_dir`` matching ``glob_pattern``.

        Files whose path matches one of the ``exclude`` patterns are dropped;
        the number of dropped files is printed.
        """
        if exclude is None:
            exclude = []

        # Glob is a generator so we have to do the counting ourselves
        all_matching_files_cnt = 0

        filtered_files = []
        for filepath in Path(root_dir).glob(glob_pattern):
            all_matching_files_cnt += 1
            if not CopyrightLinter._is_excluded_path(filepath, exclude):
                filtered_files.append(filepath)

        excluded_files_cnt = all_matching_files_cnt - len(filtered_files)
        print(f"[INFO] Excluding {excluded_files_cnt} files based on path filters!")

        return sorted(filtered_files)

    @staticmethod
    def _get_header(filepath):
        """Return the first lines of ``filepath`` (at most one per header pattern), stripped.

        Reading stops at end of file, so an empty file yields ``[]`` and a file
        shorter than the header stanza yields fewer entries. Blank lines inside
        the file are kept as ``''``.
        """
        header = []
        # Go sources are UTF-8; do not depend on the locale's default encoding.
        with open(filepath, "r", encoding="utf-8") as file_obj:
            # We expect a specific header format which should be 4 lines
            for _ in range(len(COMPILED_COPYRIGHT_REGEX)):
                line = file_obj.readline()
                if not line:
                    # readline() returns '' only at EOF (a blank line is '\n').
                    break
                header.append(line.strip())

        return header

    @staticmethod
    def _is_excluded_header(header, exclude=None):
        """Return True if the first header line matches any ``exclude`` pattern.

        An empty ``header`` is never excluded.
        """
        if not header or exclude is None:
            return False

        first_line = header[0]
        return any(re.search(matcher, first_line) for matcher in exclude)

    def _has_copyright(self, filepath):
        """Return True if ``filepath`` has the expected header or an excluded first line.

        A ``[WARN]`` line describing the first mismatch is printed when the
        check fails.
        """
        header = CopyrightLinter._get_header(filepath)
        if not header:
            print("[WARN] Mismatch found! Could not find any content in file!")
            return False

        if CopyrightLinter._is_excluded_header(header, exclude=COMPILED_HEADER_EXCLUSION_REGEX):
            if self._debug:
                print(f"[INFO] Excluding {filepath} based on header '{header[0]}'")
            return True

        if len(header) < len(COMPILED_COPYRIGHT_REGEX):
            print("[WARN] Mismatch found! File too small for header stanza!")
            return False

        for line_idx, matcher in enumerate(COMPILED_COPYRIGHT_REGEX):
            if not matcher.match(header[line_idx]):
                print(
                    f"[WARN] Mismatch found! Expected '{COPYRIGHT_REGEX[line_idx]}' pattern but got '{header[line_idx]}'"
                )
                return False

        return True

    def _assert_copyrights(self, files):
        """Check every file, print a summary, and return the list of failing files."""
        failing_files = []
        for filepath in files:
            if self._has_copyright(filepath):
                if self._debug:
                    print(f"[ OK ] {filepath}")

                continue

            print(f"[FAIL] {filepath}")
            failing_files.append(filepath)

        total_files = len(files)
        if failing_files:
            # failing_files is a subset of files, so total_files is non-zero here.
            pct_failing = (len(failing_files) / total_files) * 100
            print()
            print(
                f"FAIL: There are {len(failing_files)} files out of "
                + f"{total_files} ({pct_failing:.2f}%) that are missing the proper copyright!"
            )

        return failing_files

    def _prepend_header(self, filepath, dry_run=True):
        """Write ``COPYRIGHT_HEADER`` plus a blank line in front of the file content.

        With ``dry_run`` the file is only opened and read, never modified.

        Raises:
            Exception: if the rewritten file still fails the header check.
        """
        with open(filepath, 'r+', encoding="utf-8") as file_obj:
            existing_content = file_obj.read()

            if dry_run:
                return True

            file_obj.seek(0)
            new_content = COPYRIGHT_HEADER + "\n\n" + existing_content
            file_obj.write(new_content)
            # Newline translation can make the rewritten text shorter in bytes
            # than the original (e.g. a long CRLF file on POSIX); drop any
            # stale bytes left past the new end.
            file_obj.truncate()

        # Verify result. A problem here is not benign so we stop the whole run.
        if not self._has_copyright(filepath):
            raise Exception(f"[ERROR] Header prepend failed to produce correct output for {filepath}!")

        return True

    @staticmethod
    def _is_build_header(line):
        """Return True if ``line`` is a Go build-constraint comment."""
        return line.startswith(("// +build ", "//+build ", "//go:build "))

    @staticmethod
    def _is_package_comment_or_nolint(line):
        """Return True if ``line`` starts a package doc comment or a ``//nolint`` directive."""
        return line.startswith(("// Package ", "//nolint"))

    def _fix_file_header(self, filepath, dry_run=True):
        """Try to add the header to ``filepath``; return False when it needs manual work."""
        header = CopyrightLinter._get_header(filepath)

        # Empty file - ignore
        if len(header) < 1:
            return False

        first_line = header[0]

        # If the file starts with a comment and it's not a build comment,
        # there is likely a manual fix to the header needed
        if (
            first_line.startswith("//")
            and not CopyrightLinter._is_build_header(first_line)
            and not CopyrightLinter._is_package_comment_or_nolint(first_line)
        ):
            return False

        if dry_run:
            return True

        return self._prepend_header(filepath, dry_run=dry_run)

    def _fix(self, failing_files, dry_run=True):
        """Attempt to fix each failing file; return one Exception per unfixable file."""
        failing_files_cnt = len(failing_files)
        errors = []
        for idx, filepath in enumerate(failing_files):
            print(f"[INFO] ({idx+1:3d}/{failing_files_cnt:3}) Fixing '{filepath}'...")

            if not self._fix_file_header(filepath, dry_run=dry_run):
                error_message = f"'{filepath}' could not be fixed!"
                print(f"[WARN] ({idx+1:3d}/{failing_files_cnt:3}) {error_message}")
                errors.append(Exception(error_message))

        return errors

    def assert_compliance(self, fix=False, dry_run=True):
        """
        This method applies the GLOB_PATTERN to the root of the repository and
        verifies that all files have the expected copyright header.

        With ``fix=True`` failing files are repaired (or, with ``dry_run``,
        only checked for fixability) instead of failing immediately.

        Raises:
            Exception: if files fail the check and ``fix`` is False, or if some
                failing files could not be fixed.
        """
        git_repo_dir = CopyrightLinter._get_repo_dir()

        if self._debug:
            print(f"[DEBG] Repo root: {git_repo_dir}")
            print(f"[DEBG] Finding all files in {git_repo_dir} matching '{GLOB_PATTERN}'...")

        matching_files = CopyrightLinter._get_matching_files(
            git_repo_dir,
            GLOB_PATTERN,
            exclude=COMPILED_PATH_EXCLUSION_REGEX,
        )
        print(f"[INFO] Found {len(matching_files)} files matching '{GLOB_PATTERN}'")

        failing_files = self._assert_copyrights(matching_files)
        if len(failing_files) > 0:
            if not fix:
                print("CHECK: FAIL")
                raise Exception(
                    f"Copyright linting found {len(failing_files)} files that did not have the expected header!"
                )

            # If "fix=True", we will attempt to fix the failing files
            errors = self._fix(failing_files, dry_run=dry_run)
            if errors:
                raise Exception(f"Copyright linter was unable to fix {len(errors)}/{len(failing_files)} files!")

            return

        print("CHECK: OK")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--fix", dest="fix", action='store_true', help='auto add copyright to code')
    parser.add_argument("--dry-run", dest="dry_run", action='store_true', help='dry run')

    args = parser.parse_args()

    print(args)

    CopyrightLinter(debug=True).assert_compliance(fix=args.fix, dry_run=args.dry_run)