// Copyright (c) 2021, 2022, Oracle and/or its affiliates.
// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
package main

import (
	"bufio"
	"bytes"
	"encoding/csv"
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"
)

// This program will accept a list of files and directories and scan all of the files found therein to make sure that
// they have the correct Oracle copyright header and UPL license headers.
//
// Internally, we manage a list of file extensions and relative file/directory names to ignore. We also load a list
// of ignore paths from the working directory of the program containing a list of paths relative to that working dir
// to explicitly ignore.

const (
	// ignoreFileDefaultName is the name of the special file that contains a list of files to ignore
	ignoreFileDefaultName = "ignore_copyright_check.txt"

	// maxLines is the maximum number of lines to examine in a file before giving up
	maxLines = 5
)

var (
	// filesToSkip is a list of well-known filenames to skip while scanning, relative to the directory being scanned
	filesToSkip = []string{
		".gitlab-ci.yml",
		"go.mod",
		"go.sum",
		"LICENSE",
		"LICENSE.txt",
		"THIRD_PARTY_LICENSES.txt",
		"coverage.html",
		"clair-scanner",
		".DS_Store",
	}

	// directoriesToSkip is a list of well-known (sub)directories to skip while scanning, relative to the working
	// directory being scanned
	directoriesToSkip = []string{
		".git",
		"out",
		"bin",
		".settings",
		"thirdparty_licenses",
		"vendor",
		"_output",
		"_gen",
		"target",
		"node_modules",
	}

	// extensionsToSkip is a list of well-known file name suffixes that we will skip while scanning, including
	// binary files and file types that do not support comments (like json). Entries are matched as suffixes of
	// the file name, so compound suffixes such as ".min.js" and "-test-result.xml" are honored.
	extensionsToSkip = []string{
		".json",
		".png",
		".csv",
		".ico",
		".md",
		".jpeg",
		".jpg",
		".log",
		"-test-result.xml",
		".woff",
		".woff2",
		".ttf",
		".min.js",
		".min.css",
		".map",
		".cov",
		".iml",
		".jar",
		".zip",
		".gz",
		".test",
	}

	// copyrightRegex is the regular expression for recognizing correctly formatted copyright statements
	// Explanation of the regular expression
	// -------------------------------------
	// ^                           matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)     matches a # character, two / characters, or the literal string "<!--", "/*" or "<%--"
	//  Copyright                  matches the literal string " Copyright "
	// \([cC]\)                    matches "(c)" or "(C)"
	// ([1-2][0-9][0-9][0-9], )?   matches an OPTIONAL first year in the range 1000-2999 followed by a comma and a space
	// ([1-2][0-9][0-9][0-9], )    matches a year in the range 1000-2999 followed by a comma and a space
	// Oracle ... affiliates       matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>) matches "." or ". -->" or ". */" or ". --%>"
	// $                           matches the end of the line
	// the correct copyright line looks like this:
	// Copyright (c) 2020, Oracle and/or its affiliates.
	copyrightRegex = regexp.MustCompile(`^(#|\/\/|<!--|\/\*|<%--) Copyright \([cC]\) ([1-2][0-9][0-9][0-9], )?([1-2][0-9][0-9][0-9], )Oracle and\/or its affiliates(\.|\. -->|\. \*\/|\. --%>)$`)

	// uplRegex is the regular expression for recognizing correctly formatted UPL license headers
	// Explanation of the regular expression
	// -------------------------------------
	// ^                           matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)     matches a # character, two / characters, or the literal string "<!--", "/*" or "<%--"
	// Licensed ... licenses\/upl  matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>) matches "." or ". -->" or ". */" or ". --%>"
	// $                           matches the end of the line
	// the correct license line looks like this:
	// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
	uplRegex = regexp.MustCompile(`^(#|\/\/|<!--|\/\*|<%--) Licensed under the Universal Permissive License v 1\.0 as shown at https:\/\/oss\.oracle\.com\/licenses\/upl(\.|\. -->|\. \*\/|\. --%>)$`)

	// filesWithErrors Map to track files that failed the check with their error messages.
	// It is initialized here so checkFile can record failures even before runScan resets it.
	filesWithErrors = make(map[string][]string)

	// numFilesAnalyzed Total number of files analyzed
	numFilesAnalyzed uint

	// numFilesSkipped Total number of files skipped
	numFilesSkipped uint

	// numDirectoriesSkipped Total number of directories skipped
	numDirectoriesSkipped uint

	// filesToIgnore Files to ignore
	filesToIgnore = []string{}

	// directoriesToIgnore Directories to ignore
	directoriesToIgnore = []string{}

	// enforceCurrentYear Enforce that the current year is present in the copyright string (for modified files checks)
	enforceCurrentYear bool

	// currentYear Holds the current year followed by ", " (the form it takes inside a copyright line)
	currentYear string

	// verbose If true enables verbose output
	verbose = false
)

// main parses the command line flags and exits with the status returned by runScan.
func main() {
	help := false

	flag.BoolVar(&enforceCurrentYear, "enforce-current", false, "Enforce the current year is present")
	flag.BoolVar(&verbose, "verbose", false, "Verbose output")
	flag.BoolVar(&help, "help", false, "Display usage help")
	flag.Parse()

	if help {
		printUsage()
		os.Exit(0)
	}

	os.Exit(runScan(flag.Args()))
}

// runScan Execute the scan against the provided targets.
//
// args is the list of files and/or directories to scan. The return value is the process exit code: 0 when every
// analyzed file passed, 1 when no targets were given, the ignore file could not be loaded, a target could not be
// processed, or at least one file failed the check.
func runScan(args []string) int {
	if len(args) < 1 {
		fmt.Printf("\nNo pathnames provided for scan, exiting.\n")
		printUsage()
		return 1
	}

	// Keep the trailing ", " so the value only matches a year in the position used by the copyright line.
	currentYear = strconv.Itoa(time.Now().Year()) + ", "

	if enforceCurrentYear {
		fmt.Println("Enforcing current year in copyright string")
	}

	if err := loadIgnoreFile(); err != nil {
		fmt.Printf("Error updating ingore files list: %v\n", err)
		return 1
	}

	filesWithErrors = make(map[string][]string, 10)

	// Arguments are a list of directories and/or files. Iterate through each one and
	// - if it's a file, scan it
	// - if it's a dir, walk it and scan it recursively
	for _, arg := range args {
		fmt.Printf("Scanning target %s\n", arg)
		argInfo, err := os.Stat(arg)
		if err != nil {
			if os.IsNotExist(err) {
				fmt.Printf("WARNING: %s does not exist, skipping\n", arg)
				continue
			}
			fmt.Printf("Error getting file info for %s: %v\n", arg, err)
			return 1
		}
		if argInfo.IsDir() {
			err = filepath.Walk(arg, walkAndCheck)
		} else {
			err = checkFile(arg, argInfo)
		}
		if err != nil {
			fmt.Printf("Error processing %s: %v\n", arg, err)
			return 1
		}
	}
	printScanReport()
	if len(filesWithErrors) > 0 {
		return 1
	}
	return 0
}

// walkAndCheck is the filepath.Walk callback used by runScan: it prunes skipped/ignored directories and checks
// every file it is handed.
func walkAndCheck(path string, info os.FileInfo, err error) error {
	if err != nil {
		// info may be nil when Walk reports an error, so bail out before touching it.
		return err
	}
	if !info.IsDir() {
		return checkFile(path, info)
	}
	if skipOrIgnoreDir(info.Name(), path) {
		if verbose {
			fmt.Printf("Skipping directory %s and all its contents\n", path)
		}
		return filepath.SkipDir
	}
	return nil
}

// checkFile Scans the specified file if it does not match the ignore criteria, recording any header problems in
// filesWithErrors. An error is returned only when the file could not be opened.
func checkFile(path string, info os.FileInfo) error {
	// Ignore the file if
	// - the name ends with one of the global set of ignored extensions
	// - the name matches one in the global set of ignored relative file names
	// - it is in the global ignores list read from disk
	if skipFile(path, info) {
		numFilesSkipped++
		if verbose {
			fmt.Printf("Skipping file %s\n", path)
		}
		return nil
	}

	fileErrors, err := checkCopyrightAndLicense(path)
	if err != nil {
		return err
	}
	numFilesAnalyzed++
	if verbose {
		fmt.Printf("Scanning %s\n", path)
	}
	if len(fileErrors) > 0 {
		filesWithErrors[path] = fileErrors
	}
	return nil
}

// checkCopyrightAndLicense examines at most the first maxLines lines of the file at path and returns a list of
// human-readable problems: a missing copyright line, a missing license line, and (when enforceCurrentYear is set)
// a copyright line that lacks the current year. The list is empty when the headers are correct. err is non-nil
// only when the file cannot be opened.
func checkCopyrightAndLicense(path string) (fileErrors []string, err error) {
	file, err := os.Open(path)
	if err != nil {
		return fileErrors, err
	}
	defer file.Close()

	reader := bufio.NewScanner(file)
	reader.Split(bufio.ScanLines)

	foundCopyright := false
	foundLicense := false

	for linesRead := 0; linesRead < maxLines && reader.Scan(); linesRead++ {
		line := reader.Text()
		if copyrightRegex.MatchString(line) {
			foundCopyright = true
			if enforceCurrentYear && !strings.Contains(line, currentYear) {
				fileErrors = append(fileErrors, "Copyright does not contain current year")
			}
		}
		if uplRegex.MatchString(line) {
			foundLicense = true
		}
		if foundCopyright && foundLicense {
			break
		}
	}
	// reader.Err() is deliberately not returned: a scanner failure (for example an over-long first line in a
	// binary or minified file) is reported as missing headers for that file instead of aborting the whole scan.
	if !foundCopyright {
		fileErrors = append(fileErrors, "Copyright not found")
	}
	if !foundLicense {
		fileErrors = append(fileErrors, "License not found")
	}
	return fileErrors, nil
}

// printScanReport Dump the scan to stdout: the counters first, then (if any file failed) the failing files in
// sorted order followed by examples of valid headers.
func printScanReport() {
	fmt.Printf("\nResults of scan:\n\tFiles analyzed: %d\n\tFiles with error: %d\n\tFiles skipped: %d\n\tDirectories skipped: %d\n",
		numFilesAnalyzed, len(filesWithErrors), numFilesSkipped, numDirectoriesSkipped)

	if len(filesWithErrors) == 0 {
		return
	}

	fmt.Printf("\nThe following files have errors:\n")

	// Sort the keys so the files are grouped lexicographically in the output,
	// instead of randomized by just walking the map
	keys := make([]string, 0, len(filesWithErrors))
	for key := range filesWithErrors {
		if len(key) > 0 {
			keys = append(keys, key)
		}
	}
	sort.Strings(keys)

	for _, key := range keys {
		// The messages are rendered as a single CSV record. The record ends with a newline of its own, so each
		// entry is followed by a blank line in the output.
		buff := new(bytes.Buffer)
		writer := csv.NewWriter(buff)
		// The destination is an in-memory buffer, so a write failure is not expected here.
		_ = writer.Write(filesWithErrors[key])
		writer.Flush()

		fmt.Printf("\tFile: %s, Errors: %s\n", key, buff.String())
	}

	// NOTE(review): the "--" (SQL) example below is NOT accepted by copyrightRegex/uplRegex, which only allow the
	// prefixes #, //, <!--, /* and <%--. Either the example or the regular expressions should be brought in line.
	fmt.Println("\nExamples of valid comments:")
	fmt.Println("With forward slash (Java-style):")
	fmt.Println("// Copyright (c) 2021, Oracle and/or its affiliates.")
	fmt.Println("// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.")
	fmt.Println("With dash (For SQL files for example):")
	fmt.Println("-- Copyright (c) 2021, Oracle and/or its affiliates.")
	fmt.Println("-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.")
	fmt.Println("XML comments:")
	fmt.Println("<!-- Copyright (c) 2021, Oracle and/or its affiliates. -->")
	fmt.Println("<!-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. -->")
	fmt.Println("With #:")
	fmt.Println("# Copyright (c) 2021, Oracle and/or its affiliates.")
	fmt.Println("# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.")
}

// loadIgnoreFile Loads the set of user-specified ignore files/paths.
//
// The file is taken from the COPYRIGHT_INGOREFILE_PATH environment variable, falling back to
// ignoreFileDefaultName in the working directory. Blank lines and lines starting with "#" are skipped. Every
// remaining entry that exists on disk is appended to directoriesToIgnore or filesToIgnore. An error is returned
// when the ignore file cannot be opened or read.
func loadIgnoreFile() error {
	// NOTE(review): the variable name is spelled "INGORE"; it is kept as is because external scripts may set it.
	ignoreFileName := os.Getenv("COPYRIGHT_INGOREFILE_PATH")
	if len(ignoreFileName) == 0 {
		ignoreFileName = ignoreFileDefaultName
	}

	ignoreFile, err := os.Open(ignoreFileName)
	if err != nil {
		return err
	}
	defer ignoreFile.Close()

	reader := bufio.NewScanner(ignoreFile)
	reader.Split(bufio.ScanLines)

	// ignoreFileList Contents of ignore file
	ignoreFileList := []string{}

	for reader.Scan() {
		line := strings.TrimSpace(reader.Text())
		// skip empty lines and comment lines starting with "#"
		if len(line) == 0 || strings.HasPrefix(line, "#") {
			continue
		}
		ignoreFileList = append(ignoreFileList, line)
	}
	// A read failure would otherwise silently truncate the ignore list.
	if err := reader.Err(); err != nil {
		return err
	}

	for _, ignoreLine := range ignoreFileList {
		info, err := os.Stat(ignoreLine)
		if err != nil {
			// Best effort: entries that cannot be stat'ed (for example stale paths) are dropped.
			continue
		}
		if info.IsDir() {
			// if the path points to an existing directory, add it to directories to ignore
			directoriesToIgnore = append(directoriesToIgnore, ignoreLine)
		} else {
			filesToIgnore = append(filesToIgnore, ignoreLine)
		}
	}

	fmt.Printf("Files to ignore: %v\n", filesToIgnore)
	fmt.Printf("Directories to ignore: %v\n", directoriesToIgnore)
	fmt.Println()
	return nil
}

// skipOrIgnoreDir Returns true if a directory matches the skip or ignore lists. relativeName is the directory's
// base name (checked against directoriesToSkip) and path is the path it was reached by (checked against
// directoriesToIgnore). A match also increments numDirectoriesSkipped.
func skipOrIgnoreDir(relativeName string, path string) bool {
	if contains(directoriesToSkip, relativeName) || contains(directoriesToIgnore, path) {
		numDirectoriesSkipped++
		return true
	}
	return false
}

// skipFile Returns true if the file should be ignored/skipped: its name is in filesToSkip, its name ends with one
// of extensionsToSkip, its path is listed in filesToIgnore, or it lives under one of directoriesToIgnore.
func skipFile(pathToFile string, info os.FileInfo) bool {
	name := info.Name()
	return contains(filesToSkip, name) ||
		hasSkippedExtension(name) ||
		contains(filesToIgnore, pathToFile) ||
		isFileOnIgnoredPath(pathToFile)
}

// hasSkippedExtension Returns true if name ends with one of extensionsToSkip. A suffix match is used instead of
// filepath.Ext because Ext only yields the last ".xxx" element and so can never equal ".min.js" or
// "-test-result.xml".
func hasSkippedExtension(name string) bool {
	for _, ext := range extensionsToSkip {
		if strings.HasSuffix(name, ext) {
			return true
		}
	}
	return false
}

// isFileOnIgnoredPath Returns true if the file is under one of the dirs specified in the ignore file.
//
// Both sides are cleaned and compared with "/" separators, and an ignored directory only matches on whole path
// components: either at the start of the path or after a separator. This keeps "tools/vendor" from also
// swallowing "tools/vendorized/..." while still matching "./tools/vendor/..." and "/repo/tools/vendor/...".
func isFileOnIgnoredPath(path string) bool {
	candidate := filepath.ToSlash(filepath.Clean(path))
	for _, dir := range directoriesToIgnore {
		ignored := filepath.ToSlash(filepath.Clean(dir))
		if strings.HasPrefix(candidate, ignored+"/") || strings.Contains(candidate, "/"+ignored+"/") {
			return true
		}
	}
	return false
}

// contains Search a list of strings for a value
func contains(list []string, value string) bool {
	for _, item := range list {
		if item == value {
			return true
		}
	}
	return false
}

// printUsage Prints the help for this program
func printUsage() {
	usageString := `

go run copyright.go [options] path1 [path2 path3 ...]

Options:
	--enforce-current	Enforce that files provided to the tool have the current year in the copyright
	--verbose		Verbose output
	--help			Display usage help

`
	fmt.Println(usageString)
}