github.com/verrazzano/verrazzano@v1.7.0/tools/copyright/copyright.go (about)

     1  // Copyright (c) 2021, 2022, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  package main
     4  
     5  import (
     6  	"bufio"
     7  	"bytes"
     8  	"encoding/csv"
     9  	"flag"
    10  	"fmt"
    11  	"os"
    12  	"path/filepath"
    13  	"regexp"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  	"time"
    18  )
    19  
    20  // This program will accept a list of files and directories and scan all of the files found therein to make sure that
    21  // they have the correct Oracle copyright header and UPL license headers.
    22  //
    23  // Internally, we manage a list of file extensions and relative file/directory names to ignore.  We also load a list
    24  // of ignore paths from the working directory of the program containing a list of paths relative to that working dir
    25  // to explicitly ignore.
    26  
const (
	// ignoreFileDefaultName is the name of the special file that contains a list of files to ignore. It is
	// opened relative to the working directory when the COPYRIGHT_INGOREFILE_PATH environment variable is
	// empty or unset.
	ignoreFileDefaultName = "ignore_copyright_check.txt"

	// maxLines is the maximum number of lines, counted from the top of a file, that are examined for the
	// copyright and license headers before giving up
	maxLines = 5
)
    34  
var (
	// filesToSkip is a list of well-known filenames to skip while scanning, relative to the directory being scanned
	filesToSkip = []string{
		".gitlab-ci.yml",
		"go.mod",
		"go.sum",
		"LICENSE",
		"LICENSE.txt",
		"THIRD_PARTY_LICENSES.txt",
		"coverage.html",
		"clair-scanner",
		".DS_Store",
	}

	// directoriesToSkip is a list of well-known (sub)directories to skip while scanning, relative to the working
	// directory being scanned
	directoriesToSkip = []string{
		".git",
		"out",
		"bin",
		".settings",
		"thirdparty_licenses",
		"vendor",
		"_output",
		"_gen", "target",
		"node_modules",
	}

	// extensionsToSkip is a list of well-known file extensions that we will skip while scanning, including
	// binary files and file types that do not support comments (like json)
	extensionsToSkip = []string{
		".json",
		".png",
		".csv",
		".ico",
		".md",
		".jpeg",
		".jpg",
		".log",
		"-test-result.xml",
		".woff",
		".woff2",
		".ttf",
		".min.js",
		".min.css",
		".map",
		".cov",
		".iml",
		".jar",
		".zip",
		".gz",
		".test",
	}

	// copyrightRegex is the regular expression for recognizing correctly formatted copyright statements
	// Explanation of the regular expression
	// -------------------------------------
	// ^                            matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)      matches either a # character, or two / characters, or the literal string "<!--", "/*" or "<%--"
	//  Copyright                   matches the literal string " Copyright " (with the surrounding spaces)
	// \([cC]\)                     matches "(c)" or "(C)"
	// ([1-2][0-9][0-9][0-9], )?    matches an OPTIONAL first year in the range 1000-2999 followed by a comma and a space
	// ([1-2][0-9][0-9][0-9], )     matches a REQUIRED year in the range 1000-2999 followed by a comma and a space
	// Oracle ... affiliates        matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>)  matches "." or ". -->" or ". */" or ". --%>"
	// $                            matches the end of the line
	// the correct copyright line looks like this:
	// Copyright (c) 2020, Oracle and/or its affiliates.
	copyrightRegex = regexp.MustCompile(`^(#|\/\/|<!--|\/\*|<%--) Copyright \([cC]\) ([1-2][0-9][0-9][0-9], )?([1-2][0-9][0-9][0-9], )Oracle and\/or its affiliates(\.|\. -->|\. \*\/|\. --%>)$`)

	// uplRegex is the regular expression for recognizing correctly formatted UPL license headers
	// Explanation of the regular expression
	// -------------------------------------
	// ^                            matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)      matches either a # character, or two / characters, or the literal string "<!--", "/*" or "<%--"
	// Licensed ... licenses\/upl   matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>)  matches "." or ". -->" or ". */" or ". --%>"
	// $                            matches the end of the line
	// the correct license line looks like this:
	// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
	uplRegex = regexp.MustCompile(`^(#|\/\/|<!--|\/\*|<%--) Licensed under the Universal Permissive License v 1\.0 as shown at https:\/\/oss\.oracle\.com\/licenses\/upl(\.|\. -->|\. \*\/|\. --%>)$`)

	// filesWithErrors maps the path of each file that failed the check to its list of error messages
	filesWithErrors map[string][]string

	// numFilesAnalyzed is the total number of files analyzed
	numFilesAnalyzed uint

	// numFilesSkipped is the total number of files skipped
	numFilesSkipped uint

	// numDirectoriesSkipped is the total number of directories skipped
	numDirectoriesSkipped uint

	// filesToIgnore holds the file paths from the ignore file that exist and are not directories
	filesToIgnore = []string{}

	// directoriesToIgnore holds the paths from the ignore file that exist and are directories
	directoriesToIgnore = []string{}

	// enforceCurrentYear enforces that the current year is present in the copyright string (for modified files
	// checks); set from the --enforce-current flag
	enforceCurrentYear bool

	// currentYear holds the current year followed by ", " (for example "2023, "), which is the text searched for
	// in the copyright line when enforceCurrentYear is set
	currentYear string

	// verbose enables verbose output when true; set from the --verbose flag
	verbose = false
)
   144  
   145  func main() {
   146  
   147  	help := false
   148  
   149  	flag.BoolVar(&enforceCurrentYear, "enforce-current", false, "Enforce the current year is present")
   150  	flag.BoolVar(&verbose, "verbose", false, "Verbose output")
   151  	flag.BoolVar(&help, "help", false, "Display usage help")
   152  	flag.Parse()
   153  
   154  	if help {
   155  		printUsage()
   156  		os.Exit(0)
   157  	}
   158  
   159  	os.Exit(runScan(flag.Args()))
   160  }
   161  
   162  // runScan Execute the scan against the provided targets
   163  func runScan(args []string) int {
   164  
   165  	if len(args) < 1 {
   166  		fmt.Printf("\nNo pathnames provided for scan, exiting.\n")
   167  		printUsage()
   168  		return 1
   169  	}
   170  
   171  	year, _, _ := time.Now().Date()
   172  	currentYear = strconv.Itoa(year) + ", "
   173  
   174  	if enforceCurrentYear {
   175  		fmt.Println("Enforcing current year in copyright string")
   176  	}
   177  
   178  	if err := loadIgnoreFile(); err != nil {
   179  		fmt.Printf("Error updating ingore files list: %v\n", err)
   180  		return 1
   181  	}
   182  
   183  	filesWithErrors = make(map[string][]string, 10)
   184  
   185  	// Arguments are a list of directories and/or files.  Iterate through each one and
   186  	// - if it's a file,scan it
   187  	// - if it's a dir, walk it and scan it recursively
   188  	for _, arg := range args {
   189  		fmt.Printf("Scanning target %s\n", arg)
   190  		argInfo, err := os.Stat(arg)
   191  		if err != nil {
   192  			if os.IsNotExist(err) {
   193  				fmt.Printf("WARNING: %s does not exist, skipping\n", arg)
   194  				continue
   195  			}
   196  			fmt.Printf("Error getting file info for %s: %v", arg, err.Error())
   197  			return 1
   198  		}
   199  		if argInfo.IsDir() {
   200  			err = filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
   201  				if err != nil {
   202  					return err
   203  				}
   204  				if info.IsDir() {
   205  					if skipOrIgnoreDir(info.Name(), path) {
   206  						if verbose {
   207  							fmt.Printf("Skipping directory %s and all its contents\n", path)
   208  						}
   209  						return filepath.SkipDir
   210  					}
   211  					return nil
   212  				}
   213  				err = checkFile(path, info)
   214  				if err != nil {
   215  					return err
   216  				}
   217  				return nil
   218  			})
   219  		} else {
   220  			err = checkFile(arg, argInfo)
   221  		}
   222  		if err != nil {
   223  			fmt.Printf("Error processing %s: %v", arg, err.Error())
   224  			return 1
   225  		}
   226  	}
   227  	printScanReport()
   228  	if len(filesWithErrors) > 0 {
   229  		return 1
   230  	}
   231  	return 0
   232  }
   233  
   234  // checkFile Scans the specified file if it does not match the ignore criteria
   235  func checkFile(path string, info os.FileInfo) error {
   236  	// Ignore the file if
   237  	// - the extension matches one in the global set of ignored extensions
   238  	// - the name matches one in the global set of ignored relative file names
   239  	// - it is in the global ignores list read from disk
   240  	if skipFile(path, info) {
   241  		numFilesSkipped++
   242  		if verbose {
   243  			fmt.Printf("Skipping file %s/n", path)
   244  		}
   245  		return nil
   246  	}
   247  
   248  	fileErrors, err := checkCopyrightAndLicense(path)
   249  	if err != nil {
   250  		return err
   251  	}
   252  	numFilesAnalyzed++
   253  	if verbose {
   254  		fmt.Printf("Scanning %s\n", path)
   255  	}
   256  	if len(fileErrors) > 0 {
   257  		filesWithErrors[path] = fileErrors
   258  	}
   259  	return nil
   260  }
   261  
   262  // checkCopyrightAndLicense returns true if the file has a valid/correct copyright notice
   263  func checkCopyrightAndLicense(path string) (fileErrors []string, err error) {
   264  	file, err := os.Open(path)
   265  	if err != nil {
   266  		return fileErrors, err
   267  	}
   268  	reader := bufio.NewScanner(file)
   269  	reader.Split(bufio.ScanLines)
   270  	defer file.Close()
   271  
   272  	foundCopyright := false
   273  	foundLicense := false
   274  
   275  	linesRead := 0
   276  	for reader.Scan() && linesRead < maxLines {
   277  		line := reader.Text()
   278  		if copyrightRegex.MatchString(line) {
   279  			foundCopyright = true
   280  			if enforceCurrentYear && !strings.Contains(line, currentYear) {
   281  				fileErrors = append(fileErrors, "Copyright does not contain current year")
   282  			}
   283  		}
   284  		if uplRegex.MatchString(line) {
   285  			foundLicense = true
   286  		}
   287  		if foundCopyright && foundLicense {
   288  			break
   289  		}
   290  		linesRead++
   291  	}
   292  	if !foundCopyright {
   293  		fileErrors = append(fileErrors, "Copyright not found")
   294  	}
   295  	if !foundLicense {
   296  		fileErrors = append(fileErrors, "License not found")
   297  	}
   298  	return fileErrors, nil
   299  }
   300  
   301  // printScanReport Dump the scan to stdout
   302  func printScanReport() {
   303  	fmt.Printf("\nResults of scan:\n\tFiles analyzed: %d\n\tFiles with error: %d\n\tFiles skipped: %d\n\tDirectories skipped: %d\n",
   304  		numFilesAnalyzed, len(filesWithErrors), numFilesSkipped, numDirectoriesSkipped)
   305  
   306  	if len(filesWithErrors) > 0 {
   307  		fmt.Printf("\nThe following files have errors:\n")
   308  
   309  		// Sort the keys so the files are grouped lexicographically in the output,
   310  		// instead of randomized by just walking the map
   311  		keys := make([]string, 0, len(filesWithErrors))
   312  		for key := range filesWithErrors {
   313  			if len(key) > 0 {
   314  				keys = append(keys, key)
   315  			}
   316  		}
   317  		sort.Strings(keys)
   318  
   319  		for _, key := range keys {
   320  			errors := filesWithErrors[key]
   321  			buff := new(bytes.Buffer)
   322  			writer := csv.NewWriter(buff)
   323  			writer.Write(errors)
   324  			writer.Flush()
   325  
   326  			fmt.Printf("\tFile: %s, Errors: %s\n", key, buff.String())
   327  		}
   328  
   329  		fmt.Println("\nExamples of valid comments:")
   330  		fmt.Println("With forward slash (Java-style):")
   331  		fmt.Println("// Copyright (c) 2021, Oracle and/or its affiliates.")
   332  		fmt.Println("// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.")
   333  		fmt.Println("With dash (For SQL files for example):")
   334  		fmt.Println("-- Copyright (c) 2021, Oracle and/or its affiliates.")
   335  		fmt.Println("-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.")
   336  		fmt.Println("XML comments:")
   337  		fmt.Println("<!-- Copyright (c) 2021, Oracle and/or its affiliates. -->")
   338  		fmt.Println("<!-- Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. -->")
   339  		fmt.Println("With #:")
   340  		fmt.Println("# Copyright (c) 2021, Oracle and/or its affiliates.")
   341  		fmt.Println("# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.")
   342  	}
   343  }
   344  
   345  // loadIgnoreFile Loads the set of user-specified ignore files/paths
   346  func loadIgnoreFile() error {
   347  	ignoreFileName := os.Getenv("COPYRIGHT_INGOREFILE_PATH")
   348  	if len(ignoreFileName) == 0 {
   349  		ignoreFileName = ignoreFileDefaultName
   350  	}
   351  
   352  	ignoreFile, err := os.Open(ignoreFileName)
   353  	if err != nil {
   354  		return err
   355  	}
   356  	reader := bufio.NewScanner(ignoreFile)
   357  	reader.Split(bufio.ScanLines)
   358  	defer ignoreFile.Close()
   359  
   360  	// ignoreFileList Contents of ignore file
   361  	ignoreFileList := []string{}
   362  
   363  	for reader.Scan() {
   364  		line := strings.TrimSpace(reader.Text())
   365  		// skip empty lines - otherwise the code below will end up skipping entire
   366  		if len(line) == 0 {
   367  			continue
   368  		}
   369  		// ignore lines starting with "#"
   370  		if strings.HasPrefix(line, "#") {
   371  			continue
   372  		}
   373  		ignoreFileList = append(ignoreFileList, line)
   374  	}
   375  
   376  	for _, ignoreLine := range ignoreFileList {
   377  		info, err := os.Stat(ignoreLine)
   378  		if err != nil {
   379  			continue
   380  		}
   381  		if info.IsDir() {
   382  			// if the path points to an existing directory, add it to directories to ignore
   383  			directoriesToIgnore = append(directoriesToIgnore, ignoreLine)
   384  		} else {
   385  			filesToIgnore = append(filesToIgnore, ignoreLine)
   386  		}
   387  	}
   388  
   389  	fmt.Printf("Files to ignore: %v\n", filesToIgnore)
   390  	fmt.Printf("Directories to ignore: %v\n", directoriesToIgnore)
   391  	fmt.Println()
   392  	return nil
   393  }
   394  
   395  // skipOrIgnoreDir Returns true if a directory matches the skip or ignore lists
   396  func skipOrIgnoreDir(relativeName string, path string) bool {
   397  	if contains(directoriesToSkip, relativeName) || contains(directoriesToIgnore, path) {
   398  		numDirectoriesSkipped++
   399  		return true
   400  	}
   401  	return false
   402  }
   403  
   404  // skipFile Returns true if the file should be ignored/skipped
   405  func skipFile(pathToFile string, info os.FileInfo) bool {
   406  	return contains(filesToSkip, info.Name()) ||
   407  		contains(extensionsToSkip, filepath.Ext(info.Name())) ||
   408  		contains(filesToIgnore, pathToFile) ||
   409  		isFileOnIgnoredPath(pathToFile)
   410  }
   411  
   412  // isFileOnIgnoredPath Returns true if the file is under one of the dirs specified in the ignore file
   413  func isFileOnIgnoredPath(filepath string) bool {
   414  	for index := range directoriesToIgnore {
   415  		if strings.Contains(filepath, directoriesToIgnore[index]) {
   416  			return true
   417  		}
   418  	}
   419  	return false
   420  }
   421  
   422  // contains Search a list of strings for a value
   423  func contains(strings []string, value string) bool {
   424  	for i := range strings {
   425  		if value == strings[i] {
   426  			return true
   427  		}
   428  	}
   429  	return false
   430  }
   431  
   432  // printUsage Prints the help for this program
   433  func printUsage() {
   434  	usageString := `
   435  
   436  go run copyright.go [options] path1 [path2 path3 ...]
   437  
   438  Options:
   439  	--enforce-current   Enforce that files provided to the tool have the current year in the copyright
   440  	--verbose           Verbose output
   441  
   442  `
   443  	fmt.Println(usageString)
   444  }