github.com/verrazzano/verrazzano@v1.7.0/tools/fix-copyright/copyright.go (about)

     1  // Copyright (c) 2021, 2022, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  package main
     4  
     5  import (
     6  	"bufio"
     7  	"bytes"
     8  	"flag"
     9  	"fmt"
    10  	"io"
    11  	"log"
    12  	"os"
    13  	"os/exec"
    14  	"path/filepath"
    15  	"regexp"
    16  	"strconv"
    17  	"strings"
    18  	"text/template"
    19  	"time"
    20  )
    21  
const (
	// copyrightTemplate is the text/template source for the two-line copyright and UPL license header.
	// It reads the Comment, CreatedYear and UpdatedYear fields of the value it is executed with.
	// The created year is only emitted when it differs from the updated year, so a file created and
	// last updated in the same year gets a single year in its header.
	copyrightTemplate = `{{- $createdYear:=.CreatedYear -}}{{- $updatedYear:=.UpdatedYear -}}{{ .Comment }} Copyright (c) {{if ne $createdYear $updatedYear }}{{printf "%s" $createdYear}}, {{end}}{{printf "%s" $updatedYear}}, Oracle and/or its affiliates.
{{ .Comment}} Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
`
)
    27  
    28  type pattern []*regexp.Regexp
    29  
    30  func (p *pattern) String() string {
    31  	return fmt.Sprint(*p)
    32  }
    33  
    34  func (p *pattern) Set(value string) error {
    35  	for _, val := range strings.Split(value, ",") {
    36  		re := regexp.MustCompile(val)
    37  		*p = append(*p, re)
    38  	}
    39  	return nil
    40  }
    41  
// This program will accept a list of files and directories and scan all of the files found therein to make sure that
    43  // they have the correct Oracle copyright header and UPL license headers.
    44  //
    45  // Internally, we manage a list of file extensions and relative file/directory names to ignore.  We also load a list
    46  // of ignore paths from the working directory of the program containing a list of paths relative to that working dir
    47  // to explicitly ignore.
    48  
var (
	// copyrightPattern is the regular expression source for recognizing correctly formatted copyright statements
	// Explanation of the regular expression
	// -------------------------------------
	// ^                              matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)        matches either a # character, or two / characters, or the literal string "<!--", "/*" or "<%--"
	//  Copyright                     matches the literal string " Copyright "
	// \([cC]\)                       matches "(c)" or "(C)"
	// ((?P<CreatedYear>...), )       matches a year in the range 1000-2999 followed by a comma and a space; the year is captured as CreatedYear
	// ((?P<UpdatedYear>...), )?      matches an OPTIONAL second year in the range 1000-2999 followed by a comma and a space; the year is captured as UpdatedYear
	// Oracle ... affiliates          matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>)    matches "." or ". -->" or ". */" or ". --%>"
	// $                              matches the end of the line
	// a correct copyright line looks like this:
	// Copyright (c) 2020, Oracle and/or its affiliates.
	copyrightPattern = `^(#|\/\/|<!--|\/\*|<%--) Copyright \([cC]\) ((?P<CreatedYear>[1-2][0-9][0-9][0-9]), )((?P<UpdatedYear>[1-2][0-9][0-9][0-9]), )?Oracle and\/or its affiliates(\.|\. -->|\. \*\/|\. --%>)$`
	// Compiled only to validate the pattern: MustCompile panics during package initialization if it is malformed.
	_                = regexp.MustCompile(copyrightPattern)

	// uplPattern is the regular expression source for recognizing correctly formatted UPL license headers
	// Explanation of the regular expression
	// -------------------------------------
	// ^                           matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)     matches either a # character, or two / characters or the literal string "<!--", "/*" or "<%--"
	// Licensed ... licenses\/upl  matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>) matches "." or ". -->" or ". */" or ". --%>"
	// $                           matches the end of the line
	// a correct license line looks like this:
	// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
	uplPattern = `^(#|\/\/|<!--|\/\*|<%--) Licensed under the Universal Permissive License v 1\.0 as shown at https:\/\/oss\.oracle\.com\/licenses\/upl(\.|\. -->|\. \*\/|\. --%>)$`
	// Compiled only to validate the pattern: MustCompile panics during package initialization if it is malformed.
	_          = regexp.MustCompile(uplPattern)

	// copyrightUplPattern matches a copyright line immediately followed by a UPL license line, each
	// terminated by a newline. The (?m) flag makes ^ and $ match at line boundaries rather than only
	// at the start and end of the searched text.
	copyrightUplPattern = "(?m)" + copyrightPattern + "\n" + uplPattern + "\n"
	// copyrightUplRegex is the compiled form of copyrightUplPattern
	copyrightUplRegex   = regexp.MustCompile(copyrightUplPattern)

	// verbose is bound to the -verbose flag in main; when false, main discards all log output
	verbose = false

	// excludePatterns and includePatterns are bound to the -exclude and -include flags in init
	excludePatterns  pattern = []*regexp.Regexp{}
	includePatterns  pattern = []*regexp.Regexp{}
	// extensionFlagVal is bound to the -extension flag in main; when non-empty it is used for every
	// file instead of the extension derived from the file name
	extensionFlagVal string

	// useExistingUpdateYearFromHeader - use the update date from the existing header
	// (assigned in init from the -useExistingUpdateYearFromHeader flag)
	useExistingUpdateYearFromHeader *bool
)
    92  
    93  func shouldFilter(path string) bool {
    94  	if len(includePatterns) > 0 {
    95  		var shouldInclude = false
    96  		for _, re := range includePatterns {
    97  			if re.MatchString(path) {
    98  				shouldInclude = true
    99  				break
   100  			}
   101  		}
   102  		if !shouldInclude {
   103  			log.Printf("Skipping %s as it does not match include patterns %v\n", path, includePatterns)
   104  			return true
   105  		}
   106  	}
   107  	if len(excludePatterns) > 0 {
   108  		var shouldInclude = true
   109  		for _, re := range excludePatterns {
   110  			if re.MatchString(path) {
   111  				shouldInclude = false
   112  				break
   113  			}
   114  		}
   115  		if !shouldInclude {
   116  			log.Printf("Skipping %s as it matches exclude patterns %v\n", path, includePatterns)
   117  			return true
   118  		}
   119  	}
   120  	return false
   121  }
   122  
   123  type GitFileStatus int
   124  
   125  const (
   126  	Unmodified GitFileStatus = iota
   127  	Modified
   128  	Added
   129  	Deleted
   130  	Copied
   131  	Unmerged
   132  	Untracked
   133  	Ignored
   134  )
   135  
   136  func (s GitFileStatus) String() string {
   137  	return [...]string{"unmodified", "modified", "added", "deleted", "renamed", "copied", "unmerged", "untracked", "ignored"}[s]
   138  }
   139  
   140  func ParseGitFileStatus(s string) (GitFileStatus, error) {
   141  	switch s {
   142  	default:
   143  		return 0, fmt.Errorf("Unknown git file status %s", s)
   144  	case " ":
   145  		return Unmodified, nil
   146  	case "M":
   147  		return Modified, nil
   148  	case "A":
   149  		return Added, nil
   150  	case "D":
   151  		return Deleted, nil
   152  	case "C":
   153  		return Copied, nil
   154  	case "U":
   155  		return Unmerged, nil
   156  	case "?":
   157  		return Untracked, nil
   158  	case "!":
   159  		return Ignored, nil
   160  
   161  	}
   162  }
   163  
// GitStatus holds the two per-file states parsed from a status code by ParseGitStatus.
type GitStatus struct {
	// IndexStatus is parsed from the first character of the status code.
	IndexStatus    GitFileStatus
	// WorkTreeStatus is parsed from the second character of the status code.
	WorkTreeStatus GitFileStatus
}
   168  
   169  func ParseGitStatus(s string) (*GitStatus, error) {
   170  	if strings.TrimSpace(s) == "" {
   171  		return &GitStatus{
   172  			IndexStatus:    Unmodified,
   173  			WorkTreeStatus: Unmodified,
   174  		}, nil
   175  	}
   176  	x, err := ParseGitFileStatus(string(s[0]))
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  	y, err := ParseGitFileStatus(string(s[1]))
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  
   185  	return &GitStatus{
   186  		IndexStatus:    x,
   187  		WorkTreeStatus: y,
   188  	}, nil
   189  }
   190  
// GitFileInfo is the result of gitFileInfo: the years to use in a file's copyright header together
// with the file's git status.
type GitFileInfo struct {
	// FileName is the path that was passed to gitFileInfo.
	FileName    string
	// CreatedYear is a year in decimal form.
	CreatedYear string
	// UpdatedYear is a year in decimal form.
	UpdatedYear string
	// GitStatus is the parsed output of `git status --porcelain` for the file.
	GitStatus   *GitStatus
}
   197  
// TemplateParams is the data that copyrightTemplate is executed with.
type TemplateParams struct {
	// Comment is the comment marker placed at the start of each header line, e.g. "//" or "#".
	Comment     string
	// CreatedYear is rendered only when it differs from UpdatedYear.
	CreatedYear string
	// UpdatedYear is always rendered.
	UpdatedYear string
}
   203  
   204  func gitFileInfo(path string) (*GitFileInfo, error) {
   205  	currentYear := strconv.Itoa(time.Now().Year())
   206  
   207  	out, err := exec.Command("git", "status", "--porcelain", "--", path).Output()
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	log.Printf("git status %s: %v", path, string(out))
   212  	gitStatus, err := ParseGitStatus(string(out))
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  
   217  	fi := GitFileInfo{
   218  		FileName:    path,
   219  		CreatedYear: currentYear,
   220  		UpdatedYear: currentYear,
   221  		GitStatus:   gitStatus,
   222  	}
   223  
   224  	// if file is untracked or added, use current year only
   225  	if gitStatus.WorkTreeStatus == Untracked || gitStatus.WorkTreeStatus == Added {
   226  		return &fi, nil
   227  	}
   228  
   229  	out, err = exec.Command("git", "log", "--format=%at", "--follow", "--", path).Output()
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  	log.Printf("git log --format=%%at --follow -- %s\n%s", path, string(out))
   234  
   235  	scanner := bufio.NewScanner(strings.NewReader(string(out)))
   236  	var first, last string
   237  	for scanner.Scan() {
   238  		if first == "" {
   239  			first = scanner.Text()
   240  			last = first
   241  		} else {
   242  			last = scanner.Text()
   243  		}
   244  	}
   245  	log.Printf("git log %s: first date=%s : last date=%s\n", path, first, last)
   246  	ilast, err := strconv.ParseInt(last, 10, 64)
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  	createdYear := strconv.Itoa(time.Unix(ilast, 0).UTC().Year())
   251  
   252  	updatedYear := currentYear
   253  	if gitStatus.WorkTreeStatus != Modified {
   254  		ifirst, err := strconv.ParseInt(first, 10, 64)
   255  		if err != nil {
   256  			return nil, err
   257  		}
   258  		updatedYear = strconv.Itoa(time.Unix(ifirst, 0).UTC().Year())
   259  	}
   260  
   261  	log.Printf("CreatedYear %s\n", createdYear)
   262  	log.Printf("UpdatedYear %s\n", updatedYear)
   263  	return &GitFileInfo{
   264  		FileName:    path,
   265  		CreatedYear: createdYear,
   266  		UpdatedYear: updatedYear,
   267  		GitStatus:   gitStatus,
   268  	}, nil
   269  }
   270  
   271  func renderTemplate(t *template.Template, params TemplateParams) ([]byte, error) {
   272  	var header bytes.Buffer
   273  	err := t.Execute(&header, params)
   274  	if err != nil {
   275  		return nil, err
   276  	}
   277  	log.Printf("rendered header: %s\n", header.String())
   278  	return header.Bytes(), nil
   279  }
   280  
   281  func parseYearsFromHeader(fileContents []byte) ([]byte, string, string) {
   282  	lengthToSearch := 1024
   283  	if len(fileContents) < 1024 {
   284  		lengthToSearch = len(fileContents)
   285  	}
   286  	firstBytes := fileContents[:lengthToSearch]
   287  	log.Printf("firstbytes: %s", string(firstBytes))
   288  
   289  	createdYear := ""
   290  	updatedYear := ""
   291  	if copyrightUplRegex.Match(firstBytes) {
   292  		log.Printf("matched copyrightUplRegex")
   293  		match := copyrightUplRegex.FindSubmatch(firstBytes)
   294  
   295  		paramsMap := make(map[string]string)
   296  		for i, name := range copyrightUplRegex.SubexpNames() {
   297  			if i > 0 && i <= len(match) {
   298  				paramsMap[name] = string(match[i])
   299  			}
   300  		}
   301  		log.Printf("extracted regex params from parsed header: %q", paramsMap)
   302  		createdYear = paramsMap["CreatedYear"]
   303  		updatedYear = paramsMap["UpdatedYear"]
   304  	}
   305  	return firstBytes, createdYear, updatedYear
   306  }
   307  
   308  func fixHeaders(args []string) error {
   309  
   310  	var err error
   311  	out, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
   312  	if err != nil {
   313  		return err
   314  	}
   315  	repoRoot := strings.TrimSpace(string(out))
   316  	for _, arg := range args {
   317  		err = filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
   318  			if err != nil {
   319  				log.Printf("WARNING: failure accessing a path %q: %v\n", path, err)
   320  				return err
   321  			}
   322  			if info.IsDir() {
   323  				return nil
   324  			}
   325  			if shouldFilter(path) {
   326  				return nil
   327  			}
   328  			extension := extensionFlagVal
   329  			if extensionFlagVal == "" {
   330  				extension = strings.ToLower(filepath.Ext(path))
   331  				if extension == "" {
   332  					extension = path
   333  				}
   334  			}
   335  			var comment string
   336  			switch extension {
   337  			default:
   338  				log.Printf("Unknown extension %s\n", extension)
   339  				return nil
   340  			case ".go":
   341  				comment = "//"
   342  			case ".yaml", ".yml":
   343  				comment = "#"
   344  			}
   345  			gfi, err := gitFileInfo(path)
   346  			if err != nil {
   347  				log.Printf("Error getting git file info for path %s: %v", path, err)
   348  				return err
   349  			}
   350  			log.Printf("Git file info: %v\n", gfi)
   351  
   352  			t, err := template.New("").Parse(copyrightTemplate)
   353  			if err != nil {
   354  				return err
   355  			}
   356  
   357  			params := TemplateParams{
   358  				Comment:     comment,
   359  				CreatedYear: gfi.CreatedYear,
   360  				UpdatedYear: gfi.UpdatedYear,
   361  			}
   362  
   363  			fileContents, err := os.ReadFile(path)
   364  			if err != nil {
   365  				return err
   366  			}
   367  			var replacement []byte
   368  			// if file already contains header, use the created year from that copyright header
   369  			firstBytes, createdYearFromHeader, updatedYearFromHeader := parseYearsFromHeader(fileContents)
   370  			modifyExistingHeader := true
   371  			if createdYearFromHeader == "" {
   372  				modifyExistingHeader = false
   373  				// No header matches in file
   374  				if gfi.GitStatus.WorkTreeStatus == Modified || gfi.GitStatus.IndexStatus == Modified {
   375  					log.Printf("No copyright header in file but modified, checking version-controlled file for header for %s", path)
   376  					// Check HEAD revision to see if the header matches there in modified files
   377  					gitPath, err := filepath.Rel(repoRoot, path)
   378  					if err != nil {
   379  						return err
   380  					}
   381  					getGitHead := fmt.Sprintf("HEAD:%s", gitPath)
   382  					cmd := exec.Command("git", "show", getGitHead)
   383  					out, err := cmd.Output()
   384  					if err != nil {
   385  						return err
   386  					}
   387  					_, createdYearFromHeader, updatedYearFromHeader = parseYearsFromHeader(out)
   388  				}
   389  			}
   390  
   391  			// Always trust the created year in the file header
   392  			if createdYearFromHeader != "" {
   393  				log.Printf("Using created year in copyright header %s, created year derived from Git is %s\n", createdYearFromHeader, gfi.CreatedYear)
   394  				params.CreatedYear = createdYearFromHeader
   395  			}
   396  
   397  			// Determine if updated year from header is to be trusted over the year derived from git log history.
   398  			if *useExistingUpdateYearFromHeader {
   399  				log.Printf("Using updated year from existing header, UpdatedYear = %s", updatedYearFromHeader)
   400  				params.UpdatedYear = createdYearFromHeader
   401  				if updatedYearFromHeader != "" {
   402  					params.UpdatedYear = updatedYearFromHeader
   403  				}
   404  			}
   405  
   406  			header, err := renderTemplate(t, params)
   407  			if err != nil {
   408  				return err
   409  			}
   410  
   411  			if modifyExistingHeader {
   412  				replacementHeader := copyrightUplRegex.ReplaceAll(firstBytes, header)
   413  				if !bytes.Equal(firstBytes, replacementHeader) {
   414  					replacement = append(replacementHeader, fileContents[len(firstBytes):]...)
   415  				}
   416  			} else {
   417  				replacement = append(header, fileContents...)
   418  			}
   419  
   420  			if !bytes.Equal(replacement, []byte{}) {
   421  				st, err := os.Stat(path)
   422  				if err != nil {
   423  					return err
   424  				}
   425  				err = os.WriteFile(path, replacement, st.Mode())
   426  				if err != nil {
   427  					return err
   428  				}
   429  			}
   430  
   431  			return nil
   432  		})
   433  		if err != nil {
   434  			log.Printf("error walking the path %q: %v\n", arg, err)
   435  			return err
   436  		}
   437  	}
   438  	return nil
   439  }
   440  
   441  // printUsage Prints the help for this program
   442  func printUsage() {
   443  	usageString := `
   444  Usage: %s [options] path1 [path2 path3 ...]
   445  Options:
   446  `
   447  	fmt.Printf(usageString, os.Args[0])
   448  	flag.PrintDefaults()
   449  }
   450  
   451  func init() {
   452  	flag.Var(&includePatterns, "include", "comma separated include regexp file filters")
   453  	flag.Var(&excludePatterns, "exclude", "comma separated exclude regexp file filter")
   454  	useExistingUpdateYearFromHeader = flag.Bool("useExistingUpdateYearFromHeader", false, "use years from existing headers in SCM if they exist")
   455  }
   456  
   457  func main() {
   458  
   459  	help := false
   460  	flag.StringVar(&extensionFlagVal, "extension", "", "Filename extension to force")
   461  	flag.BoolVar(&verbose, "verbose", false, "Verbose output")
   462  	flag.BoolVar(&help, "help", false, "Display usage help")
   463  	flag.Usage = printUsage
   464  	flag.Parse()
   465  
   466  	if !verbose {
   467  		log.SetOutput(io.Discard)
   468  	}
   469  
   470  	if help {
   471  		flag.Usage()
   472  		os.Exit(0)
   473  	}
   474  
   475  	if flag.NArg() == 0 {
   476  		flag.Usage()
   477  		os.Exit(1)
   478  	}
   479  
   480  	err := fixHeaders(flag.Args())
   481  	if err != nil {
   482  		os.Exit(1)
   483  	}
   484  	os.Exit(0)
   485  }