// Copyright (c) 2021, 2022, Oracle and/or its affiliates.
// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.

// Source: github.com/verrazzano/verrazzano@v1.7.0/tools/fix-copyright/copyright.go

package main

import (
	"bufio"
	"bytes"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"text/template"
	"time"
)

const (
	// copyrightTemplate renders the two-line copyright + UPL header. The
	// created year is only emitted when it differs from the updated year.
	copyrightTemplate = `{{- $createdYear:=.CreatedYear -}}{{- $updatedYear:=.UpdatedYear -}}{{ .Comment }} Copyright (c) {{if ne $createdYear $updatedYear }}{{printf "%s" $createdYear}}, {{end}}{{printf "%s" $updatedYear}}, Oracle and/or its affiliates.
{{ .Comment}} Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
`
)

// pattern is a list of compiled regular expressions. It implements
// flag.Value so it can be filled from a comma separated command line flag.
type pattern []*regexp.Regexp

// String returns the default formatting of the compiled expressions.
func (p *pattern) String() string {
	return fmt.Sprint(*p)
}

// Set compiles every comma separated expression in value and appends the
// results to p. An invalid expression is reported as an error (instead of
// panicking) and leaves p untouched.
func (p *pattern) Set(value string) error {
	parts := strings.Split(value, ",")
	compiled := make([]*regexp.Regexp, 0, len(parts))
	for _, val := range parts {
		re, err := regexp.Compile(val)
		if err != nil {
			return fmt.Errorf("invalid pattern %q: %w", val, err)
		}
		compiled = append(compiled, re)
	}
	*p = append(*p, compiled...)
	return nil
}

// This program will accept a list of files and directories and scan all of the files found therein to make sure that
// they have the correct Oracle copyright header and UPL license headers.
//
// Internally, we manage a list of file extensions and relative file/directory names to ignore. We also load a list
// of ignore paths from the working directory of the program containing a list of paths relative to that working dir
// to explicitly ignore.

var (
	// copyrightPattern is the regular expression for recognizing correctly formatted copyright statements
	// Explanation of the regular expression
	// -------------------------------------
	// ^                              matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)        matches either a # character, or two / characters or the literal string "<!--", "/*" or "<%--"
	//  Copyright                     matches the literal string " Copyright "
	// \([cC]\)                       matches "(c)" or "(C)"
	// ([1-2][0-9][0-9][0-9], )       matches a year in the range 1000-2999 followed by a comma and a space (CreatedYear)
	// ([1-2][0-9][0-9][0-9], )?      matches an OPTIONAL second year in the range 1000-2999 followed by a comma and a space (UpdatedYear)
	// Oracle ... affiliates          matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>)    matches "." or ". -->" or ". */" or ". --%>"
	// $                              matches the end of the line
	// the correct copyright line looks like this:
	// Copyright (c) 2020, Oracle and/or its affiliates.
	copyrightPattern = `^(#|\/\/|<!--|\/\*|<%--) Copyright \([cC]\) ((?P<CreatedYear>[1-2][0-9][0-9][0-9]), )((?P<UpdatedYear>[1-2][0-9][0-9][0-9]), )?Oracle and\/or its affiliates(\.|\. -->|\. \*\/|\. --%>)$`
	_                = regexp.MustCompile(copyrightPattern)

	// uplPattern is the regular expression for recognizing correctly formatted UPL license headers
	// Explanation of the regular expression
	// -------------------------------------
	// ^                              matches start of the line
	// (#|\/\/|<!--|\/\*|<%--)        matches either a # character, or two / characters or the literal string "<!--", "/*" or "<%--"
	// Licensed ... licenses\/upl     matches that literal string
	// (\.|\. -->|\. \*\/|\. --%>)    matches "." or ". -->" or ". */" or ". --%>"
	// $                              matches the end of the line
	// the correct license line looks like this:
	// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
	uplPattern = `^(#|\/\/|<!--|\/\*|<%--) Licensed under the Universal Permissive License v 1\.0 as shown at https:\/\/oss\.oracle\.com\/licenses\/upl(\.|\. -->|\. \*\/|\. --%>)$`
	_          = regexp.MustCompile(uplPattern)

	// copyrightUplPattern matches the copyright line immediately followed by
	// the license line, each terminated by a newline, in multi-line mode.
	copyrightUplPattern = "(?m)" + copyrightPattern + "\n" + uplPattern + "\n"
	copyrightUplRegex   = regexp.MustCompile(copyrightUplPattern)

	// verbose enables log output; it is bound to the -verbose flag in main.
	verbose = false

	// excludePatterns and includePatterns are bound to the -exclude and
	// -include flags in init.
	excludePatterns pattern = []*regexp.Regexp{}
	includePatterns pattern = []*regexp.Regexp{}

	// extensionFlagVal, when non-empty, overrides the extension derived from
	// each file name; it is bound to the -extension flag in main.
	extensionFlagVal string

	// useExistingUpdateYearFromHeader - use the update date from the existing header
	useExistingUpdateYearFromHeader *bool
)

// matchesAny reports whether path matches at least one expression in patterns.
func matchesAny(patterns pattern, path string) bool {
	for _, re := range patterns {
		if re.MatchString(path) {
			return true
		}
	}
	return false
}

// shouldFilter reports whether path must be skipped: either include patterns
// were given and none matches, or one of the exclude patterns matches.
func shouldFilter(path string) bool {
	if len(includePatterns) > 0 && !matchesAny(includePatterns, path) {
		log.Printf("Skipping %s as it does not match include patterns %v\n", path, includePatterns)
		return true
	}
	if matchesAny(excludePatterns, path) {
		log.Printf("Skipping %s as it matches exclude patterns %v\n", path, excludePatterns)
		return true
	}
	return false
}

// GitFileStatus is one column of the two-character status code printed by
// `git status --porcelain`.
type GitFileStatus int

const (
	Unmodified GitFileStatus = iota
	Modified
	Added
	Deleted
	Renamed
	Copied
	Unmerged
	Untracked
	Ignored
)

// gitFileStatusNames holds the display name of every GitFileStatus, indexed
// by the constant's value; it must stay in the same order as the constants.
var gitFileStatusNames = [...]string{"unmodified", "modified", "added", "deleted", "renamed", "copied", "unmerged", "untracked", "ignored"}

// String returns the lower-case name of the status. Values outside the
// declared constants are rendered as "GitFileStatus(n)" rather than panicking.
func (s GitFileStatus) String() string {
	if s < 0 || int(s) >= len(gitFileStatusNames) {
		return fmt.Sprintf("GitFileStatus(%d)", int(s))
	}
	return gitFileStatusNames[s]
}

// ParseGitFileStatus converts a single status character from
// `git status --porcelain` into a GitFileStatus. Unrecognized characters
// yield an error.
func ParseGitFileStatus(s string) (GitFileStatus, error) {
	switch s {
	case " ":
		return Unmodified, nil
	case "M":
		return Modified, nil
	case "A":
		return Added, nil
	case "D":
		return Deleted, nil
	case "R":
		return Renamed, nil
	case "C":
		return Copied, nil
	case "U":
		return Unmerged, nil
	case "?":
		return Untracked, nil
	case "!":
		return Ignored, nil
	default:
		return 0, fmt.Errorf("Unknown git file status %s", s)
	}
}

// GitStatus is the parsed two-character status of a file: the first
// character describes the index, the second the work tree.
type GitStatus struct {
	IndexStatus    GitFileStatus
	WorkTreeStatus GitFileStatus
}

// has reports whether either the index or the work tree column equals status.
func (g *GitStatus) has(status GitFileStatus) bool {
	return g.IndexStatus == status || g.WorkTreeStatus == status
}

// ParseGitStatus parses the leading two status characters of a
// `git status --porcelain` line. Blank input means git reported nothing for
// the file and is returned as unmodified in both columns. Input shorter than
// two characters is rejected with an error.
func ParseGitStatus(s string) (*GitStatus, error) {
	if strings.TrimSpace(s) == "" {
		return &GitStatus{
			IndexStatus:    Unmodified,
			WorkTreeStatus: Unmodified,
		}, nil
	}
	if len(s) < 2 {
		return nil, fmt.Errorf("malformed git status %q", s)
	}
	x, err := ParseGitFileStatus(string(s[0]))
	if err != nil {
		return nil, err
	}
	y, err := ParseGitFileStatus(string(s[1]))
	if err != nil {
		return nil, err
	}

	return &GitStatus{
		IndexStatus:    x,
		WorkTreeStatus: y,
	}, nil
}

// GitFileInfo carries the copyright years derived from git for one file,
// together with the file's parsed git status.
type GitFileInfo struct {
	FileName    string
	CreatedYear string
	UpdatedYear string
	GitStatus   *GitStatus
}

// TemplateParams is the data passed to copyrightTemplate.
type TemplateParams struct {
	Comment     string
	CreatedYear string
	UpdatedYear string
}

// yearOfUnixTimestamp converts a decimal Unix timestamp (seconds) into the
// four digit UTC year it falls in.
func yearOfUnixTimestamp(ts string) (string, error) {
	secs, err := strconv.ParseInt(ts, 10, 64)
	if err != nil {
		return "", err
	}
	return strconv.Itoa(time.Unix(secs, 0).UTC().Year()), nil
}

// gitFileInfo derives the created and updated years for path from git.
//
// Files that are untracked or newly added (in either status column), and
// files for which `git log` reports no commits, get the current year for
// both values. Otherwise the created year is the year of the oldest commit
// returned by `git log --follow`, and the updated year is the current year
// when the file is modified (in either status column) or the year of the
// newest commit when it is not.
func gitFileInfo(path string) (*GitFileInfo, error) {
	currentYear := strconv.Itoa(time.Now().Year())

	out, err := exec.Command("git", "status", "--porcelain", "--", path).Output()
	if err != nil {
		return nil, err
	}
	log.Printf("git status %s: %v", path, string(out))
	gitStatus, err := ParseGitStatus(string(out))
	if err != nil {
		return nil, err
	}

	fi := &GitFileInfo{
		FileName:    path,
		CreatedYear: currentYear,
		UpdatedYear: currentYear,
		GitStatus:   gitStatus,
	}

	// A new file has no history to consult. "Added" is reported in the index
	// column for staged files, so both columns have to be checked.
	if gitStatus.has(Untracked) || gitStatus.has(Added) {
		return fi, nil
	}

	out, err = exec.Command("git", "log", "--format=%at", "--follow", "--", path).Output()
	if err != nil {
		return nil, err
	}
	log.Printf("git log --format=%%at --follow -- %s\n%s", path, string(out))

	// first is the first timestamp printed by git log, last is the final one.
	var first, last string
	scanner := bufio.NewScanner(bytes.NewReader(out))
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}
		if first == "" {
			first = line
		}
		last = line
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	log.Printf("git log %s: first date=%s : last date=%s\n", path, first, last)

	// No commits touch this path: treat it like a new file.
	if first == "" {
		return fi, nil
	}

	createdYear, err := yearOfUnixTimestamp(last)
	if err != nil {
		return nil, err
	}
	fi.CreatedYear = createdYear

	// A locally modified file keeps the current year; otherwise use the year
	// of the first timestamp git log printed.
	if !gitStatus.has(Modified) {
		updatedYear, err := yearOfUnixTimestamp(first)
		if err != nil {
			return nil, err
		}
		fi.UpdatedYear = updatedYear
	}

	log.Printf("CreatedYear %s\n", fi.CreatedYear)
	log.Printf("UpdatedYear %s\n", fi.UpdatedYear)
	return fi, nil
}

// renderTemplate executes t with params and returns the rendered bytes.
func renderTemplate(t *template.Template, params TemplateParams) ([]byte, error) {
	var header bytes.Buffer
	if err := t.Execute(&header, params); err != nil {
		return nil, err
	}
	log.Printf("rendered header: %s\n", header.String())
	return header.Bytes(), nil
}

// parseYearsFromHeader searches the first 1024 bytes of fileContents for a
// copyright + UPL header. It returns the searched prefix and the created and
// updated years captured from the header. Both years are empty when no
// header matches; the updated year is also empty when the header carries a
// single year.
func parseYearsFromHeader(fileContents []byte) ([]byte, string, string) {
	const lengthToSearch = 1024
	firstBytes := fileContents
	if len(firstBytes) > lengthToSearch {
		firstBytes = firstBytes[:lengthToSearch]
	}
	log.Printf("firstbytes: %s", string(firstBytes))

	match := copyrightUplRegex.FindSubmatch(firstBytes)
	if match == nil {
		return firstBytes, "", ""
	}
	log.Printf("matched copyrightUplRegex")

	paramsMap := make(map[string]string)
	for i, name := range copyrightUplRegex.SubexpNames() {
		if i > 0 && i < len(match) {
			paramsMap[name] = string(match[i])
		}
	}
	log.Printf("extracted regex params from parsed header: %q", paramsMap)
	return firstBytes, paramsMap["CreatedYear"], paramsMap["UpdatedYear"]
}

// commentPrefix returns the line comment token used for path, chosen from
// the forced extension flag or else the file's lower-cased extension. The
// boolean is false when the extension is not one this tool knows.
func commentPrefix(path string) (string, bool) {
	extension := extensionFlagVal
	if extension == "" {
		extension = strings.ToLower(filepath.Ext(path))
		if extension == "" {
			extension = path
		}
	}
	switch extension {
	case ".go":
		return "//", true
	case ".yaml", ".yml":
		return "#", true
	default:
		log.Printf("Unknown extension %s\n", extension)
		return "", false
	}
}

// headerYearsAtHead reads path as stored in the HEAD revision and returns
// the years found in its copyright header (empty when there is none).
func headerYearsAtHead(repoRoot, path string) (string, string, error) {
	// filepath.Rel cannot relate an absolute root to a relative path, so
	// resolve path against the working directory first.
	absPath, err := filepath.Abs(path)
	if err != nil {
		return "", "", err
	}
	gitPath, err := filepath.Rel(repoRoot, absPath)
	if err != nil {
		return "", "", err
	}
	getGitHead := fmt.Sprintf("HEAD:%s", filepath.ToSlash(gitPath))
	out, err := exec.Command("git", "show", getGitHead).Output()
	if err != nil {
		return "", "", fmt.Errorf("git show %s: %w", getGitHead, err)
	}
	_, createdYear, updatedYear := parseYearsFromHeader(out)
	return createdYear, updatedYear, nil
}

// fixFile adds or rewrites the copyright header of a single file. Files
// with an unknown extension are left alone, and the file is only written
// when its contents actually change.
func fixFile(t *template.Template, repoRoot, path string) error {
	comment, ok := commentPrefix(path)
	if !ok {
		return nil
	}

	gfi, err := gitFileInfo(path)
	if err != nil {
		log.Printf("Error getting git file info for path %s: %v", path, err)
		return fmt.Errorf("getting git file info for %s: %w", path, err)
	}
	log.Printf("Git file info: %v\n", gfi)

	params := TemplateParams{
		Comment:     comment,
		CreatedYear: gfi.CreatedYear,
		UpdatedYear: gfi.UpdatedYear,
	}

	fileContents, err := os.ReadFile(path)
	if err != nil {
		return err
	}

	// if file already contains header, use the created year from that copyright header
	firstBytes, createdYearFromHeader, updatedYearFromHeader := parseYearsFromHeader(fileContents)
	modifyExistingHeader := createdYearFromHeader != ""
	if !modifyExistingHeader && gfi.GitStatus.has(Modified) {
		// The header may have been removed by the local modification; look
		// at the committed version of the file instead.
		log.Printf("No copyright header in file but modified, checking version-controlled file for header for %s", path)
		createdYearFromHeader, updatedYearFromHeader, err = headerYearsAtHead(repoRoot, path)
		if err != nil {
			return err
		}
	}

	// Always trust the created year in the file header
	if createdYearFromHeader != "" {
		log.Printf("Using created year in copyright header %s, created year derived from Git is %s\n", createdYearFromHeader, gfi.CreatedYear)
		params.CreatedYear = createdYearFromHeader
	}

	// Determine if updated year from header is to be trusted over the year derived from git log history.
	// When no header year is available at all, keep the git-derived year so
	// the rendered header never ends up with an empty year.
	if *useExistingUpdateYearFromHeader {
		log.Printf("Using updated year from existing header, UpdatedYear = %s", updatedYearFromHeader)
		switch {
		case updatedYearFromHeader != "":
			params.UpdatedYear = updatedYearFromHeader
		case createdYearFromHeader != "":
			params.UpdatedYear = createdYearFromHeader
		}
	}

	header, err := renderTemplate(t, params)
	if err != nil {
		return err
	}

	var replacement []byte
	if modifyExistingHeader {
		replacementHeader := copyrightUplRegex.ReplaceAll(firstBytes, header)
		if !bytes.Equal(firstBytes, replacementHeader) {
			replacement = append(replacementHeader, fileContents[len(firstBytes):]...)
		}
	} else {
		replacement = append(header, fileContents...)
	}
	if len(replacement) == 0 {
		// Header already correct; nothing to write.
		return nil
	}

	st, err := os.Stat(path)
	if err != nil {
		return err
	}
	return os.WriteFile(path, replacement, st.Mode())
}

// fixHeaders walks every path in args and fixes the copyright header of each
// regular file that passes the include/exclude filters. It stops at, and
// returns, the first error encountered.
func fixHeaders(args []string) error {
	out, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		return err
	}
	repoRoot := strings.TrimSpace(string(out))

	// The template is constant, so parse it once rather than once per file.
	t, err := template.New("").Parse(copyrightTemplate)
	if err != nil {
		return err
	}

	for _, arg := range args {
		err = filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
			if err != nil {
				log.Printf("WARNING: failure accessing a path %q: %v\n", path, err)
				return err
			}
			if info.IsDir() || shouldFilter(path) {
				return nil
			}
			return fixFile(t, repoRoot, path)
		})
		if err != nil {
			log.Printf("error walking the path %q: %v\n", arg, err)
			return err
		}
	}
	return nil
}

// printUsage Prints the help for this program
func printUsage() {
	usageString := `
Usage: %s [options] path1 [path2 path3 ...]
Options:
`
	fmt.Printf(usageString, os.Args[0])
	flag.PrintDefaults()
}

func init() {
	flag.Var(&includePatterns, "include", "comma separated include regexp file filters")
	flag.Var(&excludePatterns, "exclude", "comma separated exclude regexp file filter")
	useExistingUpdateYearFromHeader = flag.Bool("useExistingUpdateYearFromHeader", false, "use years from existing headers in SCM if they exist")
}

// main parses the command line and fixes the headers of every path given.
// It exits with status 1 when no path is supplied or when fixing fails.
func main() {

	help := false
	flag.StringVar(&extensionFlagVal, "extension", "", "Filename extension to force")
	flag.BoolVar(&verbose, "verbose", false, "Verbose output")
	flag.BoolVar(&help, "help", false, "Display usage help")
	flag.Usage = printUsage
	flag.Parse()

	if !verbose {
		log.SetOutput(io.Discard)
	}

	if help {
		flag.Usage()
		os.Exit(0)
	}

	if flag.NArg() == 0 {
		flag.Usage()
		os.Exit(1)
	}

	if err := fixHeaders(flag.Args()); err != nil {
		// Log output is discarded unless -verbose is set, so report the
		// failure on stderr to avoid exiting non-zero without explanation.
		fmt.Fprintf(os.Stderr, "%s: %v\n", os.Args[0], err)
		os.Exit(1)
	}
	os.Exit(0)
}