golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/gerritbot/internal/rules/rules.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package rules specifies a simple set of rules for checking GitHub PRs or Gerrit CLs
     6  // for certain common mistakes, like no package in the first line of the commit message
     7  // or having long lines in the commit message body.
     8  //
     9  // A rule is primarily defined via a function that takes a Change (CL or PR) as input
    10  // and reports zero or 1 findings, which is just a string (usually 1-2 short sentences).
    11  // A rule can also optionally return a note, which might be auxiliary advice such as
    12  // how to edit the commit message.
    13  //
    14  // The FormatResults function renders the results into markdown.
    15  // Each finding is shown to the user, currently in a numbered list of findings.
    16  // The auxiliary notes are deduplicated, so that for example a set of rules
    17  // looking for problems in a commit message can all return the same editing advice
    18  // but that advice is then only shown to the user only once.
    19  //
    20  // For example, for a commit message with a first line of:
    21  //
    22  //	fmt: Improve foo and bar.
    23  //
    24  // That would currently trigger two rules, with an example formatted result of
    25  // the following (including the deduplicated advice at the end):
    26  //
    27  //	Possible problems detected:
    28  //	  1. The first word in the commit title after the package should be a
    29  //	     lowercase English word (usually a verb).
    30  //	  2. The commit title should not end with a period.
    31  //
    32  //	 To edit the commit message, see instructions [here](...). For guidance on commit
    33  //	 messages for the Go project, see [here](...).
    34  //
    35  // Rules currently err on the side of simplicity and avoiding false positives.
    36  // It is intended to be straightforward to add a new rule.
    37  //
    38  // Rules can be arranged to trigger completely independently of one another,
    39  // or alternatively a set of rules can optionally be arranged in groups that
    40  // form a precedence based on order within the group, where at most one rule
    41  // from that group will trigger. This can be helpful in cases like:
    42  //   - You'd like to have a "catch all" rule that should only trigger if another rule
    43  //     in the same group hasn't triggered.
    44  //   - You have a rule that spots a more general problem and rules that spot
    45  //     more specific or pickier variant of the same problem and want to have
    46  //     the general problem reported if applicable without also reporting
    47  //     pickier findings that would seem redundant.
    48  //   - A subset of rules that are otherwise intended to be mutually exclusive.
    49  package rules
    50  
    51  import (
    52  	"fmt"
    53  	"strings"
    54  )
    55  
    56  // rule defines a single rule that can report a finding.
    57  // See the package comment for an overview.
    58  type rule struct {
    59  	// name is a short internal rule name that don't expect to tweak frequently.
    60  	// We don't show it to users, but do use in tests.
    61  	name string
    62  
    63  	// f is the rule function that reports a single finding and optionally
    64  	// an auxiliary note, which for example could be advice on how to edit the commit message.
    65  	// Notes are deduplicated across different rules, and ignored for a given rule if there is no finding.
    66  	f func(change Change) (finding string, note string)
    67  
    68  	// skip lists repos to skip for this rule.
    69  	skip []string
    70  
    71  	// only lists the only repos that this rule will run against.
    72  	only []string
    73  }
    74  
    75  // ruleGroups defines our live set of rules. It is a [][]rule
    76  // because the individual rules are arranged into groups of rules
    77  // that are mutually exclusive, where the first triggering rule wins
    78  // within a []rule in ruleGroups. See the package comment for details.
    79  var ruleGroups = [][]rule{
    80  	{
    81  		// We have two rules in this rule group, and hence we report at most one of them.
    82  		// The second (pickier) rule is checked only if the first doesn't trigger.
    83  		{
    84  			name: "title: no package found",
    85  			skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title.
    86  			f: func(change Change) (finding string, note string) {
    87  				component, example := packageExample(change.Repo)
    88  				finding = fmt.Sprintf("The commit title should start with the primary affected %s name followed by a colon, like \"%s\".", component, example)
    89  				start, _, ok := strings.Cut(change.Title, ":")
    90  				if !ok {
    91  					// No colon.
    92  					return finding, commitMessageAdvice
    93  				}
    94  				var pattern string
    95  				switch change.Repo {
    96  				default:
    97  					// A single package starts with a lowercase ASCII and has no spaces prior to the colon.
    98  					pattern = `^[a-z][^ ]+$`
    99  				case "website":
   100  					// Allow leading underscore (e.g., "_content").
   101  					pattern = `^[a-z_][^ ]+$`
   102  				case "vscode-go":
   103  					// Allow leading uppercase and period (e.g., ".github/workflow").
   104  					pattern = `^[a-zA-Z.][^ ]+$`
   105  				}
   106  				if match(pattern, start) {
   107  					return "", ""
   108  				}
   109  				if match(`^(README|DEBUG)`, start) {
   110  					// Rare, but also allow a root README or DEBUG file as a component.
   111  					return "", ""
   112  				}
   113  				pkgs := strings.Split(start, ", ")
   114  				if match(`^[a-z]`, start) && len(pkgs) > 1 && !matchAny(` `, pkgs) {
   115  					// We also allow things like "go/types, types2: ..." (but not "hello, fun world: ...").
   116  					return "", ""
   117  				}
   118  				return finding, commitMessageAdvice
   119  			},
   120  		},
   121  		{
   122  			name: "title: no colon then single space after package",
   123  			skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title.
   124  			f: func(change Change) (finding string, note string) {
   125  				component, example := packageExample(change.Repo)
   126  				finding = fmt.Sprintf("The %s in the commit title should be followed by a colon and single space, like \"%s\".", component, example)
   127  				if !match(`[^ ]: [^ ]`, change.Title) {
   128  					return finding, commitMessageAdvice
   129  				}
   130  				return "", ""
   131  			},
   132  		},
   133  	},
   134  	{
   135  		{
   136  			name: "title: no lowercase word after a first colon",
   137  			skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title.
   138  			f: func(change Change) (finding string, note string) {
   139  				component, _ := packageExample(change.Repo)
   140  				finding = fmt.Sprintf("The first word in the commit title after the %s should be a lowercase English word (usually a verb).", component)
   141  				_, after, ok := strings.Cut(change.Title, ":")
   142  				if !ok {
   143  					// No colon. Someone who doesn't have any colon probably can use a reminder about what comes next.
   144  					return finding, commitMessageAdvice
   145  				}
   146  				if !match(`^[ ]*[a-z]+\b`, after) {
   147  					// Probably not a lowercase English verb.
   148  					return finding, commitMessageAdvice
   149  				}
   150  				return "", ""
   151  			},
   152  		},
   153  	},
   154  	{
   155  		{
   156  			name: "title: ends with period",
   157  			f: func(change Change) (finding string, note string) {
   158  				finding = "The commit title should not end with a period."
   159  				if len(change.Title) > 0 && change.Title[len(change.Title)-1] == '.' {
   160  					return finding, commitMessageAdvice
   161  				}
   162  				return "", ""
   163  			},
   164  		},
   165  	},
   166  	{
   167  		// We have two rules in this group, and hence we report at most one of them.
   168  		// The second rule is pickier.
   169  		{
   170  			name: "body: short",
   171  			f: func(change Change) (finding string, note string) {
   172  				finding = "The commit message body is %s. " +
   173  					"That can be OK if the change is trivial like correcting spelling or fixing a broken link, " +
   174  					"but usually the description should provide context for the change and explain what it does in complete sentences."
   175  				if !mightBeTrivial(change) {
   176  					size := len([]rune(change.Body))
   177  					switch {
   178  					case size == 0:
   179  						return fmt.Sprintf(finding, "missing"), commitMessageAdvice
   180  					case size < 24: // len("Updates golang/go#12345") == 23
   181  						return fmt.Sprintf(finding, "very brief"), commitMessageAdvice
   182  					}
   183  				}
   184  				return "", ""
   185  			},
   186  		},
   187  		{
   188  			name: "body: no sentence candidates found",
   189  			f: func(change Change) (finding string, note string) {
   190  				finding = "Are you describing the change in complete sentences with correct punctuation in the commit message body, including ending sentences with periods?"
   191  				if !mightBeTrivial(change) && !match("[a-zA-Z0-9\"'`)][.:?!)]( |\\n|$)", change.Body) {
   192  					// A complete English sentence usually ends with an alphanumeric immediately followed by a terminating punctuation.
   193  					// (This is an approximate test, but currently has a very low false positive rate. Brief manual checking suggests
   194  					// ~zero false positives in a corpus of 1000 CLs).
   195  					return finding, commitMessageAdvice
   196  				}
   197  				return "", ""
   198  			},
   199  		},
   200  	},
   201  	{
   202  		{
   203  			name: "body: long lines",
   204  			f: func(change Change) (finding string, note string) {
   205  				finding = "Lines in the commit message should be wrapped at ~76 characters unless needed for things like URLs or tables. You have a %d character line."
   206  				// Check if something smells like a table, ASCII art, or benchstat output.
   207  				if match("----|____|[\u2500-\u257F]", change.Body) || match(` ± [0-9.]+%`, change.Body) {
   208  					// This might be something that is allowed to be wide.
   209  					// (The Unicode character range above covers box drawing characters, which is probably overkill).
   210  					// We are lenient here and don't check the rest of the body,
   211  					// mainly to be friendly and also we don't want to guess
   212  					// line by line whether something looks like a table.
   213  					return "", ""
   214  				}
   215  				longest := 0
   216  				for _, line := range splitLines(change.Body) {
   217  					if match(`https?://|www\.|\.(com|org|dev)`, line) || matchCount(`[/\\]`, line) > 4 {
   218  						// Might be a long url or other path.
   219  						continue
   220  					}
   221  					l := len([]rune(line))
   222  					if longest < l {
   223  						longest = l
   224  					}
   225  				}
   226  				if longest > 78 { // Official guidance on wiki is "~76".
   227  					return fmt.Sprintf(finding, longest), commitMessageAdvice
   228  				}
   229  				return "", ""
   230  			},
   231  		},
   232  	},
   233  	{
   234  		{
   235  			name: "body: might use markdown",
   236  			f: func(change Change) (finding string, note string) {
   237  				finding = "Are you using markdown? Markdown should not be used to augment text in the commit message."
   238  				if match(`(?i)markdown`, change.Title) || match(`(?i)markdown`, change.Body) {
   239  					// Could be a markdown-related fix.
   240  					return "", ""
   241  				}
   242  				if matchCount("(?m)^```", change.Body) >= 2 {
   243  					// Looks like a markdown code block.
   244  					return finding, commitMessageAdvice
   245  				}
   246  				// Now look for markdown backticks (which might be most common offender),
   247  				// but with a mild attempt to avoid flagging a Go regexp or other Go raw string.
   248  				if !match(`regex|string`, change.Title) {
   249  					for _, line := range splitLines(change.Body) {
   250  						if strings.ContainsAny(line, "={}()[]") || match(`regex|string`, line) {
   251  							// This line might contain Go code. This is fairly lenient, but
   252  							// in practice, if someone uses markdown once in a CL, they tend to use
   253  							// it multiple times, so we often have multiple chances to find a hit.
   254  							continue
   255  						}
   256  						if match("`.+`", line) {
   257  							// Could be a markdown backtick.
   258  							return finding, commitMessageAdvice
   259  						}
   260  					}
   261  				}
   262  				if match(`\[.+]\(https?://.+\)`, change.Body) {
   263  					// Looks like a markdown link. (Sorry, currently our ugliest regexp).
   264  					return finding, commitMessageAdvice
   265  				}
   266  				return "", ""
   267  			},
   268  		},
   269  	},
   270  	{
   271  		{
   272  			name: "body: still contains PR instructions",
   273  			f: func(change Change) (finding string, note string) {
   274  				finding = "Do you still have the GitHub PR instructions in your commit message text? The PR instructions should be deleted once you have applied them."
   275  				if strings.Contains(change.Body, "Delete these instructions once you have read and applied them") {
   276  					return finding, commitMessageAdvice
   277  				}
   278  				return "", ""
   279  			},
   280  		},
   281  	},
   282  	{
   283  		{
   284  			name: "body: contains Signed-off-by",
   285  			f: func(change Change) (finding string, note string) {
   286  				finding = "Please do not use 'Signed-off-by'. We instead rely on contributors signing CLAs."
   287  				if match(`(?mi)^Signed-off-by: `, change.Body) {
   288  					return finding, commitMessageAdvice
   289  				}
   290  				return "", ""
   291  			},
   292  		},
   293  	},
   294  	{
   295  		// We have three rules in this group, and hence we report at most one of them.
   296  		// The three rules get progressively pickier.
   297  		// We exempt the proposal repo from these rules because almost all GitHub PRs
   298  		// for the proposal repo are for typos or similar.
   299  		{
   300  			name: "body: no bug reference candidate found",
   301  			skip: []string{"proposal"},
   302  			f: func(change Change) (finding string, note string) {
   303  				finding = "You usually need to reference a bug number for all but trivial or cosmetic fixes. " +
   304  					"%s at the end of the commit message. Should you have a bug reference?"
   305  				if mightBeTrivial(change) {
   306  					return "", ""
   307  				}
   308  				var bugPattern string
   309  				switch usesTracker(change.Repo) {
   310  				case mainTracker:
   311  					bugPattern = `#\d{4}`
   312  				default:
   313  					bugPattern = `#\d{2}`
   314  				}
   315  				if !match(bugPattern, change.Body) {
   316  					return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice
   317  				}
   318  				return "", ""
   319  			},
   320  		},
   321  		{
   322  			name: "body: bug format looks incorrect",
   323  			skip: []string{"proposal"},
   324  			f: func(change Change) (finding string, note string) {
   325  				finding = "Do you have the right bug reference format? %s at the end of the commit message."
   326  				if mightBeTrivial(change) {
   327  					return "", ""
   328  				}
   329  				var bugPattern string
   330  				switch {
   331  				case change.Repo == "go":
   332  					bugPattern = `#\d{4,}`
   333  				case usesTracker(change.Repo) == mainTracker:
   334  					bugPattern = `golang/go#\d{4,}`
   335  				case usesTracker(change.Repo) == ownTracker:
   336  					bugPattern = fmt.Sprintf(`golang/%s#\d{2,}`, change.Repo)
   337  				default:
   338  					bugPattern = `golang/go#\d{4,}`
   339  				}
   340  				if !match(`(?m)^(Fixes|Updates|For|Closes|Resolves) `+bugPattern+`\.?$`, change.Body) {
   341  					return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice
   342  				}
   343  				return "", ""
   344  			},
   345  		},
   346  		{
   347  			name: "body: no bug reference candidate at end",
   348  			skip: []string{"proposal"},
   349  			f: func(change Change) (finding string, note string) {
   350  				// If this rule is running, it means it passed the earlier bug-related rules
   351  				// in this group, so we know there is what looks like a well-formed bug.
   352  				finding = "It looks like you have a properly formated bug reference, but the convention is to " +
   353  					"put bug references at the bottom of the commit message, even if a bug is also mentioned " +
   354  					"in the body of the message."
   355  				if mightBeTrivial(change) {
   356  					return "", ""
   357  				}
   358  				var bugPattern string
   359  				switch {
   360  				case usesTracker(change.Repo) == mainTracker:
   361  					bugPattern = `#\d{4}`
   362  				default:
   363  					bugPattern = `#\d{2}`
   364  				}
   365  				lines := splitLines(change.Body)
   366  				if len(lines) > 0 && !match(bugPattern, lines[len(lines)-1]) {
   367  					return finding, commitMessageAdvice
   368  				}
   369  				return "", ""
   370  			},
   371  		},
   372  	},
   373  }
   374  
   375  // Auxiliary advice.
   376  var commitMessageAdvice = "The commit title and commit message body come from the GitHub PR title and description, and must be edited in the GitHub web interface (not via git). " +
   377  	"For instructions, see [here](https://go.dev/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). " +
   378  	"For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages)."
   379  
   380  // mightBeTrivial reports if a change looks like something
   381  // where a commit body is often allowed to be skipped, such
   382  // as correcting spelling, fixing a broken link, or
   383  // adding or correcting an example.
   384  func mightBeTrivial(change Change) bool {
   385  	return match(`(?i)typo|spell|grammar|grammatical|comment|readme|document|\bdocs?\b|example|(fix|correct|broken|wrong).*(link|url)`, change.Title)
   386  }
   387  
   388  // bugExamples returns a snippet of text that includes example bug references
   389  // formatted as expected for a repo.
   390  func bugExamples(repo string) string {
   391  	switch {
   392  	case repo == "go":
   393  		return "For this repo, the format is usually 'Fixes #12345' or 'Updates #12345'"
   394  	case usesTracker(repo) == mainTracker:
   395  		return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/go#12345' or 'Updates golang/go#12345'", repo)
   396  	case usesTracker(repo) == ownTracker:
   397  		return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/%s#1234' or 'Updates golang/%s#1234'", repo, repo, repo)
   398  	default:
   399  		// We don't know how issues should be referenced for this repo, including this might be
   400  		// some future created after these rules were last updated, so be a bit vaguer.
   401  		return "For most repos outside the main go repo, the format is usually 'Fixes golang/go#12345' or 'Updates golang/go#12345'"
   402  	}
   403  }
   404  
   405  // packageExample returns an example usage of a package/component in a commit title
   406  // for a given repo, along with what to call the leading words before the first
   407  // colon ("package" vs. "component").
   408  func packageExample(repo string) (component string, example string) {
   409  	switch repo {
   410  	default:
   411  		return "package", "net/http: improve [...]"
   412  	case "website":
   413  		return "component", "_content/doc/go1.21: fix [...]"
   414  	case "vscode-go":
   415  		return "component", "src/goInstallTools: improve [...]"
   416  	}
   417  }
   418  
   419  // tracker is the issue tracker used by a repo.
   420  type tracker int
   421  
   422  const (
   423  	mainTracker    tracker = iota // uses the main golang/go issue tracker.
   424  	ownTracker                    // uses a repo-specific tracker, like golang/vscode-go.
   425  	unknownTracker                // we don't know what tracker is used, which usually means the main tracker is used.
   426  )
   427  
   428  // usesTracker reports if a repo uses the main golang/go tracker or a repo-specific tracker.
   429  // Callers should deal gracefully with unknownTracker being reported (such as by giving less precise
   430  // advice that is still generally useful), which might happen with a less commonly used repo or some future repo.
   431  func usesTracker(repo string) tracker {
   432  	// It's OK for a repo to be missing from our list. If something is missing, either it is not frequently used
   433  	// and hence hopefully not a major problem, or in the rare case we are missing something frequently used,
   434  	// this can be updated if someone complains.
   435  	// TODO: not immediately clear where vulndb should go. In practice, it seems to use the main golang/go tracker,
   436  	// but it also has its own tracker (which might just be for security reports?). Leaving unspecified for now,
   437  	// which results in vaguer advice.
   438  	// TODO: oauth2 might transition from its own tracker to the main tracker (https://go.dev/issue/56402), so
   439  	// we also leave unspecified for now.
   440  	switch repo {
   441  	case "go", "arch", "build", "crypto", "debug", "exp", "image", "mobile", "mod", "net", "perf", "pkgsite", "playground",
   442  		"proposal", "review", "sync", "sys", "telemetry", "term", "text", "time", "tools", "tour", "vuln", "website", "xerrors":
   443  		return mainTracker
   444  	case "vscode-go":
   445  		return ownTracker
   446  	default:
   447  		return unknownTracker
   448  	}
   449  }