golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/gerritbot/internal/rules/rules.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package rules specifies a simple set of rules for checking GitHub PRs or Gerrit CLs 6 // for certain common mistakes, like no package in the first line of the commit message 7 // or having long lines in the commit message body. 8 // 9 // A rule is primarily defined via a function that takes a Change (CL or PR) as input 10 // and reports zero or 1 findings, which is just a string (usually 1-2 short sentences). 11 // A rule can also optionally return a note, which might be auxiliary advice such as 12 // how to edit the commit message. 13 // 14 // The FormatResults function renders the results into markdown. 15 // Each finding is shown to the user, currently in a numbered list of findings. 16 // The auxiliary notes are deduplicated, so that for example a set of rules 17 // looking for problems in a commit message can all return the same editing advice 18 // but that advice is then only shown to the user only once. 19 // 20 // For example, for a commit message with a first line of: 21 // 22 // fmt: Improve foo and bar. 23 // 24 // That would currently trigger two rules, with an example formatted result of 25 // the following (including the deduplicated advice at the end): 26 // 27 // Possible problems detected: 28 // 1. The first word in the commit title after the package should be a 29 // lowercase English word (usually a verb). 30 // 2. The commit title should not end with a period. 31 // 32 // To edit the commit message, see instructions [here](...). For guidance on commit 33 // messages for the Go project, see [here](...). 34 // 35 // Rules currently err on the side of simplicity and avoiding false positives. 36 // It is intended to be straightforward to add a new rule. 37 // 38 // Rules can be arranged to trigger completely independently of one another, 39 // or alternatively a set of rules can optionally be arranged in groups that 40 // form a precedence based on order within the group, where at most one rule 41 // from that group will trigger. This can be helpful in cases like: 42 // - You'd like to have a "catch all" rule that should only trigger if another rule 43 // in the same group hasn't triggered. 44 // - You have a rule that spots a more general problem and rules that spot 45 // more specific or pickier variant of the same problem and want to have 46 // the general problem reported if applicable without also reporting 47 // pickier findings that would seem redundant. 48 // - A subset of rules that are otherwise intended to be mutually exclusive. 49 package rules 50 51 import ( 52 "fmt" 53 "strings" 54 ) 55 56 // rule defines a single rule that can report a finding. 57 // See the package comment for an overview. 58 type rule struct { 59 // name is a short internal rule name that don't expect to tweak frequently. 60 // We don't show it to users, but do use in tests. 61 name string 62 63 // f is the rule function that reports a single finding and optionally 64 // an auxiliary note, which for example could be advice on how to edit the commit message. 65 // Notes are deduplicated across different rules, and ignored for a given rule if there is no finding. 66 f func(change Change) (finding string, note string) 67 68 // skip lists repos to skip for this rule. 69 skip []string 70 71 // only lists the only repos that this rule will run against. 72 only []string 73 } 74 75 // ruleGroups defines our live set of rules. It is a [][]rule 76 // because the individual rules are arranged into groups of rules 77 // that are mutually exclusive, where the first triggering rule wins 78 // within a []rule in ruleGroups. See the package comment for details. 79 var ruleGroups = [][]rule{ 80 { 81 // We have two rules in this rule group, and hence we report at most one of them. 82 // The second (pickier) rule is checked only if the first doesn't trigger. 83 { 84 name: "title: no package found", 85 skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. 86 f: func(change Change) (finding string, note string) { 87 component, example := packageExample(change.Repo) 88 finding = fmt.Sprintf("The commit title should start with the primary affected %s name followed by a colon, like \"%s\".", component, example) 89 start, _, ok := strings.Cut(change.Title, ":") 90 if !ok { 91 // No colon. 92 return finding, commitMessageAdvice 93 } 94 var pattern string 95 switch change.Repo { 96 default: 97 // A single package starts with a lowercase ASCII and has no spaces prior to the colon. 98 pattern = `^[a-z][^ ]+$` 99 case "website": 100 // Allow leading underscore (e.g., "_content"). 101 pattern = `^[a-z_][^ ]+$` 102 case "vscode-go": 103 // Allow leading uppercase and period (e.g., ".github/workflow"). 104 pattern = `^[a-zA-Z.][^ ]+$` 105 } 106 if match(pattern, start) { 107 return "", "" 108 } 109 if match(`^(README|DEBUG)`, start) { 110 // Rare, but also allow a root README or DEBUG file as a component. 111 return "", "" 112 } 113 pkgs := strings.Split(start, ", ") 114 if match(`^[a-z]`, start) && len(pkgs) > 1 && !matchAny(` `, pkgs) { 115 // We also allow things like "go/types, types2: ..." (but not "hello, fun world: ..."). 116 return "", "" 117 } 118 return finding, commitMessageAdvice 119 }, 120 }, 121 { 122 name: "title: no colon then single space after package", 123 skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. 124 f: func(change Change) (finding string, note string) { 125 component, example := packageExample(change.Repo) 126 finding = fmt.Sprintf("The %s in the commit title should be followed by a colon and single space, like \"%s\".", component, example) 127 if !match(`[^ ]: [^ ]`, change.Title) { 128 return finding, commitMessageAdvice 129 } 130 return "", "" 131 }, 132 }, 133 }, 134 { 135 { 136 name: "title: no lowercase word after a first colon", 137 skip: []string{"proposal"}, // We allow the proposal repo to have an irregular title. 138 f: func(change Change) (finding string, note string) { 139 component, _ := packageExample(change.Repo) 140 finding = fmt.Sprintf("The first word in the commit title after the %s should be a lowercase English word (usually a verb).", component) 141 _, after, ok := strings.Cut(change.Title, ":") 142 if !ok { 143 // No colon. Someone who doesn't have any colon probably can use a reminder about what comes next. 144 return finding, commitMessageAdvice 145 } 146 if !match(`^[ ]*[a-z]+\b`, after) { 147 // Probably not a lowercase English verb. 148 return finding, commitMessageAdvice 149 } 150 return "", "" 151 }, 152 }, 153 }, 154 { 155 { 156 name: "title: ends with period", 157 f: func(change Change) (finding string, note string) { 158 finding = "The commit title should not end with a period." 159 if len(change.Title) > 0 && change.Title[len(change.Title)-1] == '.' { 160 return finding, commitMessageAdvice 161 } 162 return "", "" 163 }, 164 }, 165 }, 166 { 167 // We have two rules in this group, and hence we report at most one of them. 168 // The second rule is pickier. 169 { 170 name: "body: short", 171 f: func(change Change) (finding string, note string) { 172 finding = "The commit message body is %s. " + 173 "That can be OK if the change is trivial like correcting spelling or fixing a broken link, " + 174 "but usually the description should provide context for the change and explain what it does in complete sentences." 175 if !mightBeTrivial(change) { 176 size := len([]rune(change.Body)) 177 switch { 178 case size == 0: 179 return fmt.Sprintf(finding, "missing"), commitMessageAdvice 180 case size < 24: // len("Updates golang/go#12345") == 23 181 return fmt.Sprintf(finding, "very brief"), commitMessageAdvice 182 } 183 } 184 return "", "" 185 }, 186 }, 187 { 188 name: "body: no sentence candidates found", 189 f: func(change Change) (finding string, note string) { 190 finding = "Are you describing the change in complete sentences with correct punctuation in the commit message body, including ending sentences with periods?" 191 if !mightBeTrivial(change) && !match("[a-zA-Z0-9\"'`)][.:?!)]( |\\n|$)", change.Body) { 192 // A complete English sentence usually ends with an alphanumeric immediately followed by a terminating punctuation. 193 // (This is an approximate test, but currently has a very low false positive rate. Brief manual checking suggests 194 // ~zero false positives in a corpus of 1000 CLs). 195 return finding, commitMessageAdvice 196 } 197 return "", "" 198 }, 199 }, 200 }, 201 { 202 { 203 name: "body: long lines", 204 f: func(change Change) (finding string, note string) { 205 finding = "Lines in the commit message should be wrapped at ~76 characters unless needed for things like URLs or tables. You have a %d character line." 206 // Check if something smells like a table, ASCII art, or benchstat output. 207 if match("----|____|[\u2500-\u257F]", change.Body) || match(` ± [0-9.]+%`, change.Body) { 208 // This might be something that is allowed to be wide. 209 // (The Unicode character range above covers box drawing characters, which is probably overkill). 210 // We are lenient here and don't check the rest of the body, 211 // mainly to be friendly and also we don't want to guess 212 // line by line whether something looks like a table. 213 return "", "" 214 } 215 longest := 0 216 for _, line := range splitLines(change.Body) { 217 if match(`https?://|www\.|\.(com|org|dev)`, line) || matchCount(`[/\\]`, line) > 4 { 218 // Might be a long url or other path. 219 continue 220 } 221 l := len([]rune(line)) 222 if longest < l { 223 longest = l 224 } 225 } 226 if longest > 78 { // Official guidance on wiki is "~76". 227 return fmt.Sprintf(finding, longest), commitMessageAdvice 228 } 229 return "", "" 230 }, 231 }, 232 }, 233 { 234 { 235 name: "body: might use markdown", 236 f: func(change Change) (finding string, note string) { 237 finding = "Are you using markdown? Markdown should not be used to augment text in the commit message." 238 if match(`(?i)markdown`, change.Title) || match(`(?i)markdown`, change.Body) { 239 // Could be a markdown-related fix. 240 return "", "" 241 } 242 if matchCount("(?m)^```", change.Body) >= 2 { 243 // Looks like a markdown code block. 244 return finding, commitMessageAdvice 245 } 246 // Now look for markdown backticks (which might be most common offender), 247 // but with a mild attempt to avoid flagging a Go regexp or other Go raw string. 248 if !match(`regex|string`, change.Title) { 249 for _, line := range splitLines(change.Body) { 250 if strings.ContainsAny(line, "={}()[]") || match(`regex|string`, line) { 251 // This line might contain Go code. This is fairly lenient, but 252 // in practice, if someone uses markdown once in a CL, they tend to use 253 // it multiple times, so we often have multiple chances to find a hit. 254 continue 255 } 256 if match("`.+`", line) { 257 // Could be a markdown backtick. 258 return finding, commitMessageAdvice 259 } 260 } 261 } 262 if match(`\[.+]\(https?://.+\)`, change.Body) { 263 // Looks like a markdown link. (Sorry, currently our ugliest regexp). 264 return finding, commitMessageAdvice 265 } 266 return "", "" 267 }, 268 }, 269 }, 270 { 271 { 272 name: "body: still contains PR instructions", 273 f: func(change Change) (finding string, note string) { 274 finding = "Do you still have the GitHub PR instructions in your commit message text? The PR instructions should be deleted once you have applied them." 275 if strings.Contains(change.Body, "Delete these instructions once you have read and applied them") { 276 return finding, commitMessageAdvice 277 } 278 return "", "" 279 }, 280 }, 281 }, 282 { 283 { 284 name: "body: contains Signed-off-by", 285 f: func(change Change) (finding string, note string) { 286 finding = "Please do not use 'Signed-off-by'. We instead rely on contributors signing CLAs." 287 if match(`(?mi)^Signed-off-by: `, change.Body) { 288 return finding, commitMessageAdvice 289 } 290 return "", "" 291 }, 292 }, 293 }, 294 { 295 // We have three rules in this group, and hence we report at most one of them. 296 // The three rules get progressively pickier. 297 // We exempt the proposal repo from these rules because almost all GitHub PRs 298 // for the proposal repo are for typos or similar. 299 { 300 name: "body: no bug reference candidate found", 301 skip: []string{"proposal"}, 302 f: func(change Change) (finding string, note string) { 303 finding = "You usually need to reference a bug number for all but trivial or cosmetic fixes. " + 304 "%s at the end of the commit message. Should you have a bug reference?" 305 if mightBeTrivial(change) { 306 return "", "" 307 } 308 var bugPattern string 309 switch usesTracker(change.Repo) { 310 case mainTracker: 311 bugPattern = `#\d{4}` 312 default: 313 bugPattern = `#\d{2}` 314 } 315 if !match(bugPattern, change.Body) { 316 return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice 317 } 318 return "", "" 319 }, 320 }, 321 { 322 name: "body: bug format looks incorrect", 323 skip: []string{"proposal"}, 324 f: func(change Change) (finding string, note string) { 325 finding = "Do you have the right bug reference format? %s at the end of the commit message." 326 if mightBeTrivial(change) { 327 return "", "" 328 } 329 var bugPattern string 330 switch { 331 case change.Repo == "go": 332 bugPattern = `#\d{4,}` 333 case usesTracker(change.Repo) == mainTracker: 334 bugPattern = `golang/go#\d{4,}` 335 case usesTracker(change.Repo) == ownTracker: 336 bugPattern = fmt.Sprintf(`golang/%s#\d{2,}`, change.Repo) 337 default: 338 bugPattern = `golang/go#\d{4,}` 339 } 340 if !match(`(?m)^(Fixes|Updates|For|Closes|Resolves) `+bugPattern+`\.?$`, change.Body) { 341 return fmt.Sprintf(finding, bugExamples(change.Repo)), commitMessageAdvice 342 } 343 return "", "" 344 }, 345 }, 346 { 347 name: "body: no bug reference candidate at end", 348 skip: []string{"proposal"}, 349 f: func(change Change) (finding string, note string) { 350 // If this rule is running, it means it passed the earlier bug-related rules 351 // in this group, so we know there is what looks like a well-formed bug. 352 finding = "It looks like you have a properly formated bug reference, but the convention is to " + 353 "put bug references at the bottom of the commit message, even if a bug is also mentioned " + 354 "in the body of the message." 355 if mightBeTrivial(change) { 356 return "", "" 357 } 358 var bugPattern string 359 switch { 360 case usesTracker(change.Repo) == mainTracker: 361 bugPattern = `#\d{4}` 362 default: 363 bugPattern = `#\d{2}` 364 } 365 lines := splitLines(change.Body) 366 if len(lines) > 0 && !match(bugPattern, lines[len(lines)-1]) { 367 return finding, commitMessageAdvice 368 } 369 return "", "" 370 }, 371 }, 372 }, 373 } 374 375 // Auxiliary advice. 376 var commitMessageAdvice = "The commit title and commit message body come from the GitHub PR title and description, and must be edited in the GitHub web interface (not via git). " + 377 "For instructions, see [here](https://go.dev/wiki/GerritBot/#how-does-gerritbot-determine-the-final-commit-message). " + 378 "For guidelines on commit messages for the Go project, see [here](https://go.dev/doc/contribute#commit_messages)." 379 380 // mightBeTrivial reports if a change looks like something 381 // where a commit body is often allowed to be skipped, such 382 // as correcting spelling, fixing a broken link, or 383 // adding or correcting an example. 384 func mightBeTrivial(change Change) bool { 385 return match(`(?i)typo|spell|grammar|grammatical|comment|readme|document|\bdocs?\b|example|(fix|correct|broken|wrong).*(link|url)`, change.Title) 386 } 387 388 // bugExamples returns a snippet of text that includes example bug references 389 // formatted as expected for a repo. 390 func bugExamples(repo string) string { 391 switch { 392 case repo == "go": 393 return "For this repo, the format is usually 'Fixes #12345' or 'Updates #12345'" 394 case usesTracker(repo) == mainTracker: 395 return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/go#12345' or 'Updates golang/go#12345'", repo) 396 case usesTracker(repo) == ownTracker: 397 return fmt.Sprintf("For the %s repo, the format is usually 'Fixes golang/%s#1234' or 'Updates golang/%s#1234'", repo, repo, repo) 398 default: 399 // We don't know how issues should be referenced for this repo, including this might be 400 // some future created after these rules were last updated, so be a bit vaguer. 401 return "For most repos outside the main go repo, the format is usually 'Fixes golang/go#12345' or 'Updates golang/go#12345'" 402 } 403 } 404 405 // packageExample returns an example usage of a package/component in a commit title 406 // for a given repo, along with what to call the leading words before the first 407 // colon ("package" vs. "component"). 408 func packageExample(repo string) (component string, example string) { 409 switch repo { 410 default: 411 return "package", "net/http: improve [...]" 412 case "website": 413 return "component", "_content/doc/go1.21: fix [...]" 414 case "vscode-go": 415 return "component", "src/goInstallTools: improve [...]" 416 } 417 } 418 419 // tracker is the issue tracker used by a repo. 420 type tracker int 421 422 const ( 423 mainTracker tracker = iota // uses the main golang/go issue tracker. 424 ownTracker // uses a repo-specific tracker, like golang/vscode-go. 425 unknownTracker // we don't know what tracker is used, which usually means the main tracker is used. 426 ) 427 428 // usesTracker reports if a repo uses the main golang/go tracker or a repo-specific tracker. 429 // Callers should deal gracefully with unknownTracker being reported (such as by giving less precise 430 // advice that is still generally useful), which might happen with a less commonly used repo or some future repo. 431 func usesTracker(repo string) tracker { 432 // It's OK for a repo to be missing from our list. If something is missing, either it is not frequently used 433 // and hence hopefully not a major problem, or in the rare case we are missing something frequently used, 434 // this can be updated if someone complains. 435 // TODO: not immediately clear where vulndb should go. In practice, it seems to use the main golang/go tracker, 436 // but it also has its own tracker (which might just be for security reports?). Leaving unspecified for now, 437 // which results in vaguer advice. 438 // TODO: oauth2 might transition from its own tracker to the main tracker (https://go.dev/issue/56402), so 439 // we also leave unspecified for now. 440 switch repo { 441 case "go", "arch", "build", "crypto", "debug", "exp", "image", "mobile", "mod", "net", "perf", "pkgsite", "playground", 442 "proposal", "review", "sync", "sys", "telemetry", "term", "text", "time", "tools", "tour", "vuln", "website", "xerrors": 443 return mainTracker 444 case "vscode-go": 445 return ownTracker 446 default: 447 return unknownTracker 448 } 449 }