// Source: github.com/golangci/revgrep@v0.5.4-0.20240409234448-4d9d98340cb9/revgrep.go

// Package revgrep filters the output of static analysis tools to only the lines changed based on a commit reference.
package revgrep

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// Checker provides APIs to filter static analysis tools to specific commits,
// such as showing only issues since last commit.
//
// Call Prepare once and then IsNewIssue per issue, or call Check to filter a
// whole stream of tool output in one go.
type Checker struct {
	// Patch file (unified) to read to detect lines being changed,
	// if nil revgrep will attempt to detect the VCS and generate an appropriate patch.
	// Auto-detection will search for uncommitted changes first,
	// if none found, will generate a patch from last committed change.
	// File paths within patches must be relative to current working directory.
	Patch io.Reader
	// NewFiles is a list of file names (with absolute paths) where the entire contents of the file is new.
	// NOTE(review): when the patch is auto-detected, GitPatch fills this from
	// `git ls-files` output unmodified, and linesChanged uses the entries as map
	// keys next to paths taken from the patch - confirm whether "absolute" is
	// really what is expected here.
	NewFiles []string
	// Debug sets the debug writer for additional output.
	// When nil, no debug output is produced.
	Debug io.Writer
	// RevisionFrom is the revision to start checking at. Leave blank for auto-detection; ignored if Patch is set.
	RevisionFrom string
	// WholeFiles indicates that the user wishes to see all issues that come up anywhere in any file that has been changed in this revision or patch.
	WholeFiles bool
	// RevisionTo is the revision to finish checking at. Leave blank for auto-detection; ignored if Patch is set.
	RevisionTo string
	// Regexp to match path, line number, optional column number, and message.
	// Check reads these from capture groups 1 to 4, in that order.
	// When empty, Check falls back to a built-in pattern for "file.go:line:col: message".
	Regexp string
	// AbsPath is used to make an absolute path of an issue's filename to be relative in order to match patch file.
	// If not set, current working directory is used.
	AbsPath string

	// Calculated changes for next calls to IsNewIssue.
	// Keys are file names; a nil slice marks a file that is entirely new,
	// otherwise the slice lists the positions of the added lines.
	changes map[string][]pos
}

// Issue contains metadata about an issue found.
48 type Issue struct { 49 // File is the name of the file as it appeared from the patch. 50 File string 51 // LineNo is the line number of the file. 52 LineNo int 53 // ColNo is the column number or 0 if none could be parsed. 54 ColNo int 55 // HunkPos is position from file's first @@, for new files this will be the line number. 56 // See also: https://developer.github.com/v3/pulls/comments/#create-a-comment 57 HunkPos int 58 // Issue text as it appeared from the tool. 59 Issue string 60 // Message is the issue without file name, line number and column number. 61 Message string 62 } 63 64 // InputIssue represents issue found by some linter. 65 type InputIssue interface { 66 FilePath() string 67 Line() int 68 } 69 70 type simpleInputIssue struct { 71 filePath string 72 lineNumber int 73 } 74 75 type pos struct { 76 lineNo int // line number 77 hunkPos int // position relative to first @@ in file 78 } 79 80 func (i simpleInputIssue) FilePath() string { 81 return i.filePath 82 } 83 84 func (i simpleInputIssue) Line() int { 85 return i.lineNumber 86 } 87 88 // Prepare extracts a patch and changed lines. 89 func (c *Checker) Prepare() error { 90 returnErr := c.preparePatch() 91 c.changes = c.linesChanged() 92 return returnErr 93 } 94 95 // IsNewIssue checks whether issue found by linter is new: it was found in changed lines. 
96 func (c *Checker) IsNewIssue(i InputIssue) (hunkPos int, isNew bool) { 97 fchanges, ok := c.changes[filepath.ToSlash(i.FilePath())] 98 if !ok { // file wasn't changed 99 return 0, false 100 } 101 102 if c.WholeFiles { 103 return i.Line(), true 104 } 105 106 var ( 107 fpos pos 108 changed bool 109 ) 110 // found file, see if lines matched 111 for _, pos := range fchanges { 112 if pos.lineNo == i.Line() { 113 fpos = pos 114 changed = true 115 break 116 } 117 } 118 119 if changed || fchanges == nil { 120 // either file changed or it's a new file 121 hunkPos := fpos.lineNo 122 if changed { // existing file changed 123 hunkPos = fpos.hunkPos 124 } 125 126 return hunkPos, true 127 } 128 129 return 0, false 130 } 131 132 // Check scans reader and writes any lines to writer that have been added in Checker.Patch. 133 // 134 // Returns the issues written to writer when no error occurs. 135 // 136 // If no VCS could be found or other VCS errors occur, 137 // all issues are written to writer and an error is returned. 138 // 139 // File paths in reader must be relative to current working directory or absolute. 
140 func (c *Checker) Check(reader io.Reader, writer io.Writer) (issues []Issue, err error) { 141 returnErr := c.Prepare() 142 writeAll := returnErr != nil 143 144 // file.go:lineNo:colNo:message 145 // colNo is optional, strip spaces before message 146 lineRE := regexp.MustCompile(`(.+\.go):([0-9]+):([0-9]+)?:?\s*(.*)`) 147 if c.Regexp != "" { 148 lineRE, err = regexp.Compile(c.Regexp) 149 if err != nil { 150 return nil, fmt.Errorf("could not parse regexp: %w", err) 151 } 152 } 153 154 // TODO consider lazy loading this, if there's nothing in stdin, no point 155 // checking for recent changes 156 c.debugf("lines changed: %+v", c.changes) 157 158 absPath := c.AbsPath 159 if absPath == "" { 160 absPath, err = os.Getwd() 161 if err != nil { 162 returnErr = fmt.Errorf("could not get current working directory: %w", err) 163 } 164 } 165 166 // Scan each line in reader and only write those lines if lines changed 167 scanner := bufio.NewScanner(reader) 168 for scanner.Scan() { 169 line := lineRE.FindSubmatch(scanner.Bytes()) 170 if line == nil { 171 c.debugf("cannot parse file+line number: %s", scanner.Text()) 172 continue 173 } 174 175 if writeAll { 176 _, _ = fmt.Fprintln(writer, scanner.Text()) 177 continue 178 } 179 180 // Make absolute path names relative 181 path := string(line[1]) 182 if rel, err := filepath.Rel(absPath, path); err == nil { 183 c.debugf("rewrote path from %q to %q (absPath: %q)", path, rel, absPath) 184 path = rel 185 } 186 187 // Parse line number 188 lno, err := strconv.ParseUint(string(line[2]), 10, 64) 189 if err != nil { 190 c.debugf("cannot parse line number: %q", scanner.Text()) 191 continue 192 } 193 194 // Parse optional column number 195 var cno uint64 196 if len(line[3]) > 0 { 197 cno, err = strconv.ParseUint(string(line[3]), 10, 64) 198 if err != nil { 199 c.debugf("cannot parse column number: %q", scanner.Text()) 200 // Ignore this error and continue 201 } 202 } 203 204 // Extract message 205 msg := string(line[4]) 206 207 
c.debugf("path: %q, lineNo: %v, colNo: %v, msg: %q", path, lno, cno, msg) 208 209 simpleIssue := simpleInputIssue{filePath: path, lineNumber: int(lno)} 210 211 hunkPos, changed := c.IsNewIssue(simpleIssue) 212 if changed { 213 issue := Issue{ 214 File: path, 215 LineNo: int(lno), 216 ColNo: int(cno), 217 HunkPos: hunkPos, 218 Issue: scanner.Text(), 219 Message: msg, 220 } 221 issues = append(issues, issue) 222 223 _, _ = fmt.Fprintln(writer, scanner.Text()) 224 } else { 225 c.debugf("unchanged: %s", scanner.Text()) 226 } 227 } 228 229 if err := scanner.Err(); err != nil { 230 returnErr = fmt.Errorf("error reading standard input: %w", err) 231 } 232 233 return issues, returnErr 234 } 235 236 func (c *Checker) debugf(format string, s ...interface{}) { 237 if c.Debug != nil { 238 _, _ = fmt.Fprint(c.Debug, "DEBUG: ") 239 _, _ = fmt.Fprintf(c.Debug, format+"\n", s...) 240 } 241 } 242 243 func (c *Checker) preparePatch() error { 244 // Check if patch is supplied, if not, retrieve from VCS 245 if c.Patch == nil { 246 var err error 247 c.Patch, c.NewFiles, err = GitPatch(c.RevisionFrom, c.RevisionTo) 248 if err != nil { 249 return fmt.Errorf("could not read git repo: %w", err) 250 } 251 if c.Patch == nil { 252 return errors.New("no version control repository found") 253 } 254 } 255 256 return nil 257 } 258 259 // linesChanges returns a map of file names to line numbers being changed. 260 // If key is nil, the file has been recently added, else it contains a slice of positions that have been added. 
261 func (c *Checker) linesChanged() map[string][]pos { 262 type state struct { 263 file string 264 lineNo int // current line number within chunk 265 hunkPos int // current line count since first @@ in file 266 changes []pos // position of changes 267 } 268 269 changes := make(map[string][]pos) 270 271 for _, file := range c.NewFiles { 272 changes[file] = nil 273 } 274 275 if c.Patch == nil { 276 return changes 277 } 278 279 var s state 280 281 scanner := bufio.NewReader(c.Patch) 282 var scanErr error 283 for { 284 lineB, isPrefix, err := scanner.ReadLine() 285 if isPrefix { 286 // If a single line overflowed the buffer, don't bother processing it as 287 // it's likey part of a file and not relevant to the patch. 288 continue 289 } 290 if err != nil { 291 scanErr = err 292 break 293 } 294 line := strings.TrimRight(string(lineB), "\n") 295 296 c.debugf(line) 297 s.lineNo++ 298 s.hunkPos++ 299 switch { 300 case strings.HasPrefix(line, "+++ ") && len(line) > 4: 301 if s.changes != nil { 302 // record the last state 303 changes[s.file] = s.changes 304 } 305 // 6 removes "+++ b/" 306 s = state{file: line[6:], hunkPos: -1, changes: []pos{}} 307 case strings.HasPrefix(line, "@@ "): 308 // @@ -1 +2,4 @@ 309 // chdr ^^^^^^^^^^^^^ 310 // ahdr ^^^^ 311 // cstart ^ 312 chdr := strings.Split(line, " ") 313 ahdr := strings.Split(chdr[2], ",") 314 // [1:] to remove leading plus 315 cstart, err := strconv.ParseUint(ahdr[0][1:], 10, 64) 316 if err != nil { 317 panic(err) 318 } 319 s.lineNo = int(cstart) - 1 // -1 as cstart is the next line number 320 case strings.HasPrefix(line, "-"): 321 s.lineNo-- 322 case strings.HasPrefix(line, "+"): 323 s.changes = append(s.changes, pos{lineNo: s.lineNo, hunkPos: s.hunkPos}) 324 } 325 } 326 327 if !errors.Is(scanErr, io.EOF) { 328 _, _ = fmt.Fprintln(os.Stderr, "reading standard input:", scanErr) 329 } 330 331 // record the last state 332 changes[s.file] = s.changes 333 334 return changes 335 } 336 337 // GitPatch returns a patch from a git 
// repository.
// If no git repository was found and no errors occurred, nil is returned,
// else an error is returned.
// revisionFrom and revisionTo define the git diff parameters,
// if left blank and there are unstaged changes or untracked files,
// only those will be returned else only check changes since HEAD~.
// If revisionFrom is set but revisionTo is not,
// untracked files will be included, to exclude untracked files set revisionTo to HEAD~.
// It's incorrect to specify revisionTo without a revisionFrom.
//
// The second result lists the untracked files reported by git ls-files;
// it is nil whenever untracked files are not part of the answer.
func GitPatch(revisionFrom, revisionTo string) (io.Reader, []string, error) {
	// check if git repo exists
	if err := exec.Command("git", "status", "--porcelain").Run(); err != nil {
		// don't return an error, we assume the error means no repository exists
		return nil, nil, nil
	}

	// make a patch for untracked files
	// Only stdout is parsed: with the combined output, anything git printed
	// on stderr would be mistaken for file names.
	ls, err := exec.Command("git", "ls-files", "--others", "--exclude-standard").Output()
	if err != nil {
		return nil, nil, fmt.Errorf("error executing git ls-files: %w", err)
	}

	var newFiles []string

	for _, file := range bytes.Split(ls, []byte{'\n'}) {
		if len(file) == 0 || bytes.HasSuffix(file, []byte{'/'}) {
			// ls-files was sometimes showing directories when they were ignored
			// I couldn't create a test case for this as I couldn't reproduce correctly for the moment,
			// just exclude files with trailing /
			continue
		}

		newFiles = append(newFiles, string(file))
	}

	if revisionFrom != "" {
		args := []string{revisionFrom}

		if revisionTo != "" {
			args = append(args, revisionTo)
		}

		args = append(args, "--")

		patch, errDiff := gitDiff(args...)
		if errDiff != nil {
			return nil, nil, errDiff
		}

		if revisionTo == "" {
			return patch, newFiles, nil
		}

		return patch, nil, nil
	}

	// make a patch for unstaged changes
	patch, err := gitDiff("--")
	if err != nil {
		return nil, nil, err
	}

	unstaged := patch.Len() > 0

	// If there's unstaged changes OR untracked changes (or both),
	// then this is a suitable patch
	if unstaged || len(newFiles) > 0 {
		return patch, newFiles, nil
	}

	// check for changes in recent commit
	patch, err = gitDiff("HEAD~", "--")
	if err != nil {
		return nil, nil, err
	}

	return patch, nil, nil
}

// gitDiff runs "git diff --color=never --no-ext-diff [--default-prefix]
// --relative" followed by extraArgs and returns what git wrote to stdout.
// --default-prefix is only passed to git 2.41.0 or newer.
// On failure the error wraps both the execution error and git's stderr text.
func gitDiff(extraArgs ...string) (*bytes.Buffer, error) {
	cmd := exec.Command("git", "diff", "--color=never", "--no-ext-diff")

	if isSupportedByGit(2, 41, 0) {
		cmd.Args = append(cmd.Args, "--default-prefix")
	}

	cmd.Args = append(cmd.Args, "--relative")
	cmd.Args = append(cmd.Args, extraArgs...)

	patch := new(bytes.Buffer)
	errBuff := new(bytes.Buffer)

	cmd.Stdout = patch
	cmd.Stderr = errBuff

	if err := cmd.Run(); err != nil {
		return nil, fmt.Errorf("error executing %q: %w: %w", strings.Join(cmd.Args, " "), err, readAsError(errBuff))
	}

	return patch, nil
}

// readAsError drains buff and returns its content as an error value.
// If reading fails, the read error is returned instead.
func readAsError(buff io.Reader) error {
	output, err := io.ReadAll(buff)
	if err != nil {
		return fmt.Errorf("read stderr: %w", err)
	}

	return errors.New(string(output))
}

// isSupportedByGit reports whether the git executable found on PATH is at
// least version major.minor.patch. It returns false when "git version" cannot
// be run or its output cannot be parsed.
func isSupportedByGit(major, minor, patch int) bool {
	output, err := exec.Command("git", "version").CombinedOutput()
	if err != nil {
		return false
	}

	return gitVersionAtLeast(output, major, minor, patch)
}

// gitVersionAtLeast reports whether the version printed by "git version"
// (for example "git version 2.41.0") is at least major.minor.patch.
// The version is the third space-separated word of output; anything after
// "major.minor[.patch]" is ignored and a missing patch component counts as 0.
func gitVersionAtLeast(output []byte, major, minor, patch int) bool {
	parts := bytes.Split(bytes.TrimSpace(output), []byte(" "))
	if len(parts) < 3 {
		return false
	}

	vp := regexp.MustCompile(`^(\d+)\.(\d+)(?:\.(\d+))?.*$`).FindStringSubmatch(string(parts[2]))
	if vp == nil {
		return false
	}

	var current [3]int

	for i := range current {
		if vp[i+1] == "" {
			// only the optional patch group can be empty
			continue
		}

		n, err := strconv.Atoi(vp[i+1])
		if err != nil {
			return false
		}

		current[i] = n
	}

	// Compare component by component; unlike a weighted sum this cannot
	// overflow and does not depend on the size of any component.
	for i, want := range [3]int{major, minor, patch} {
		if current[i] != want {
			return current[i] > want
		}
	}

	return true
}