// github.com/naive/revgrep@v0.0.0-20240331191128-ab485935cedc/revgrep.go (about)

// Package revgrep filters the output of static analysis tools down to the
// lines changed relative to a commit reference.
package revgrep

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// Checker provides APIs to filter static analysis tools to specific commits,
// such as showing only issues since the last commit.
type Checker struct {
	// Patch is a unified patch that is read to detect the lines being changed.
	// If nil, revgrep attempts to detect the VCS and generate an appropriate patch.
	// Auto-detection searches for uncommitted changes first and, if none are found,
	// generates a patch from the last committed change.
	// File paths within patches must be relative to the current working directory.
	Patch io.Reader
	// NewFiles is a list of file names where the entire contents of the file is new.
	// NOTE(review): the original comment says these carry absolute paths, but
	// GitPatch stores the output of `git ls-files` verbatim and linesChanged uses
	// the entries directly as lookup keys — confirm the expected form.
	NewFiles []string
	// Debug sets the debug writer for additional output; nil disables debug output.
	Debug io.Writer
	// RevisionFrom is the revision to start checking at.
	// Leave blank for auto-detection; ignored if Patch is set.
	RevisionFrom string
	// WholeFiles indicates that the user wishes to see all issues that come up
	// anywhere in any file that has been changed in this revision or patch.
	WholeFiles bool
	// RevisionTo is the revision to finish checking at.
	// Leave blank for auto-detection; ignored if Patch is set.
	RevisionTo string
	// Regexp matches path, line number, optional column number, and message,
	// as capture groups 1 to 4 in that order. If empty, Check uses a built-in pattern.
	Regexp string
	// AbsPath is used to make an absolute path of an issue's filename relative
	// in order to match the patch file.
	// If not set, the current working directory is used.
	AbsPath string

	// changes holds the changed positions per file, calculated by Prepare
	// for subsequent calls to IsNewIssue.
	changes map[string][]pos
}

// Issue contains metadata about an issue found.
48 type Issue struct { 49 // File is the name of the file as it appeared from the patch. 50 File string 51 // LineNo is the line number of the file. 52 LineNo int 53 // ColNo is the column number or 0 if none could be parsed. 54 ColNo int 55 // HunkPos is position from file's first @@, for new files this will be the line number. 56 // See also: https://developer.github.com/v3/pulls/comments/#create-a-comment 57 HunkPos int 58 // Issue text as it appeared from the tool. 59 Issue string 60 // Message is the issue without file name, line number and column number. 61 Message string 62 } 63 64 // InputIssue represents issue found by some linter. 65 type InputIssue interface { 66 FilePath() string 67 Line() int 68 } 69 70 type simpleInputIssue struct { 71 filePath string 72 lineNumber int 73 } 74 75 type pos struct { 76 lineNo int // line number 77 hunkPos int // position relative to first @@ in file 78 } 79 80 func (i simpleInputIssue) FilePath() string { 81 return i.filePath 82 } 83 84 func (i simpleInputIssue) Line() int { 85 return i.lineNumber 86 } 87 88 // Prepare extracts a patch and changed lines. 89 func (c *Checker) Prepare() error { 90 returnErr := c.preparePatch() 91 c.changes = c.linesChanged() 92 return returnErr 93 } 94 95 // IsNewIssue checks whether issue found by linter is new: it was found in changed lines. 
96 func (c *Checker) IsNewIssue(i InputIssue) (hunkPos int, isNew bool) { 97 fchanges, ok := c.changes[filepath.ToSlash(i.FilePath())] 98 if !ok { // file wasn't changed 99 return 0, false 100 } 101 102 if c.WholeFiles { 103 return i.Line(), true 104 } 105 106 var ( 107 fpos pos 108 changed bool 109 ) 110 // found file, see if lines matched 111 for _, pos := range fchanges { 112 if pos.lineNo == i.Line() { 113 fpos = pos 114 changed = true 115 break 116 } 117 } 118 119 if changed || fchanges == nil { 120 // either file changed or it's a new file 121 hunkPos := fpos.lineNo 122 if changed { // existing file changed 123 hunkPos = fpos.hunkPos 124 } 125 126 return hunkPos, true 127 } 128 129 return 0, false 130 } 131 132 // Check scans reader and writes any lines to writer that have been added in Checker.Patch. 133 // 134 // Returns the issues written to writer when no error occurs. 135 // 136 // If no VCS could be found or other VCS errors occur, 137 // all issues are written to writer and an error is returned. 138 // 139 // File paths in reader must be relative to current working directory or absolute. 
140 func (c *Checker) Check(reader io.Reader, writer io.Writer) (issues []Issue, err error) { 141 returnErr := c.Prepare() 142 writeAll := returnErr != nil 143 144 // file.go:lineNo:colNo:message 145 // colNo is optional, strip spaces before message 146 lineRE := regexp.MustCompile(`(.+\.go):([0-9]+):([0-9]+)?:?\s*(.*)`) 147 if c.Regexp != "" { 148 lineRE, err = regexp.Compile(c.Regexp) 149 if err != nil { 150 return nil, fmt.Errorf("could not parse regexp: %w", err) 151 } 152 } 153 154 // TODO consider lazy loading this, if there's nothing in stdin, no point 155 // checking for recent changes 156 c.debugf("lines changed: %+v", c.changes) 157 158 absPath := c.AbsPath 159 if absPath == "" { 160 absPath, err = os.Getwd() 161 if err != nil { 162 returnErr = fmt.Errorf("could not get current working directory: %w", err) 163 } 164 } 165 166 // Scan each line in reader and only write those lines if lines changed 167 scanner := bufio.NewScanner(reader) 168 for scanner.Scan() { 169 line := lineRE.FindSubmatch(scanner.Bytes()) 170 if line == nil { 171 c.debugf("cannot parse file+line number: %s", scanner.Text()) 172 continue 173 } 174 175 if writeAll { 176 _, _ = fmt.Fprintln(writer, scanner.Text()) 177 continue 178 } 179 180 // Make absolute path names relative 181 path := string(line[1]) 182 if rel, err := filepath.Rel(absPath, path); err == nil { 183 c.debugf("rewrote path from %q to %q (absPath: %q)", path, rel, absPath) 184 path = rel 185 } 186 187 // Parse line number 188 lno, err := strconv.ParseUint(string(line[2]), 10, 64) 189 if err != nil { 190 c.debugf("cannot parse line number: %q", scanner.Text()) 191 continue 192 } 193 194 // Parse optional column number 195 var cno uint64 196 if len(line[3]) > 0 { 197 cno, err = strconv.ParseUint(string(line[3]), 10, 64) 198 if err != nil { 199 c.debugf("cannot parse column number: %q", scanner.Text()) 200 // Ignore this error and continue 201 } 202 } 203 204 // Extract message 205 msg := string(line[4]) 206 207 
c.debugf("path: %q, lineNo: %v, colNo: %v, msg: %q", path, lno, cno, msg) 208 209 simpleIssue := simpleInputIssue{filePath: path, lineNumber: int(lno)} 210 211 hunkPos, changed := c.IsNewIssue(simpleIssue) 212 if changed { 213 issue := Issue{ 214 File: path, 215 LineNo: int(lno), 216 ColNo: int(cno), 217 HunkPos: hunkPos, 218 Issue: scanner.Text(), 219 Message: msg, 220 } 221 issues = append(issues, issue) 222 223 _, _ = fmt.Fprintln(writer, scanner.Text()) 224 } else { 225 c.debugf("unchanged: %s", scanner.Text()) 226 } 227 } 228 229 if err := scanner.Err(); err != nil { 230 returnErr = fmt.Errorf("error reading standard input: %w", err) 231 } 232 233 return issues, returnErr 234 } 235 236 func (c *Checker) debugf(format string, s ...interface{}) { 237 if c.Debug != nil { 238 _, _ = fmt.Fprint(c.Debug, "DEBUG: ") 239 _, _ = fmt.Fprintf(c.Debug, format+"\n", s...) 240 } 241 } 242 243 func (c *Checker) preparePatch() error { 244 // Check if patch is supplied, if not, retrieve from VCS 245 if c.Patch == nil { 246 var err error 247 c.Patch, c.NewFiles, err = GitPatch(c.RevisionFrom, c.RevisionTo) 248 if err != nil { 249 return fmt.Errorf("could not read git repo: %w", err) 250 } 251 if c.Patch == nil { 252 return errors.New("no version control repository found") 253 } 254 } 255 256 return nil 257 } 258 259 // linesChanges returns a map of file names to line numbers being changed. 260 // If key is nil, the file has been recently added, else it contains a slice of positions that have been added. 
261 func (c *Checker) linesChanged() map[string][]pos { 262 type state struct { 263 file string 264 lineNo int // current line number within chunk 265 hunkPos int // current line count since first @@ in file 266 changes []pos // position of changes 267 } 268 269 changes := make(map[string][]pos) 270 271 for _, file := range c.NewFiles { 272 changes[file] = nil 273 } 274 275 if c.Patch == nil { 276 return changes 277 } 278 279 var s state 280 281 scanner := bufio.NewReader(c.Patch) 282 var scanErr error 283 for { 284 lineB, isPrefix, err := scanner.ReadLine() 285 if isPrefix { 286 // If a single line overflowed the buffer, don't bother processing it as 287 // it's likey part of a file and not relevant to the patch. 288 continue 289 } 290 if err != nil { 291 scanErr = err 292 break 293 } 294 line := strings.TrimRight(string(lineB), "\n") 295 296 c.debugf(line) 297 s.lineNo++ 298 s.hunkPos++ 299 switch { 300 case strings.HasPrefix(line, "+++ ") && len(line) > 4: 301 if s.changes != nil { 302 // record the last state 303 changes[s.file] = s.changes 304 } 305 // 6 removes "+++ b/" 306 s = state{file: line[6:], hunkPos: -1, changes: []pos{}} 307 case strings.HasPrefix(line, "@@ "): 308 // @@ -1 +2,4 @@ 309 // chdr ^^^^^^^^^^^^^ 310 // ahdr ^^^^ 311 // cstart ^ 312 chdr := strings.Split(line, " ") 313 ahdr := strings.Split(chdr[2], ",") 314 // [1:] to remove leading plus 315 cstart, err := strconv.ParseUint(ahdr[0][1:], 10, 64) 316 if err != nil { 317 panic(err) 318 } 319 s.lineNo = int(cstart) - 1 // -1 as cstart is the next line number 320 case strings.HasPrefix(line, "-"): 321 s.lineNo-- 322 case strings.HasPrefix(line, "+"): 323 s.changes = append(s.changes, pos{lineNo: s.lineNo, hunkPos: s.hunkPos}) 324 } 325 } 326 327 if !errors.Is(scanErr, io.EOF) { 328 _, _ = fmt.Fprintln(os.Stderr, "reading standard input:", scanErr) 329 } 330 331 // record the last state 332 changes[s.file] = s.changes 333 334 return changes 335 } 336 337 // readGitDiffStderr returns the error 
// from git diff stderr.
//
// The captured stderr text becomes the error message verbatim.
func readGitDiffStderr(buff bytes.Buffer) error {
	output, err := io.ReadAll(&buff)
	if err != nil {
		return fmt.Errorf("could not read git diff stderr: %w", err)
	}

	return errors.New(string(output))
}

// GitPatch returns a patch from a git repository together with the list of
// untracked files.
//
// If `git status` fails, it is assumed that no git repository exists and
// (nil, nil, nil) is returned; other git failures are returned as errors.
//
// revisionFrom and revisionTo define the git diff parameters:
//   - both blank: if there are unstaged changes or untracked files, only those
//     are returned; otherwise the changes since HEAD~ are returned;
//   - only revisionFrom set: the diff from that revision is returned together
//     with the untracked files; to exclude untracked files set revisionTo to HEAD~;
//   - both set: the diff between the two revisions is returned without
//     untracked files.
//
// It's incorrect to specify revisionTo without a revisionFrom.
func GitPatch(revisionFrom, revisionTo string) (io.Reader, []string, error) {
	// check if git repo exists
	if err := exec.Command("git", "status", "--porcelain").Run(); err != nil {
		// don't return an error, we assume the error is not repo exists
		return nil, nil, nil
	}

	// List untracked files. Only stdout is read: with CombinedOutput any
	// warning git printed on stderr would have been parsed as a file name.
	ls, err := exec.Command("git", "ls-files", "--others", "--exclude-standard").Output()
	if err != nil {
		return nil, nil, fmt.Errorf("error executing git ls-files: %w", err)
	}

	var newFiles []string
	for _, file := range bytes.Split(ls, []byte{'\n'}) {
		if len(file) == 0 || bytes.HasSuffix(file, []byte{'/'}) {
			// ls-files was sometimes showing directories when they were ignored
			// I couldn't create a test case for this as I couldn't reproduce correctly
			// for the moment, just exclude files with trailing /
			continue
		}
		newFiles = append(newFiles, string(file))
	}

	var patch, errBuff bytes.Buffer

	if revisionFrom != "" {
		cmd := gitDiff(revisionFrom)
		if revisionTo != "" {
			cmd.Args = append(cmd.Args, revisionTo)
		}
		cmd.Args = append(cmd.Args, "--")

		cmd.Stdout = &patch
		cmd.Stderr = &errBuff
		if err := cmd.Run(); err != nil {
			gitDiffStderr := readGitDiffStderr(errBuff)
			return nil, nil, fmt.Errorf("error executing git diff %q %q: %w\n%v", revisionFrom, revisionTo, err, gitDiffStderr)
		}

		if revisionTo == "" {
			return &patch, newFiles, nil
		}

		return &patch, nil, nil
	}

	// make a patch for unstaged changes
	cmd := gitDiff("--")
	cmd.Stdout = &patch
	cmd.Stderr = &errBuff
	if err := cmd.Run(); err != nil {
		gitDiffStderr := readGitDiffStderr(errBuff)
		return nil, nil, fmt.Errorf("error executing git diff: %w\n%v", err, gitDiffStderr)
	}
	unstaged := patch.Len() > 0

	// If there's unstaged changes OR untracked changes (or both), then this is
	// a suitable patch
	if unstaged || newFiles != nil {
		return &patch, newFiles, nil
	}

	// check for changes in recent commit

	// Drop anything the successful diff above wrote to stderr so that a
	// failure below reports only its own output.
	errBuff.Reset()

	cmd = gitDiff("HEAD~", "--")
	cmd.Stdout = &patch
	cmd.Stderr = &errBuff
	if err := cmd.Run(); err != nil {
		gitDiffStderr := readGitDiffStderr(errBuff)
		return nil, nil, fmt.Errorf("error executing git diff HEAD~: %w\n%v", err, gitDiffStderr)
	}

	return &patch, nil, nil
}

// gitDiff builds (without running) a
// `git diff --color=never --no-ext-diff [--default-prefix] --relative <extraArgs>`
// command. --default-prefix is added only when the installed git is at least
// 2.41.0 — presumably so that file headers keep the "a/" and "b/" prefixes
// that linesChanged strips.
func gitDiff(extraArgs ...string) *exec.Cmd {
	cmd := exec.Command("git", "diff", "--color=never", "--no-ext-diff")

	if isSupportedByGit(2, 41, 0) {
		cmd.Args = append(cmd.Args, "--default-prefix")
	}

	cmd.Args = append(cmd.Args, "--relative")
	cmd.Args = append(cmd.Args, extraArgs...)

	return cmd
}

// isSupportedByGit reports whether the installed git is at least version
// major.minor.patch. It returns false when git cannot be run or its version
// cannot be parsed.
func isSupportedByGit(major, minor, patch int) bool {
	// Only stdout is parsed; a warning on stderr must not shift the fields.
	output, err := exec.Command("git", "version").Output()
	if err != nil {
		return false
	}

	return gitVersionAtLeast(output, major, minor, patch)
}

// gitVersionRE extracts major, minor and an optional patch number from the
// version field of `git version` output; any trailing suffix is ignored.
var gitVersionRE = regexp.MustCompile(`^(\d+)\.(\d+)(?:\.(\d+))?.*$`)

// gitVersionAtLeast reports whether the version in versionOutput (the text
// printed by `git version`, e.g. "git version 2.41.0") is at least
// major.minor.patch. A missing patch component counts as 0.
func gitVersionAtLeast(versionOutput []byte, major, minor, patch int) bool {
	parts := bytes.Split(bytes.TrimSpace(versionOutput), []byte(" "))
	if len(parts) < 3 {
		return false
	}

	vp := gitVersionRE.FindStringSubmatch(string(parts[2]))
	if vp == nil {
		return false
	}

	currentMajor, err := strconv.Atoi(vp[1])
	if err != nil {
		return false
	}

	currentMinor, err := strconv.Atoi(vp[2])
	if err != nil {
		return false
	}

	// The pattern makes the patch component optional, so an unmatched group
	// has to be treated as 0 instead of failing the whole check.
	currentPatch := 0
	if vp[3] != "" {
		currentPatch, err = strconv.Atoi(vp[3])
		if err != nil {
			return false
		}
	}

	// Compare component by component; a weighted sum overflows 32-bit ints
	// and mis-orders versions whose patch number reaches 1000.
	switch {
	case currentMajor != major:
		return currentMajor > major
	case currentMinor != minor:
		return currentMinor > minor
	default:
		return currentPatch >= patch
	}
}