github.com/golangci/revgrep@v0.5.4-0.20240409234448-4d9d98340cb9/revgrep.go (about)

     1  // Package revgrep filters the output of static analysis tools to only the lines changed relative to a commit reference.
     2  package revgrep
     3  
     4  import (
     5  	"bufio"
     6  	"bytes"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"os/exec"
    12  	"path/filepath"
    13  	"regexp"
    14  	"strconv"
    15  	"strings"
    16  )
    17  
// Checker provides APIs to filter the output of static analysis tools to specific commits,
// such as showing only issues introduced since the last commit.
type Checker struct {
	// Patch is a unified patch to read in order to detect the lines being changed.
	// If nil, revgrep will attempt to detect the VCS and generate an appropriate patch.
	// Auto-detection searches for uncommitted changes first and,
	// if none are found, generates a patch from the last committed change.
	// File paths within patches must be relative to the current working directory.
	Patch io.Reader
	// NewFiles is a list of file names where the entire contents of the file is new.
	// The names are used verbatim as lookup keys when issues are matched against changes.
	// NOTE(review): this was previously documented as holding absolute paths, while
	// GitPatch fills it with the names printed by "git ls-files" — confirm which is intended.
	NewFiles []string
	// Debug sets the debug writer for additional output; nil disables debug output.
	Debug io.Writer
	// RevisionFrom is the revision to start checking at.
	// Leave blank for auto-detection; ignored if Patch is set.
	RevisionFrom string
	// WholeFiles indicates that the user wishes to see every issue found anywhere
	// in any file that has been changed in this revision or patch.
	WholeFiles bool
	// RevisionTo is the revision to finish checking at.
	// Leave blank for auto-detection; ignored if Patch is set.
	RevisionTo string
	// Regexp matches path, line number, optional column number, and message (in that capture-group order).
	// If empty, a default pattern for "file.go:line:col: message" is used.
	Regexp string
	// AbsPath is used to make the absolute path of an issue's file name relative, in order to match the patch file.
	// If not set, the current working directory is used.
	AbsPath string

	// changes holds the changes calculated by Prepare for subsequent calls to IsNewIssue.
	// It maps a file name to its changed positions; a nil slice marks a file from NewFiles.
	changes map[string][]pos
}
    46  
// Issue contains metadata about an issue found.
type Issue struct {
	// File is the name of the file, made relative so that it matches the names used in the patch.
	File string
	// LineNo is the line number within the file.
	LineNo int
	// ColNo is the column number, or 0 if none could be parsed.
	ColNo int
	// HunkPos is the position counted from the file's first @@; for new files this will be the line number.
	// See also: https://developer.github.com/v3/pulls/comments/#create-a-comment
	HunkPos int
	// Issue is the full issue text as it appeared from the tool.
	Issue string
	// Message is the issue without file name, line number and column number.
	Message string
}
    63  
// InputIssue represents an issue found by some linter.
type InputIssue interface {
	// FilePath returns the path of the file the issue was reported in.
	FilePath() string
	// Line returns the line number the issue was reported on.
	Line() int
}
    69  
    70  type simpleInputIssue struct {
    71  	filePath   string
    72  	lineNumber int
    73  }
    74  
    75  type pos struct {
    76  	lineNo  int // line number
    77  	hunkPos int // position relative to first @@ in file
    78  }
    79  
    80  func (i simpleInputIssue) FilePath() string {
    81  	return i.filePath
    82  }
    83  
    84  func (i simpleInputIssue) Line() int {
    85  	return i.lineNumber
    86  }
    87  
    88  // Prepare extracts a patch and changed lines.
    89  func (c *Checker) Prepare() error {
    90  	returnErr := c.preparePatch()
    91  	c.changes = c.linesChanged()
    92  	return returnErr
    93  }
    94  
    95  // IsNewIssue checks whether issue found by linter is new: it was found in changed lines.
    96  func (c *Checker) IsNewIssue(i InputIssue) (hunkPos int, isNew bool) {
    97  	fchanges, ok := c.changes[filepath.ToSlash(i.FilePath())]
    98  	if !ok { // file wasn't changed
    99  		return 0, false
   100  	}
   101  
   102  	if c.WholeFiles {
   103  		return i.Line(), true
   104  	}
   105  
   106  	var (
   107  		fpos    pos
   108  		changed bool
   109  	)
   110  	// found file, see if lines matched
   111  	for _, pos := range fchanges {
   112  		if pos.lineNo == i.Line() {
   113  			fpos = pos
   114  			changed = true
   115  			break
   116  		}
   117  	}
   118  
   119  	if changed || fchanges == nil {
   120  		// either file changed or it's a new file
   121  		hunkPos := fpos.lineNo
   122  		if changed { // existing file changed
   123  			hunkPos = fpos.hunkPos
   124  		}
   125  
   126  		return hunkPos, true
   127  	}
   128  
   129  	return 0, false
   130  }
   131  
   132  // Check scans reader and writes any lines to writer that have been added in Checker.Patch.
   133  //
   134  // Returns the issues written to writer when no error occurs.
   135  //
   136  // If no VCS could be found or other VCS errors occur,
   137  // all issues are written to writer and an error is returned.
   138  //
   139  // File paths in reader must be relative to current working directory or absolute.
   140  func (c *Checker) Check(reader io.Reader, writer io.Writer) (issues []Issue, err error) {
   141  	returnErr := c.Prepare()
   142  	writeAll := returnErr != nil
   143  
   144  	// file.go:lineNo:colNo:message
   145  	// colNo is optional, strip spaces before message
   146  	lineRE := regexp.MustCompile(`(.+\.go):([0-9]+):([0-9]+)?:?\s*(.*)`)
   147  	if c.Regexp != "" {
   148  		lineRE, err = regexp.Compile(c.Regexp)
   149  		if err != nil {
   150  			return nil, fmt.Errorf("could not parse regexp: %w", err)
   151  		}
   152  	}
   153  
   154  	// TODO consider lazy loading this, if there's nothing in stdin, no point
   155  	// checking for recent changes
   156  	c.debugf("lines changed: %+v", c.changes)
   157  
   158  	absPath := c.AbsPath
   159  	if absPath == "" {
   160  		absPath, err = os.Getwd()
   161  		if err != nil {
   162  			returnErr = fmt.Errorf("could not get current working directory: %w", err)
   163  		}
   164  	}
   165  
   166  	// Scan each line in reader and only write those lines if lines changed
   167  	scanner := bufio.NewScanner(reader)
   168  	for scanner.Scan() {
   169  		line := lineRE.FindSubmatch(scanner.Bytes())
   170  		if line == nil {
   171  			c.debugf("cannot parse file+line number: %s", scanner.Text())
   172  			continue
   173  		}
   174  
   175  		if writeAll {
   176  			_, _ = fmt.Fprintln(writer, scanner.Text())
   177  			continue
   178  		}
   179  
   180  		// Make absolute path names relative
   181  		path := string(line[1])
   182  		if rel, err := filepath.Rel(absPath, path); err == nil {
   183  			c.debugf("rewrote path from %q to %q (absPath: %q)", path, rel, absPath)
   184  			path = rel
   185  		}
   186  
   187  		// Parse line number
   188  		lno, err := strconv.ParseUint(string(line[2]), 10, 64)
   189  		if err != nil {
   190  			c.debugf("cannot parse line number: %q", scanner.Text())
   191  			continue
   192  		}
   193  
   194  		// Parse optional column number
   195  		var cno uint64
   196  		if len(line[3]) > 0 {
   197  			cno, err = strconv.ParseUint(string(line[3]), 10, 64)
   198  			if err != nil {
   199  				c.debugf("cannot parse column number: %q", scanner.Text())
   200  				// Ignore this error and continue
   201  			}
   202  		}
   203  
   204  		// Extract message
   205  		msg := string(line[4])
   206  
   207  		c.debugf("path: %q, lineNo: %v, colNo: %v, msg: %q", path, lno, cno, msg)
   208  
   209  		simpleIssue := simpleInputIssue{filePath: path, lineNumber: int(lno)}
   210  
   211  		hunkPos, changed := c.IsNewIssue(simpleIssue)
   212  		if changed {
   213  			issue := Issue{
   214  				File:    path,
   215  				LineNo:  int(lno),
   216  				ColNo:   int(cno),
   217  				HunkPos: hunkPos,
   218  				Issue:   scanner.Text(),
   219  				Message: msg,
   220  			}
   221  			issues = append(issues, issue)
   222  
   223  			_, _ = fmt.Fprintln(writer, scanner.Text())
   224  		} else {
   225  			c.debugf("unchanged: %s", scanner.Text())
   226  		}
   227  	}
   228  
   229  	if err := scanner.Err(); err != nil {
   230  		returnErr = fmt.Errorf("error reading standard input: %w", err)
   231  	}
   232  
   233  	return issues, returnErr
   234  }
   235  
   236  func (c *Checker) debugf(format string, s ...interface{}) {
   237  	if c.Debug != nil {
   238  		_, _ = fmt.Fprint(c.Debug, "DEBUG: ")
   239  		_, _ = fmt.Fprintf(c.Debug, format+"\n", s...)
   240  	}
   241  }
   242  
   243  func (c *Checker) preparePatch() error {
   244  	// Check if patch is supplied, if not, retrieve from VCS
   245  	if c.Patch == nil {
   246  		var err error
   247  		c.Patch, c.NewFiles, err = GitPatch(c.RevisionFrom, c.RevisionTo)
   248  		if err != nil {
   249  			return fmt.Errorf("could not read git repo: %w", err)
   250  		}
   251  		if c.Patch == nil {
   252  			return errors.New("no version control repository found")
   253  		}
   254  	}
   255  
   256  	return nil
   257  }
   258  
   259  // linesChanges returns a map of file names to line numbers being changed.
   260  // If key is nil, the file has been recently added, else it contains a slice of positions that have been added.
   261  func (c *Checker) linesChanged() map[string][]pos {
   262  	type state struct {
   263  		file    string
   264  		lineNo  int   // current line number within chunk
   265  		hunkPos int   // current line count since first @@ in file
   266  		changes []pos // position of changes
   267  	}
   268  
   269  	changes := make(map[string][]pos)
   270  
   271  	for _, file := range c.NewFiles {
   272  		changes[file] = nil
   273  	}
   274  
   275  	if c.Patch == nil {
   276  		return changes
   277  	}
   278  
   279  	var s state
   280  
   281  	scanner := bufio.NewReader(c.Patch)
   282  	var scanErr error
   283  	for {
   284  		lineB, isPrefix, err := scanner.ReadLine()
   285  		if isPrefix {
   286  			// If a single line overflowed the buffer, don't bother processing it as
   287  			// it's likey part of a file and not relevant to the patch.
   288  			continue
   289  		}
   290  		if err != nil {
   291  			scanErr = err
   292  			break
   293  		}
   294  		line := strings.TrimRight(string(lineB), "\n")
   295  
   296  		c.debugf(line)
   297  		s.lineNo++
   298  		s.hunkPos++
   299  		switch {
   300  		case strings.HasPrefix(line, "+++ ") && len(line) > 4:
   301  			if s.changes != nil {
   302  				// record the last state
   303  				changes[s.file] = s.changes
   304  			}
   305  			// 6 removes "+++ b/"
   306  			s = state{file: line[6:], hunkPos: -1, changes: []pos{}}
   307  		case strings.HasPrefix(line, "@@ "):
   308  			//      @@ -1 +2,4 @@
   309  			// chdr ^^^^^^^^^^^^^
   310  			// ahdr       ^^^^
   311  			// cstart      ^
   312  			chdr := strings.Split(line, " ")
   313  			ahdr := strings.Split(chdr[2], ",")
   314  			// [1:] to remove leading plus
   315  			cstart, err := strconv.ParseUint(ahdr[0][1:], 10, 64)
   316  			if err != nil {
   317  				panic(err)
   318  			}
   319  			s.lineNo = int(cstart) - 1 // -1 as cstart is the next line number
   320  		case strings.HasPrefix(line, "-"):
   321  			s.lineNo--
   322  		case strings.HasPrefix(line, "+"):
   323  			s.changes = append(s.changes, pos{lineNo: s.lineNo, hunkPos: s.hunkPos})
   324  		}
   325  	}
   326  
   327  	if !errors.Is(scanErr, io.EOF) {
   328  		_, _ = fmt.Fprintln(os.Stderr, "reading standard input:", scanErr)
   329  	}
   330  
   331  	// record the last state
   332  	changes[s.file] = s.changes
   333  
   334  	return changes
   335  }
   336  
   337  // GitPatch returns a patch from a git repository.
   338  // If no git repository was found and no errors occurred, nil is returned,
   339  // else an error is returned revisionFrom and revisionTo defines the git diff parameters,
   340  // if left blank and there are unstaged changes or untracked files,
   341  // only those will be returned else only check changes since HEAD~.
   342  // If revisionFrom is set but revisionTo is not,
   343  // untracked files will be included, to exclude untracked files set revisionTo to HEAD~.
   344  // It's incorrect to specify revisionTo without a revisionFrom.
   345  func GitPatch(revisionFrom, revisionTo string) (io.Reader, []string, error) {
   346  	// check if git repo exists
   347  	if err := exec.Command("git", "status", "--porcelain").Run(); err != nil {
   348  		// don't return an error, we assume the error is not repo exists
   349  		return nil, nil, nil
   350  	}
   351  
   352  	// make a patch for untracked files
   353  	ls, err := exec.Command("git", "ls-files", "--others", "--exclude-standard").CombinedOutput()
   354  	if err != nil {
   355  		return nil, nil, fmt.Errorf("error executing git ls-files: %w", err)
   356  	}
   357  
   358  	var newFiles []string
   359  	for _, file := range bytes.Split(ls, []byte{'\n'}) {
   360  		if len(file) == 0 || bytes.HasSuffix(file, []byte{'/'}) {
   361  			// ls-files was sometimes showing directories when they were ignored
   362  			// I couldn't create a test case for this as I couldn't reproduce correctly for the moment,
   363  			// just exclude files with trailing /
   364  			continue
   365  		}
   366  
   367  		newFiles = append(newFiles, string(file))
   368  	}
   369  
   370  	if revisionFrom != "" {
   371  		args := []string{revisionFrom}
   372  
   373  		if revisionTo != "" {
   374  			args = append(args, revisionTo)
   375  		}
   376  
   377  		args = append(args, "--")
   378  
   379  		patch, errDiff := gitDiff(args...)
   380  		if errDiff != nil {
   381  			return nil, nil, errDiff
   382  		}
   383  
   384  		if revisionTo == "" {
   385  			return patch, newFiles, nil
   386  		}
   387  
   388  		return patch, nil, nil
   389  	}
   390  
   391  	// make a patch for unstaged changes
   392  	patch, err := gitDiff("--")
   393  	if err != nil {
   394  		return nil, nil, err
   395  	}
   396  
   397  	unstaged := patch.Len() > 0
   398  
   399  	// If there's unstaged changes OR untracked changes (or both),
   400  	// then this is a suitable patch
   401  	if unstaged || newFiles != nil {
   402  		return patch, newFiles, nil
   403  	}
   404  
   405  	// check for changes in recent commit
   406  	patch, err = gitDiff("HEAD~", "--")
   407  	if err != nil {
   408  		return nil, nil, err
   409  	}
   410  
   411  	return patch, nil, nil
   412  }
   413  
   414  func gitDiff(extraArgs ...string) (*bytes.Buffer, error) {
   415  	cmd := exec.Command("git", "diff", "--color=never", "--no-ext-diff")
   416  
   417  	if isSupportedByGit(2, 41, 0) {
   418  		cmd.Args = append(cmd.Args, "--default-prefix")
   419  	}
   420  
   421  	cmd.Args = append(cmd.Args, "--relative")
   422  	cmd.Args = append(cmd.Args, extraArgs...)
   423  
   424  	patch := new(bytes.Buffer)
   425  	errBuff := new(bytes.Buffer)
   426  
   427  	cmd.Stdout = patch
   428  	cmd.Stderr = errBuff
   429  
   430  	if err := cmd.Run(); err != nil {
   431  		return nil, fmt.Errorf("error executing %q: %w: %w", strings.Join(cmd.Args, " "), err, readAsError(errBuff))
   432  	}
   433  
   434  	return patch, nil
   435  }
   436  
   437  func readAsError(buff io.Reader) error {
   438  	output, err := io.ReadAll(buff)
   439  	if err != nil {
   440  		return fmt.Errorf("read stderr: %w", err)
   441  	}
   442  
   443  	return errors.New(string(output))
   444  }
   445  
   446  func isSupportedByGit(major, minor, patch int) bool {
   447  	output, err := exec.Command("git", "version").CombinedOutput()
   448  	if err != nil {
   449  		return false
   450  	}
   451  
   452  	parts := bytes.Split(bytes.TrimSpace(output), []byte(" "))
   453  	if len(parts) < 3 {
   454  		return false
   455  	}
   456  
   457  	v := string(parts[2])
   458  	if v == "" {
   459  		return false
   460  	}
   461  
   462  	vp := regexp.MustCompile(`^(\d+)\.(\d+)(?:\.(\d+))?.*$`).FindStringSubmatch(v)
   463  	if len(vp) < 4 {
   464  		return false
   465  	}
   466  
   467  	currentMajor, err := strconv.Atoi(vp[1])
   468  	if err != nil {
   469  		return false
   470  	}
   471  
   472  	currentMinor, err := strconv.Atoi(vp[2])
   473  	if err != nil {
   474  		return false
   475  	}
   476  
   477  	currentPatch, err := strconv.Atoi(vp[3])
   478  	if err != nil {
   479  		return false
   480  	}
   481  
   482  	return currentMajor*1_000_000_000+currentMinor*1_000_000+currentPatch*1_000 >= major*1_000_000_000+minor*1_000_000+patch*1_000
   483  }