github.com/lubgr/revgrep@v0.0.0-20240125154757-7e5ee1900f8a/revgrep.go (about)

     1  package revgrep
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  )
    16  
    17  // Checker provides APIs to filter static analysis tools to specific commits,
    18  // such as showing only issues since last commit.
    19  type Checker struct {
    20  	// Patch file (unified) to read to detect lines being changed, if nil revgrep
    21  	// will attempt to detect the VCS and generate an appropriate patch. Auto
    22  	// detection will search for uncommitted changes first, if none found, will
    23  	// generate a patch from last committed change. File paths within patches
    24  	// must be relative to current working directory.
    25  	Patch io.Reader
    26  	// NewFiles is a list of file names (with absolute paths) where the entire
    27  	// contents of the file is new.
    28  	NewFiles []string
    29  	// Debug sets the debug writer for additional output.
    30  	Debug io.Writer
    31  	// RevisionFrom check revision starting at, leave blank for auto detection
    32  	// ignored if patch is set.
    33  	RevisionFrom string
    34  	// RevisionTo checks revision finishing at, leave blank for auto detection
    35  	// ignored if patch is set.
    36  	RevisionTo string
    37  	// Regexp to match path, line number, optional column number, and message.
    38  	Regexp string
    39  	// AbsPath is used to make an absolute path of an issue's filename to be
    40  	// relative in order to match patch file. If not set, current working
    41  	// directory is used.
    42  	AbsPath string
    43  }
    44  
    45  // Issue contains metadata about an issue found.
    46  type Issue struct {
    47  	// File is the name of the file as it appeared from the patch.
    48  	File string
    49  	// LineNo is the line number of the file.
    50  	LineNo int
    51  	// ColNo is the column number or 0 if none could be parsed.
    52  	ColNo int
    53  	// HunkPos is position from file's first @@, for new files this will be the
    54  	// line number.
    55  	//
    56  	// See also: https://developer.github.com/v3/pulls/comments/#create-a-comment
    57  	HunkPos int
    58  	// Issue text as it appeared from the tool.
    59  	Issue string
    60  	// Message is the issue without file name, line number and column number.
    61  	Message string
    62  }
    63  
    64  // Check scans reader and writes any lines to writer that have been added in
    65  // Checker.Patch.
    66  //
    67  // Returns issues written to writer when no error occurs.
    68  //
    69  // If no VCS could be found or other VCS errors occur, all issues are written
    70  // to writer and an error is returned.
    71  //
    72  // File paths in reader must be relative to current working directory or
    73  // absolute.
    74  func (c Checker) Check(reader io.Reader, writer io.Writer) (issues []Issue, err error) {
    75  	// Check if patch is supplied, if not, retrieve from VCS
    76  	var (
    77  		writeAll  bool
    78  		returnErr error
    79  	)
    80  	if c.Patch == nil {
    81  		c.Patch, c.NewFiles, err = GitPatch(c.RevisionFrom, c.RevisionTo)
    82  		if err != nil {
    83  			writeAll = true
    84  			returnErr = fmt.Errorf("could not read git repo: %s", err)
    85  		}
    86  		if c.Patch == nil {
    87  			writeAll = true
    88  			returnErr = errors.New("no version control repository found")
    89  		}
    90  	}
    91  
    92  	// file.go:lineNo:colNo:message
    93  	// colNo is optional, strip spaces before message
    94  	lineRE := regexp.MustCompile(`(.*?\.go):([0-9]+):([0-9]+)?:?\s*(.*)`)
    95  	if c.Regexp != "" {
    96  		lineRE, err = regexp.Compile(c.Regexp)
    97  		if err != nil {
    98  			return nil, fmt.Errorf("could not parse regexp: %v", err)
    99  		}
   100  	}
   101  
   102  	// TODO consider lazy loading this, if there's nothing in stdin, no point
   103  	// checking for recent changes
   104  	linesChanged := c.linesChanged()
   105  	c.debugf("lines changed: %+v", linesChanged)
   106  
   107  	absPath := c.AbsPath
   108  	if absPath == "" {
   109  		absPath, err = os.Getwd()
   110  		if err != nil {
   111  			returnErr = fmt.Errorf("could not get current working directory: %s", err)
   112  		}
   113  	}
   114  
   115  	// Scan each line in reader and only write those lines if lines changed
   116  	scanner := bufio.NewScanner(reader)
   117  	for scanner.Scan() {
   118  		line := lineRE.FindSubmatch(scanner.Bytes())
   119  		if line == nil {
   120  			c.debugf("cannot parse file+line number: %s", scanner.Text())
   121  			continue
   122  		}
   123  
   124  		if writeAll {
   125  			fmt.Fprintln(writer, scanner.Text())
   126  			continue
   127  		}
   128  
   129  		// Make absolute path names relative
   130  		path := string(line[1])
   131  		if rel, err := filepath.Rel(absPath, path); err == nil {
   132  			c.debugf("rewrote path from %q to %q (absPath: %q)", path, rel, absPath)
   133  			path = rel
   134  		}
   135  
   136  		// Parse line number
   137  		lno, err := strconv.ParseUint(string(line[2]), 10, 64)
   138  		if err != nil {
   139  			c.debugf("cannot parse line number: %q", scanner.Text())
   140  			continue
   141  		}
   142  
   143  		// Parse optional column number
   144  		var cno uint64
   145  		if len(line[3]) > 0 {
   146  			cno, err = strconv.ParseUint(string(line[3]), 10, 64)
   147  			if err != nil {
   148  				c.debugf("cannot parse column number: %q", scanner.Text())
   149  				// Ignore this error and continue
   150  			}
   151  		}
   152  
   153  		// Extract message
   154  		msg := string(line[4])
   155  
   156  		c.debugf("path: %q, lineNo: %v, colNo: %v, msg: %q", path, lno, cno, msg)
   157  
   158  		var (
   159  			fpos    pos
   160  			changed bool
   161  		)
   162  		if fchanges, ok := linesChanged[path]; ok {
   163  			// found file, see if lines matched
   164  			for _, pos := range fchanges {
   165  				if pos.lineNo == int(lno) {
   166  					fpos = pos
   167  					changed = true
   168  				}
   169  			}
   170  			if changed || fchanges == nil {
   171  				// either file changed or it's a new file
   172  				issue := Issue{
   173  					File:    path,
   174  					LineNo:  fpos.lineNo,
   175  					ColNo:   int(cno),
   176  					HunkPos: fpos.lineNo,
   177  					Issue:   scanner.Text(),
   178  					Message: msg,
   179  				}
   180  				if changed {
   181  					// existing file changed
   182  					issue.HunkPos = fpos.hunkPos
   183  				}
   184  				issues = append(issues, issue)
   185  				fmt.Fprintln(writer, scanner.Text())
   186  			}
   187  		}
   188  		if !changed {
   189  			c.debugf("unchanged: %s", scanner.Text())
   190  		}
   191  	}
   192  	if err := scanner.Err(); err != nil {
   193  		returnErr = fmt.Errorf("error reading standard input: %s", err)
   194  	}
   195  	return issues, returnErr
   196  }
   197  
   198  func (c Checker) debugf(format string, s ...interface{}) {
   199  	if c.Debug != nil {
   200  		fmt.Fprint(c.Debug, "DEBUG: ")
   201  		fmt.Fprintf(c.Debug, format+"\n", s...)
   202  	}
   203  }
   204  
   205  type pos struct {
   206  	lineNo  int // line number
   207  	hunkPos int // position relative to first @@ in file
   208  }
   209  
   210  // linesChanges returns a map of file names to line numbers being changed.
   211  // If key is nil, the file has been recently added, else it contains a slice
   212  // of positions that have been added.
   213  func (c Checker) linesChanged() map[string][]pos {
   214  	type state struct {
   215  		file    string
   216  		lineNo  int   // current line number within chunk
   217  		hunkPos int   // current line count since first @@ in file
   218  		changes []pos // position of changes
   219  	}
   220  
   221  	var (
   222  		s       state
   223  		changes = make(map[string][]pos)
   224  	)
   225  
   226  	for _, file := range c.NewFiles {
   227  		changes[file] = nil
   228  	}
   229  
   230  	if c.Patch == nil {
   231  		return changes
   232  	}
   233  
   234  	scanner := bufio.NewScanner(c.Patch)
   235  	for scanner.Scan() {
   236  		line := scanner.Text() // TODO scanner.Bytes()
   237  		c.debugf(line)
   238  		s.lineNo++
   239  		s.hunkPos++
   240  		switch {
   241  		case strings.HasPrefix(line, "+++ ") && len(line) > 4:
   242  			if s.changes != nil {
   243  				// record the last state
   244  				changes[s.file] = s.changes
   245  			}
   246  			// 6 removes "+++ b/"
   247  			s = state{file: line[6:], hunkPos: -1, changes: []pos{}}
   248  		case strings.HasPrefix(line, "@@ "):
   249  			//      @@ -1 +2,4 @@
   250  			// chdr ^^^^^^^^^^^^^
   251  			// ahdr       ^^^^
   252  			// cstart      ^
   253  			chdr := strings.Split(line, " ")
   254  			ahdr := strings.Split(chdr[2], ",")
   255  			// [1:] to remove leading plus
   256  			cstart, err := strconv.ParseUint(ahdr[0][1:], 10, 64)
   257  			if err != nil {
   258  				panic(err)
   259  			}
   260  			s.lineNo = int(cstart) - 1 // -1 as cstart is the next line number
   261  		case strings.HasPrefix(line, "-"):
   262  			s.lineNo--
   263  		case strings.HasPrefix(line, "+"):
   264  			s.changes = append(s.changes, pos{lineNo: s.lineNo, hunkPos: s.hunkPos})
   265  		}
   266  
   267  	}
   268  	if err := scanner.Err(); err != nil {
   269  		fmt.Fprintln(os.Stderr, "reading standard input:", err)
   270  	}
   271  	// record the last state
   272  	changes[s.file] = s.changes
   273  
   274  	return changes
   275  }
   276  
   277  // GitPatch returns a patch from a git repository, if no git repository was
   278  // was found and no errors occurred, nil is returned, else an error is returned
   279  // revisionFrom and revisionTo defines the git diff parameters, if left blank
   280  // and there are unstaged changes or untracked files, only those will be returned
   281  // else only check changes since HEAD~. If revisionFrom is set but revisionTo
   282  // is not, untracked files will be included, to exclude untracked files set
   283  // revisionTo to HEAD~. It's incorrect to specify revisionTo without a
   284  // revisionFrom.
   285  func GitPatch(revisionFrom, revisionTo string) (io.Reader, []string, error) {
   286  	var patch bytes.Buffer
   287  
   288  	// check if git repo exists
   289  	if err := exec.Command("git", "status").Run(); err != nil {
   290  		// don't return an error, we assume the error is not repo exists
   291  		return nil, nil, nil
   292  	}
   293  
   294  	// make a patch for untracked files
   295  	var newFiles []string
   296  	ls, err := exec.Command("git", "ls-files", "-o").CombinedOutput()
   297  	if err != nil {
   298  		return nil, nil, fmt.Errorf("error executing git ls-files: %s", err)
   299  	}
   300  	for _, file := range bytes.Split(ls, []byte{'\n'}) {
   301  		if len(file) == 0 || bytes.HasSuffix(file, []byte{'/'}) {
   302  			// ls-files was sometimes showing directories when they were ignored
   303  			// I couldn't create a test case for this as I couldn't reproduce correctly
   304  			// for the moment, just exclude files with trailing /
   305  			continue
   306  		}
   307  		newFiles = append(newFiles, string(file))
   308  	}
   309  
   310  	if revisionFrom != "" {
   311  		cmd := exec.Command("git", "diff", revisionFrom)
   312  		if revisionTo != "" {
   313  			cmd.Args = append(cmd.Args, revisionTo)
   314  		}
   315  		cmd.Stdout = &patch
   316  		if err := cmd.Run(); err != nil {
   317  			return nil, nil, fmt.Errorf("error executing git diff %q %q: %s", revisionFrom, revisionTo, err)
   318  		}
   319  
   320  		if revisionTo == "" {
   321  			return &patch, newFiles, nil
   322  		}
   323  		return &patch, nil, nil
   324  	}
   325  
   326  	// make a patch for unstaged changes
   327  	// use --no-prefix to remove b/ given: +++ b/main.go
   328  	cmd := exec.Command("git", "diff")
   329  	cmd.Stdout = &patch
   330  	if err := cmd.Run(); err != nil {
   331  		return nil, nil, fmt.Errorf("error executing git diff: %s", err)
   332  	}
   333  	unstaged := patch.Len() > 0
   334  
   335  	// If there's unstaged changes OR untracked changes (or both), then this is
   336  	// a suitable patch
   337  	if unstaged || newFiles != nil {
   338  		return &patch, newFiles, nil
   339  	}
   340  
   341  	// check for changes in recent commit
   342  
   343  	cmd = exec.Command("git", "diff", "HEAD~")
   344  	cmd.Stdout = &patch
   345  	if err := cmd.Run(); err != nil {
   346  		return nil, nil, fmt.Errorf("error executing git diff HEAD~: %s", err)
   347  	}
   348  
   349  	return &patch, nil, nil
   350  }