github.com/psexton/git-lfs@v2.1.1-0.20170517224304-289a18b2bc53+incompatible/lfs/gitscanner_log.go (about)

     1  package lfs
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"regexp"
    10  	"time"
    11  
    12  	"github.com/git-lfs/git-lfs/filepathfilter"
    13  	"github.com/git-lfs/git-lfs/git"
    14  	"github.com/rubyist/tracerx"
    15  )
    16  
    17  // When scanning diffs e.g. parseLogOutputToPointers, which direction of diff to include
    18  // data from, i.e. '+' or '-'. Depending on what you're scanning for either might be useful
    19  type LogDiffDirection byte
    20  
    21  const (
    22  	LogDiffAdditions = LogDiffDirection('+') // include '+' diffs
    23  	LogDiffDeletions = LogDiffDirection('-') // include '-' diffs
    24  )
    25  
    26  var (
    27  	// Arguments to append to a git log call which will limit the output to
    28  	// lfs changes and format the output suitable for parseLogOutput.. method(s)
    29  	logLfsSearchArgs = []string{
    30  		"-G", "oid sha256:", // only diffs which include an lfs file SHA change
    31  		"-p",   // include diff so we can read the SHA
    32  		"-U12", // Make sure diff context is always big enough to support 10 extension lines to get whole pointer
    33  		`--format=lfs-commit-sha: %H %P`, // just a predictable commit header we can detect
    34  	}
    35  )
    36  
    37  type gitscannerResult struct {
    38  	Pointer *WrappedPointer
    39  	Err     error
    40  }
    41  
    42  func scanUnpushed(cb GitScannerFoundPointer, remote string) error {
    43  	logArgs := []string{"log",
    44  		"--branches", "--tags", // include all locally referenced commits
    45  		"--not"} // but exclude everything that comes after
    46  
    47  	if len(remote) == 0 {
    48  		logArgs = append(logArgs, "--remotes")
    49  	} else {
    50  		logArgs = append(logArgs, fmt.Sprintf("--remotes=%v", remote))
    51  	}
    52  
    53  	// Add standard search args to find lfs references
    54  	logArgs = append(logArgs, logLfsSearchArgs...)
    55  
    56  	cmd, err := startCommand("git", logArgs...)
    57  	if err != nil {
    58  		return err
    59  	}
    60  
    61  	parseScannerLogOutput(cb, LogDiffAdditions, cmd)
    62  	return nil
    63  }
    64  
    65  func parseScannerLogOutput(cb GitScannerFoundPointer, direction LogDiffDirection, cmd *wrappedCmd) {
    66  	ch := make(chan gitscannerResult, chanBufSize)
    67  
    68  	go func() {
    69  		scanner := newLogScanner(direction, cmd.Stdout)
    70  		for scanner.Scan() {
    71  			if p := scanner.Pointer(); p != nil {
    72  				ch <- gitscannerResult{Pointer: p}
    73  			}
    74  		}
    75  		stderr, _ := ioutil.ReadAll(cmd.Stderr)
    76  		err := cmd.Wait()
    77  		if err != nil {
    78  			ch <- gitscannerResult{Err: fmt.Errorf("Error in git log: %v %v", err, string(stderr))}
    79  		}
    80  		close(ch)
    81  	}()
    82  
    83  	cmd.Stdin.Close()
    84  	for result := range ch {
    85  		cb(result.Pointer, result.Err)
    86  	}
    87  }
    88  
    89  // logPreviousVersions scans history for all previous versions of LFS pointers
    90  // from 'since' up to (but not including) the final state at ref
    91  func logPreviousSHAs(cb GitScannerFoundPointer, ref string, since time.Time) error {
    92  	logArgs := []string{"log",
    93  		fmt.Sprintf("--since=%v", git.FormatGitDate(since)),
    94  	}
    95  	// Add standard search args to find lfs references
    96  	logArgs = append(logArgs, logLfsSearchArgs...)
    97  	// ending at ref
    98  	logArgs = append(logArgs, ref)
    99  
   100  	cmd, err := startCommand("git", logArgs...)
   101  	if err != nil {
   102  		return err
   103  	}
   104  
   105  	parseScannerLogOutput(cb, LogDiffDeletions, cmd)
   106  	return nil
   107  }
   108  
   109  func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection,
   110  	includePaths, excludePaths []string, results chan *WrappedPointer) {
   111  	scanner := newLogScanner(dir, log)
   112  	if len(includePaths)+len(excludePaths) > 0 {
   113  		scanner.Filter = filepathfilter.New(includePaths, excludePaths)
   114  	}
   115  	for scanner.Scan() {
   116  		if p := scanner.Pointer(); p != nil {
   117  			results <- p
   118  		}
   119  	}
   120  }
   121  
   122  // logScanner parses log output formatted as per logLfsSearchArgs & returns
   123  // pointers.
   124  type logScanner struct {
   125  	// Filter will ensure file paths matching the include patterns, or not matchin
   126  	// the exclude patterns are skipped.
   127  	Filter *filepathfilter.Filter
   128  
   129  	s       *bufio.Scanner
   130  	dir     LogDiffDirection
   131  	pointer *WrappedPointer
   132  
   133  	pointerData         *bytes.Buffer
   134  	currentFilename     string
   135  	currentFileIncluded bool
   136  
   137  	commitHeaderRegex    *regexp.Regexp
   138  	fileHeaderRegex      *regexp.Regexp
   139  	fileMergeHeaderRegex *regexp.Regexp
   140  	pointerDataRegex     *regexp.Regexp
   141  }
   142  
   143  // dir: whether to include results from + or - diffs
   144  // r: a stream of output from git log with at least logLfsSearchArgs specified
   145  func newLogScanner(dir LogDiffDirection, r io.Reader) *logScanner {
   146  	return &logScanner{
   147  		s:                   bufio.NewScanner(r),
   148  		dir:                 dir,
   149  		pointerData:         &bytes.Buffer{},
   150  		currentFileIncluded: true,
   151  
   152  		// no need to compile these regexes on every `git-lfs` call, just ones that
   153  		// use the scanner.
   154  		commitHeaderRegex:    regexp.MustCompile(`^lfs-commit-sha: ([A-Fa-f0-9]{40})(?: ([A-Fa-f0-9]{40}))*`),
   155  		fileHeaderRegex:      regexp.MustCompile(`diff --git a\/(.+?)\s+b\/(.+)`),
   156  		fileMergeHeaderRegex: regexp.MustCompile(`diff --cc (.+)`),
   157  		pointerDataRegex:     regexp.MustCompile(`^([\+\- ])(version https://git-lfs|oid sha256|size|ext-).*$`),
   158  	}
   159  }
   160  
   161  func (s *logScanner) Pointer() *WrappedPointer {
   162  	return s.pointer
   163  }
   164  
   165  func (s *logScanner) Err() error {
   166  	return s.s.Err()
   167  }
   168  
   169  func (s *logScanner) Scan() bool {
   170  	s.pointer = nil
   171  	p, canScan := s.scan()
   172  	s.pointer = p
   173  	return canScan
   174  }
   175  
   176  // Utility func used at several points below (keep in narrow scope)
   177  func (s *logScanner) finishLastPointer() *WrappedPointer {
   178  	if s.pointerData.Len() == 0 || !s.currentFileIncluded {
   179  		return nil
   180  	}
   181  
   182  	p, err := DecodePointer(s.pointerData)
   183  	s.pointerData.Reset()
   184  
   185  	if err == nil {
   186  		return &WrappedPointer{Name: s.currentFilename, Pointer: p}
   187  	} else {
   188  		tracerx.Printf("Unable to parse pointer from log: %v", err)
   189  		return nil
   190  	}
   191  }
   192  
   193  // For each commit we'll get something like this:
   194  /*
   195  	lfs-commit-sha: 60fde3d23553e10a55e2a32ed18c20f65edd91e7 e2eaf1c10b57da7b98eb5d722ec5912ddeb53ea1
   196  
   197  	diff --git a/1D_Noise.png b/1D_Noise.png
   198  	new file mode 100644
   199  	index 0000000..2622b4a
   200  	--- /dev/null
   201  	+++ b/1D_Noise.png
   202  	@@ -0,0 +1,3 @@
   203  	+version https://git-lfs.github.com/spec/v1
   204  	+oid sha256:f5d84da40ab1f6aa28df2b2bf1ade2cdcd4397133f903c12b4106641b10e1ed6
   205  	+size 1289
   206  */
   207  // There can be multiple diffs per commit (multiple binaries)
   208  // Also when a binary is changed the diff will include a '-' line for the old SHA
   209  func (s *logScanner) scan() (*WrappedPointer, bool) {
   210  	for s.s.Scan() {
   211  		line := s.s.Text()
   212  
   213  		if match := s.commitHeaderRegex.FindStringSubmatch(line); match != nil {
   214  			// Currently we're not pulling out commit groupings, but could if we wanted
   215  			// This just acts as a delimiter for finishing a multiline pointer
   216  			if p := s.finishLastPointer(); p != nil {
   217  				return p, true
   218  			}
   219  		} else if match := s.fileHeaderRegex.FindStringSubmatch(line); match != nil {
   220  			// Finding a regular file header
   221  			p := s.finishLastPointer()
   222  
   223  			// Pertinent file name depends on whether we're listening to additions or removals
   224  			if s.dir == LogDiffAdditions {
   225  				s.setFilename(match[2])
   226  			} else {
   227  				s.setFilename(match[1])
   228  			}
   229  
   230  			if p != nil {
   231  				return p, true
   232  			}
   233  		} else if match := s.fileMergeHeaderRegex.FindStringSubmatch(line); match != nil {
   234  			// Git merge file header is a little different, only one file
   235  			p := s.finishLastPointer()
   236  
   237  			s.setFilename(match[1])
   238  
   239  			if p != nil {
   240  				return p, true
   241  			}
   242  		} else if s.currentFileIncluded {
   243  			if match := s.pointerDataRegex.FindStringSubmatch(line); match != nil {
   244  				// An LFS pointer data line
   245  				// Include only the entirety of one side of the diff
   246  				// -U3 will ensure we always get all of it, even if only
   247  				// the SHA changed (version & size the same)
   248  				changeType := match[1][0]
   249  
   250  				// Always include unchanged context lines (normally just the version line)
   251  				if LogDiffDirection(changeType) == s.dir || changeType == ' ' {
   252  					// Must skip diff +/- marker
   253  					s.pointerData.WriteString(line[1:])
   254  					s.pointerData.WriteString("\n") // newline was stripped off by scanner
   255  				}
   256  			}
   257  		}
   258  	}
   259  
   260  	if p := s.finishLastPointer(); p != nil {
   261  		return p, true
   262  	}
   263  
   264  	return nil, false
   265  }
   266  
   267  func (s *logScanner) setFilename(name string) {
   268  	s.currentFilename = name
   269  	s.currentFileIncluded = s.Filter.Allows(name)
   270  }