github.com/git-lfs/git-lfs@v2.5.2+incompatible/lfs/gitscanner_log.go (about)

     1  package lfs
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"regexp"
    10  	"time"
    11  
    12  	"github.com/git-lfs/git-lfs/filepathfilter"
    13  	"github.com/git-lfs/git-lfs/git"
    14  	"github.com/git-lfs/git-lfs/subprocess"
    15  	"github.com/rubyist/tracerx"
    16  )
    17  
    18  // When scanning diffs e.g. parseLogOutputToPointers, which direction of diff to include
    19  // data from, i.e. '+' or '-'. Depending on what you're scanning for either might be useful
    20  type LogDiffDirection byte
    21  
    22  const (
    23  	LogDiffAdditions = LogDiffDirection('+') // include '+' diffs
    24  	LogDiffDeletions = LogDiffDirection('-') // include '-' diffs
    25  )
    26  
    27  var (
    28  	// Arguments to append to a git log call which will limit the output to
    29  	// lfs changes and format the output suitable for parseLogOutput.. method(s)
    30  	logLfsSearchArgs = []string{
    31  		"-G", "oid sha256:", // only diffs which include an lfs file SHA change
    32  		"-p",   // include diff so we can read the SHA
    33  		"-U12", // Make sure diff context is always big enough to support 10 extension lines to get whole pointer
    34  		`--format=lfs-commit-sha: %H %P`, // just a predictable commit header we can detect
    35  	}
    36  )
    37  
// gitscannerResult is the message passed over the channel between the
// goroutine that parses `git log` output and the loop that invokes the
// caller's callback. The senders in this file populate exactly one field.
type gitscannerResult struct {
	Pointer *WrappedPointer // pointer found in the log; nil when reporting an error
	Err     error           // failure to report; nil when reporting a pointer
}
    42  
    43  func scanUnpushed(cb GitScannerFoundPointer, remote string) error {
    44  	logArgs := []string{
    45  		"--branches", "--tags", // include all locally referenced commits
    46  		"--not"} // but exclude everything that comes after
    47  
    48  	if len(remote) == 0 {
    49  		logArgs = append(logArgs, "--remotes")
    50  	} else {
    51  		logArgs = append(logArgs, fmt.Sprintf("--remotes=%v", remote))
    52  	}
    53  
    54  	// Add standard search args to find lfs references
    55  	logArgs = append(logArgs, logLfsSearchArgs...)
    56  
    57  	cmd, err := git.Log(logArgs...)
    58  	if err != nil {
    59  		return err
    60  	}
    61  
    62  	parseScannerLogOutput(cb, LogDiffAdditions, cmd)
    63  	return nil
    64  }
    65  
    66  func parseScannerLogOutput(cb GitScannerFoundPointer, direction LogDiffDirection, cmd *subprocess.BufferedCmd) {
    67  	ch := make(chan gitscannerResult, chanBufSize)
    68  
    69  	go func() {
    70  		scanner := newLogScanner(direction, cmd.Stdout)
    71  		for scanner.Scan() {
    72  			if p := scanner.Pointer(); p != nil {
    73  				ch <- gitscannerResult{Pointer: p}
    74  			}
    75  		}
    76  		stderr, _ := ioutil.ReadAll(cmd.Stderr)
    77  		err := cmd.Wait()
    78  		if err != nil {
    79  			ch <- gitscannerResult{Err: fmt.Errorf("Error in git log: %v %v", err, string(stderr))}
    80  		}
    81  		close(ch)
    82  	}()
    83  
    84  	cmd.Stdin.Close()
    85  	for result := range ch {
    86  		cb(result.Pointer, result.Err)
    87  	}
    88  }
    89  
    90  // logPreviousVersions scans history for all previous versions of LFS pointers
    91  // from 'since' up to (but not including) the final state at ref
    92  func logPreviousSHAs(cb GitScannerFoundPointer, ref string, since time.Time) error {
    93  	logArgs := []string{
    94  		fmt.Sprintf("--since=%v", git.FormatGitDate(since)),
    95  	}
    96  	// Add standard search args to find lfs references
    97  	logArgs = append(logArgs, logLfsSearchArgs...)
    98  	// ending at ref
    99  	logArgs = append(logArgs, ref)
   100  
   101  	cmd, err := git.Log(logArgs...)
   102  	if err != nil {
   103  		return err
   104  	}
   105  
   106  	parseScannerLogOutput(cb, LogDiffDeletions, cmd)
   107  	return nil
   108  }
   109  
   110  func parseLogOutputToPointers(log io.Reader, dir LogDiffDirection,
   111  	includePaths, excludePaths []string, results chan *WrappedPointer) {
   112  	scanner := newLogScanner(dir, log)
   113  	if len(includePaths)+len(excludePaths) > 0 {
   114  		scanner.Filter = filepathfilter.New(includePaths, excludePaths)
   115  	}
   116  	for scanner.Scan() {
   117  		if p := scanner.Pointer(); p != nil {
   118  			results <- p
   119  		}
   120  	}
   121  }
   122  
// logScanner parses log output formatted as per logLfsSearchArgs & returns
// pointers. Use it like a bufio.Scanner: call Scan until it returns false and
// read each result with Pointer.
type logScanner struct {
	// Filter decides which file paths are scanned: pointer data is only
	// collected for files whose name Filter.Allows accepts, all other files
	// are skipped. newLogScanner leaves it nil.
	Filter *filepathfilter.Filter

	s       *bufio.Scanner   // line scanner over the raw log output
	dir     LogDiffDirection // which side of the diff ('+' or '-') to collect
	pointer *WrappedPointer  // result of the most recent Scan call

	pointerData         *bytes.Buffer // pointer lines collected so far for the current file
	currentFilename     string        // name taken from the most recent file header
	currentFileIncluded bool          // whether Filter allows currentFilename

	commitHeaderRegex    *regexp.Regexp // matches the "lfs-commit-sha:" commit header line
	fileHeaderRegex      *regexp.Regexp // matches "diff --git a/... b/..." file headers
	fileMergeHeaderRegex *regexp.Regexp // matches "diff --cc ..." merge file headers
	pointerDataRegex     *regexp.Regexp // matches diff lines that carry LFS pointer fields
}
   143  
   144  // dir: whether to include results from + or - diffs
   145  // r: a stream of output from git log with at least logLfsSearchArgs specified
   146  func newLogScanner(dir LogDiffDirection, r io.Reader) *logScanner {
   147  	return &logScanner{
   148  		s:                   bufio.NewScanner(r),
   149  		dir:                 dir,
   150  		pointerData:         &bytes.Buffer{},
   151  		currentFileIncluded: true,
   152  
   153  		// no need to compile these regexes on every `git-lfs` call, just ones that
   154  		// use the scanner.
   155  		commitHeaderRegex:    regexp.MustCompile(`^lfs-commit-sha: ([A-Fa-f0-9]{40})(?: ([A-Fa-f0-9]{40}))*`),
   156  		fileHeaderRegex:      regexp.MustCompile(`diff --git a\/(.+?)\s+b\/(.+)`),
   157  		fileMergeHeaderRegex: regexp.MustCompile(`diff --cc (.+)`),
   158  		pointerDataRegex:     regexp.MustCompile(`^([\+\- ])(version https://git-lfs|oid sha256|size|ext-).*$`),
   159  	}
   160  }
   161  
// Pointer returns the pointer produced by the most recent call to Scan. It is
// nil before the first Scan and after a Scan that returned false.
func (s *logScanner) Pointer() *WrappedPointer {
	return s.pointer
}
   165  
// Err returns the error, if any, reported by the underlying bufio.Scanner
// that reads the log output.
func (s *logScanner) Err() error {
	return s.s.Err()
}
   169  
   170  func (s *logScanner) Scan() bool {
   171  	s.pointer = nil
   172  	p, canScan := s.scan()
   173  	s.pointer = p
   174  	return canScan
   175  }
   176  
   177  // Utility func used at several points below (keep in narrow scope)
   178  func (s *logScanner) finishLastPointer() *WrappedPointer {
   179  	if s.pointerData.Len() == 0 || !s.currentFileIncluded {
   180  		return nil
   181  	}
   182  
   183  	p, err := DecodePointer(s.pointerData)
   184  	s.pointerData.Reset()
   185  
   186  	if err == nil {
   187  		return &WrappedPointer{Name: s.currentFilename, Pointer: p}
   188  	} else {
   189  		tracerx.Printf("Unable to parse pointer from log: %v", err)
   190  		return nil
   191  	}
   192  }
   193  
   194  // For each commit we'll get something like this:
   195  /*
   196  	lfs-commit-sha: 60fde3d23553e10a55e2a32ed18c20f65edd91e7 e2eaf1c10b57da7b98eb5d722ec5912ddeb53ea1
   197  
   198  	diff --git a/1D_Noise.png b/1D_Noise.png
   199  	new file mode 100644
   200  	index 0000000..2622b4a
   201  	--- /dev/null
   202  	+++ b/1D_Noise.png
   203  	@@ -0,0 +1,3 @@
   204  	+version https://git-lfs.github.com/spec/v1
   205  	+oid sha256:f5d84da40ab1f6aa28df2b2bf1ade2cdcd4397133f903c12b4106641b10e1ed6
   206  	+size 1289
   207  */
   208  // There can be multiple diffs per commit (multiple binaries)
   209  // Also when a binary is changed the diff will include a '-' line for the old SHA
   210  func (s *logScanner) scan() (*WrappedPointer, bool) {
   211  	for s.s.Scan() {
   212  		line := s.s.Text()
   213  
   214  		if match := s.commitHeaderRegex.FindStringSubmatch(line); match != nil {
   215  			// Currently we're not pulling out commit groupings, but could if we wanted
   216  			// This just acts as a delimiter for finishing a multiline pointer
   217  			if p := s.finishLastPointer(); p != nil {
   218  				return p, true
   219  			}
   220  		} else if match := s.fileHeaderRegex.FindStringSubmatch(line); match != nil {
   221  			// Finding a regular file header
   222  			p := s.finishLastPointer()
   223  
   224  			// Pertinent file name depends on whether we're listening to additions or removals
   225  			if s.dir == LogDiffAdditions {
   226  				s.setFilename(match[2])
   227  			} else {
   228  				s.setFilename(match[1])
   229  			}
   230  
   231  			if p != nil {
   232  				return p, true
   233  			}
   234  		} else if match := s.fileMergeHeaderRegex.FindStringSubmatch(line); match != nil {
   235  			// Git merge file header is a little different, only one file
   236  			p := s.finishLastPointer()
   237  
   238  			s.setFilename(match[1])
   239  
   240  			if p != nil {
   241  				return p, true
   242  			}
   243  		} else if s.currentFileIncluded {
   244  			if match := s.pointerDataRegex.FindStringSubmatch(line); match != nil {
   245  				// An LFS pointer data line
   246  				// Include only the entirety of one side of the diff
   247  				// -U3 will ensure we always get all of it, even if only
   248  				// the SHA changed (version & size the same)
   249  				changeType := match[1][0]
   250  
   251  				// Always include unchanged context lines (normally just the version line)
   252  				if LogDiffDirection(changeType) == s.dir || changeType == ' ' {
   253  					// Must skip diff +/- marker
   254  					s.pointerData.WriteString(line[1:])
   255  					s.pointerData.WriteString("\n") // newline was stripped off by scanner
   256  				}
   257  			}
   258  		}
   259  	}
   260  
   261  	if p := s.finishLastPointer(); p != nil {
   262  		return p, true
   263  	}
   264  
   265  	return nil, false
   266  }
   267  
   268  func (s *logScanner) setFilename(name string) {
   269  	s.currentFilename = name
   270  	s.currentFileIncluded = s.Filter.Allows(name)
   271  }