github.com/psexton/git-lfs@v2.1.1-0.20170517224304-289a18b2bc53+incompatible/lfs/gitscanner_tree.go (about)

     1  package lfs
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/git-lfs/git-lfs/filepathfilter"
    13  )
    14  
    15  // An entry from ls-tree or rev-list including a blob sha and tree path
    16  type TreeBlob struct {
    17  	Sha1     string
    18  	Filename string
    19  }
    20  
    21  func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter) error {
    22  	// We don't use the nameMap approach here since that's imprecise when >1 file
    23  	// can be using the same content
    24  	treeShas, err := lsTreeBlobs(ref, filter)
    25  	if err != nil {
    26  		return err
    27  	}
    28  
    29  	pcw, err := catFileBatchTree(treeShas)
    30  	if err != nil {
    31  		return err
    32  	}
    33  
    34  	for p := range pcw.Results {
    35  		cb(p, nil)
    36  	}
    37  
    38  	if err := pcw.Wait(); err != nil {
    39  		cb(nil, err)
    40  	}
    41  	return nil
    42  }
    43  
    44  // catFileBatchTree uses git cat-file --batch to get the object contents
    45  // of a git object, given its sha1. The contents will be decoded into
    46  // a Git LFS pointer. treeblobs is a channel over which blob entries
    47  // will be sent. It returns a channel from which point.Pointers can be read.
    48  func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper, error) {
    49  	scanner, err := NewPointerScanner()
    50  	if err != nil {
    51  		scanner.Close()
    52  
    53  		return nil, err
    54  	}
    55  
    56  	pointers := make(chan *WrappedPointer, chanBufSize)
    57  	errchan := make(chan error, 10) // Multiple errors possible
    58  
    59  	go func() {
    60  		for t := range treeblobs.Results {
    61  			hasNext := scanner.Scan(t.Sha1)
    62  			if p := scanner.Pointer(); p != nil {
    63  				p.Name = t.Filename
    64  				pointers <- p
    65  			}
    66  
    67  			if err := scanner.Err(); err != nil {
    68  				errchan <- err
    69  			}
    70  
    71  			if !hasNext {
    72  				break
    73  			}
    74  		}
    75  
    76  		// Deal with nested error from incoming treeblobs
    77  		err := treeblobs.Wait()
    78  		if err != nil {
    79  			errchan <- err
    80  		}
    81  
    82  		if err = scanner.Close(); err != nil {
    83  			errchan <- err
    84  		}
    85  
    86  		close(pointers)
    87  		close(errchan)
    88  	}()
    89  
    90  	return NewPointerChannelWrapper(pointers, errchan), nil
    91  }
    92  
    93  // Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files
    94  // The returned channel will be sent these blobs which should be sent to catFileBatchTree
    95  // for final check & conversion to Pointer
    96  func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWrapper, error) {
    97  	cmd, err := startCommand("git", "ls-tree",
    98  		"-r",          // recurse
    99  		"-l",          // report object size (we'll need this)
   100  		"-z",          // null line termination
   101  		"--full-tree", // start at the root regardless of where we are in it
   102  		ref,
   103  	)
   104  
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  
   109  	cmd.Stdin.Close()
   110  
   111  	blobs := make(chan TreeBlob, chanBufSize)
   112  	errchan := make(chan error, 1)
   113  
   114  	go func() {
   115  		scanner := newLsTreeScanner(cmd.Stdout)
   116  		for scanner.Scan() {
   117  			if t := scanner.TreeBlob(); t != nil && filter.Allows(t.Filename) {
   118  				blobs <- *t
   119  			}
   120  		}
   121  
   122  		stderr, _ := ioutil.ReadAll(cmd.Stderr)
   123  		err := cmd.Wait()
   124  		if err != nil {
   125  			errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr))
   126  		}
   127  		close(blobs)
   128  		close(errchan)
   129  	}()
   130  
   131  	return NewTreeBlobChannelWrapper(blobs, errchan), nil
   132  }
   133  
   134  type lsTreeScanner struct {
   135  	s    *bufio.Scanner
   136  	tree *TreeBlob
   137  }
   138  
   139  func newLsTreeScanner(r io.Reader) *lsTreeScanner {
   140  	s := bufio.NewScanner(r)
   141  	s.Split(scanNullLines)
   142  	return &lsTreeScanner{s: s}
   143  }
   144  
   145  func (s *lsTreeScanner) TreeBlob() *TreeBlob {
   146  	return s.tree
   147  }
   148  
   149  func (s *lsTreeScanner) Err() error {
   150  	return nil
   151  }
   152  
   153  func (s *lsTreeScanner) Scan() bool {
   154  	t, hasNext := s.next()
   155  	s.tree = t
   156  	return hasNext
   157  }
   158  
   159  func (s *lsTreeScanner) next() (*TreeBlob, bool) {
   160  	hasNext := s.s.Scan()
   161  	line := s.s.Text()
   162  	parts := strings.SplitN(line, "\t", 2)
   163  	if len(parts) < 2 {
   164  		return nil, hasNext
   165  	}
   166  
   167  	attrs := strings.SplitN(parts[0], " ", 4)
   168  	if len(attrs) < 4 {
   169  		return nil, hasNext
   170  	}
   171  
   172  	if attrs[1] != "blob" {
   173  		return nil, hasNext
   174  	}
   175  
   176  	sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
   177  	if err != nil {
   178  		return nil, hasNext
   179  	}
   180  
   181  	if sz < blobSizeCutoff {
   182  		sha1 := attrs[2]
   183  		filename := parts[1]
   184  		return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext
   185  	}
   186  	return nil, hasNext
   187  }
   188  
   189  func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
   190  	if atEOF && len(data) == 0 {
   191  		return 0, nil, nil
   192  	}
   193  
   194  	if i := bytes.IndexByte(data, '\000'); i >= 0 {
   195  		// We have a full null-terminated line.
   196  		return i + 1, data[0:i], nil
   197  	}
   198  
   199  	// If we're at EOF, we have a final, non-terminated line. Return it.
   200  	if atEOF {
   201  		return len(data), data, nil
   202  	}
   203  
   204  	// Request more data.
   205  	return 0, nil, nil
   206  }