github.com/git-lfs/git-lfs@v2.5.2+incompatible/lfs/gitscanner_tree.go (about)

     1  package lfs
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/git-lfs/git-lfs/filepathfilter"
    13  	"github.com/git-lfs/git-lfs/git"
    14  )
    15  
// TreeBlob is an entry from ls-tree or rev-list pairing a blob sha with the
// tree path at which that blob appears.
type TreeBlob struct {
	// Sha1 is the object id of the blob.
	Sha1 string
	// Filename is the path of the blob within the tree.
	Filename string
}
    21  
    22  func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter) error {
    23  	// We don't use the nameMap approach here since that's imprecise when >1 file
    24  	// can be using the same content
    25  	treeShas, err := lsTreeBlobs(ref, filter)
    26  	if err != nil {
    27  		return err
    28  	}
    29  
    30  	pcw, err := catFileBatchTree(treeShas)
    31  	if err != nil {
    32  		return err
    33  	}
    34  
    35  	for p := range pcw.Results {
    36  		cb(p, nil)
    37  	}
    38  
    39  	if err := pcw.Wait(); err != nil {
    40  		cb(nil, err)
    41  	}
    42  	return nil
    43  }
    44  
    45  // catFileBatchTree uses git cat-file --batch to get the object contents
    46  // of a git object, given its sha1. The contents will be decoded into
    47  // a Git LFS pointer. treeblobs is a channel over which blob entries
    48  // will be sent. It returns a channel from which point.Pointers can be read.
    49  func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper, error) {
    50  	scanner, err := NewPointerScanner()
    51  	if err != nil {
    52  		scanner.Close()
    53  
    54  		return nil, err
    55  	}
    56  
    57  	pointers := make(chan *WrappedPointer, chanBufSize)
    58  	errchan := make(chan error, 10) // Multiple errors possible
    59  
    60  	go func() {
    61  		for t := range treeblobs.Results {
    62  			hasNext := scanner.Scan(t.Sha1)
    63  			if p := scanner.Pointer(); p != nil {
    64  				p.Name = t.Filename
    65  				pointers <- p
    66  			}
    67  
    68  			if err := scanner.Err(); err != nil {
    69  				errchan <- err
    70  			}
    71  
    72  			if !hasNext {
    73  				break
    74  			}
    75  		}
    76  
    77  		// Deal with nested error from incoming treeblobs
    78  		err := treeblobs.Wait()
    79  		if err != nil {
    80  			errchan <- err
    81  		}
    82  
    83  		if err = scanner.Close(); err != nil {
    84  			errchan <- err
    85  		}
    86  
    87  		close(pointers)
    88  		close(errchan)
    89  	}()
    90  
    91  	return NewPointerChannelWrapper(pointers, errchan), nil
    92  }
    93  
    94  // Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files
    95  // The returned channel will be sent these blobs which should be sent to catFileBatchTree
    96  // for final check & conversion to Pointer
    97  func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWrapper, error) {
    98  	cmd, err := git.LsTree(ref)
    99  	if err != nil {
   100  		return nil, err
   101  	}
   102  
   103  	cmd.Stdin.Close()
   104  
   105  	blobs := make(chan TreeBlob, chanBufSize)
   106  	errchan := make(chan error, 1)
   107  
   108  	go func() {
   109  		scanner := newLsTreeScanner(cmd.Stdout)
   110  		for scanner.Scan() {
   111  			if t := scanner.TreeBlob(); t != nil && filter.Allows(t.Filename) {
   112  				blobs <- *t
   113  			}
   114  		}
   115  
   116  		stderr, _ := ioutil.ReadAll(cmd.Stderr)
   117  		err := cmd.Wait()
   118  		if err != nil {
   119  			errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr))
   120  		}
   121  		close(blobs)
   122  		close(errchan)
   123  	}()
   124  
   125  	return NewTreeBlobChannelWrapper(blobs, errchan), nil
   126  }
   127  
   128  type lsTreeScanner struct {
   129  	s    *bufio.Scanner
   130  	tree *TreeBlob
   131  }
   132  
   133  func newLsTreeScanner(r io.Reader) *lsTreeScanner {
   134  	s := bufio.NewScanner(r)
   135  	s.Split(scanNullLines)
   136  	return &lsTreeScanner{s: s}
   137  }
   138  
   139  func (s *lsTreeScanner) TreeBlob() *TreeBlob {
   140  	return s.tree
   141  }
   142  
   143  func (s *lsTreeScanner) Err() error {
   144  	return nil
   145  }
   146  
   147  func (s *lsTreeScanner) Scan() bool {
   148  	t, hasNext := s.next()
   149  	s.tree = t
   150  	return hasNext
   151  }
   152  
   153  func (s *lsTreeScanner) next() (*TreeBlob, bool) {
   154  	hasNext := s.s.Scan()
   155  	line := s.s.Text()
   156  	parts := strings.SplitN(line, "\t", 2)
   157  	if len(parts) < 2 {
   158  		return nil, hasNext
   159  	}
   160  
   161  	attrs := strings.SplitN(parts[0], " ", 4)
   162  	if len(attrs) < 4 {
   163  		return nil, hasNext
   164  	}
   165  
   166  	if attrs[1] != "blob" {
   167  		return nil, hasNext
   168  	}
   169  
   170  	sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
   171  	if err != nil {
   172  		return nil, hasNext
   173  	}
   174  
   175  	if sz < blobSizeCutoff {
   176  		sha1 := attrs[2]
   177  		filename := parts[1]
   178  		return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext
   179  	}
   180  	return nil, hasNext
   181  }
   182  
   183  func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
   184  	if atEOF && len(data) == 0 {
   185  		return 0, nil, nil
   186  	}
   187  
   188  	if i := bytes.IndexByte(data, '\000'); i >= 0 {
   189  		// We have a full null-terminated line.
   190  		return i + 1, data[0:i], nil
   191  	}
   192  
   193  	// If we're at EOF, we have a final, non-terminated line. Return it.
   194  	if atEOF {
   195  		return len(data), data, nil
   196  	}
   197  
   198  	// Request more data.
   199  	return 0, nil, nil
   200  }