github.com/git-lfs/git-lfs@v2.5.2+incompatible/lfs/gitscanner_tree.go (about) 1 package lfs 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "strconv" 10 "strings" 11 12 "github.com/git-lfs/git-lfs/filepathfilter" 13 "github.com/git-lfs/git-lfs/git" 14 ) 15 16 // An entry from ls-tree or rev-list including a blob sha and tree path 17 type TreeBlob struct { 18 Sha1 string 19 Filename string 20 } 21 22 func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter) error { 23 // We don't use the nameMap approach here since that's imprecise when >1 file 24 // can be using the same content 25 treeShas, err := lsTreeBlobs(ref, filter) 26 if err != nil { 27 return err 28 } 29 30 pcw, err := catFileBatchTree(treeShas) 31 if err != nil { 32 return err 33 } 34 35 for p := range pcw.Results { 36 cb(p, nil) 37 } 38 39 if err := pcw.Wait(); err != nil { 40 cb(nil, err) 41 } 42 return nil 43 } 44 45 // catFileBatchTree uses git cat-file --batch to get the object contents 46 // of a git object, given its sha1. The contents will be decoded into 47 // a Git LFS pointer. treeblobs is a channel over which blob entries 48 // will be sent. It returns a channel from which point.Pointers can be read. 49 func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper, error) { 50 scanner, err := NewPointerScanner() 51 if err != nil { 52 scanner.Close() 53 54 return nil, err 55 } 56 57 pointers := make(chan *WrappedPointer, chanBufSize) 58 errchan := make(chan error, 10) // Multiple errors possible 59 60 go func() { 61 for t := range treeblobs.Results { 62 hasNext := scanner.Scan(t.Sha1) 63 if p := scanner.Pointer(); p != nil { 64 p.Name = t.Filename 65 pointers <- p 66 } 67 68 if err := scanner.Err(); err != nil { 69 errchan <- err 70 } 71 72 if !hasNext { 73 break 74 } 75 } 76 77 // Deal with nested error from incoming treeblobs 78 err := treeblobs.Wait() 79 if err != nil { 80 errchan <- err 81 } 82 83 if err = scanner.Close(); err != nil { 84 errchan <- err 85 } 86 87 close(pointers) 88 close(errchan) 89 }() 90 91 return NewPointerChannelWrapper(pointers, errchan), nil 92 } 93 94 // Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files 95 // The returned channel will be sent these blobs which should be sent to catFileBatchTree 96 // for final check & conversion to Pointer 97 func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWrapper, error) { 98 cmd, err := git.LsTree(ref) 99 if err != nil { 100 return nil, err 101 } 102 103 cmd.Stdin.Close() 104 105 blobs := make(chan TreeBlob, chanBufSize) 106 errchan := make(chan error, 1) 107 108 go func() { 109 scanner := newLsTreeScanner(cmd.Stdout) 110 for scanner.Scan() { 111 if t := scanner.TreeBlob(); t != nil && filter.Allows(t.Filename) { 112 blobs <- *t 113 } 114 } 115 116 stderr, _ := ioutil.ReadAll(cmd.Stderr) 117 err := cmd.Wait() 118 if err != nil { 119 errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr)) 120 } 121 close(blobs) 122 close(errchan) 123 }() 124 125 return NewTreeBlobChannelWrapper(blobs, errchan), nil 126 } 127 128 type lsTreeScanner struct { 129 s *bufio.Scanner 130 tree *TreeBlob 131 } 132 133 func newLsTreeScanner(r io.Reader) *lsTreeScanner { 134 s := bufio.NewScanner(r) 135 s.Split(scanNullLines) 136 return &lsTreeScanner{s: s} 137 } 138 139 func (s *lsTreeScanner) TreeBlob() *TreeBlob { 140 return s.tree 141 } 142 143 func (s *lsTreeScanner) Err() error { 144 return nil 145 } 146 147 func (s *lsTreeScanner) Scan() bool { 148 t, hasNext := s.next() 149 s.tree = t 150 return hasNext 151 } 152 153 func (s *lsTreeScanner) next() (*TreeBlob, bool) { 154 hasNext := s.s.Scan() 155 line := s.s.Text() 156 parts := strings.SplitN(line, "\t", 2) 157 if len(parts) < 2 { 158 return nil, hasNext 159 } 160 161 attrs := strings.SplitN(parts[0], " ", 4) 162 if len(attrs) < 4 { 163 return nil, hasNext 164 } 165 166 if attrs[1] != "blob" { 167 return nil, hasNext 168 } 169 170 sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64) 171 if err != nil { 172 return nil, hasNext 173 } 174 175 if sz < blobSizeCutoff { 176 sha1 := attrs[2] 177 filename := parts[1] 178 return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext 179 } 180 return nil, hasNext 181 } 182 183 func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) { 184 if atEOF && len(data) == 0 { 185 return 0, nil, nil 186 } 187 188 if i := bytes.IndexByte(data, '\000'); i >= 0 { 189 // We have a full null-terminated line. 190 return i + 1, data[0:i], nil 191 } 192 193 // If we're at EOF, we have a final, non-terminated line. Return it. 194 if atEOF { 195 return len(data), data, nil 196 } 197 198 // Request more data. 199 return 0, nil, nil 200 }