github.com/psexton/git-lfs@v2.1.1-0.20170517224304-289a18b2bc53+incompatible/lfs/gitscanner_tree.go (about) 1 package lfs 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "strconv" 10 "strings" 11 12 "github.com/git-lfs/git-lfs/filepathfilter" 13 ) 14 15 // An entry from ls-tree or rev-list including a blob sha and tree path 16 type TreeBlob struct { 17 Sha1 string 18 Filename string 19 } 20 21 func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter) error { 22 // We don't use the nameMap approach here since that's imprecise when >1 file 23 // can be using the same content 24 treeShas, err := lsTreeBlobs(ref, filter) 25 if err != nil { 26 return err 27 } 28 29 pcw, err := catFileBatchTree(treeShas) 30 if err != nil { 31 return err 32 } 33 34 for p := range pcw.Results { 35 cb(p, nil) 36 } 37 38 if err := pcw.Wait(); err != nil { 39 cb(nil, err) 40 } 41 return nil 42 } 43 44 // catFileBatchTree uses git cat-file --batch to get the object contents 45 // of a git object, given its sha1. The contents will be decoded into 46 // a Git LFS pointer. treeblobs is a channel over which blob entries 47 // will be sent. It returns a channel from which point.Pointers can be read. 48 func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper, error) { 49 scanner, err := NewPointerScanner() 50 if err != nil { 51 scanner.Close() 52 53 return nil, err 54 } 55 56 pointers := make(chan *WrappedPointer, chanBufSize) 57 errchan := make(chan error, 10) // Multiple errors possible 58 59 go func() { 60 for t := range treeblobs.Results { 61 hasNext := scanner.Scan(t.Sha1) 62 if p := scanner.Pointer(); p != nil { 63 p.Name = t.Filename 64 pointers <- p 65 } 66 67 if err := scanner.Err(); err != nil { 68 errchan <- err 69 } 70 71 if !hasNext { 72 break 73 } 74 } 75 76 // Deal with nested error from incoming treeblobs 77 err := treeblobs.Wait() 78 if err != nil { 79 errchan <- err 80 } 81 82 if err = scanner.Close(); err != nil { 83 errchan <- err 84 } 85 86 close(pointers) 87 close(errchan) 88 }() 89 90 return NewPointerChannelWrapper(pointers, errchan), nil 91 } 92 93 // Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files 94 // The returned channel will be sent these blobs which should be sent to catFileBatchTree 95 // for final check & conversion to Pointer 96 func lsTreeBlobs(ref string, filter *filepathfilter.Filter) (*TreeBlobChannelWrapper, error) { 97 cmd, err := startCommand("git", "ls-tree", 98 "-r", // recurse 99 "-l", // report object size (we'll need this) 100 "-z", // null line termination 101 "--full-tree", // start at the root regardless of where we are in it 102 ref, 103 ) 104 105 if err != nil { 106 return nil, err 107 } 108 109 cmd.Stdin.Close() 110 111 blobs := make(chan TreeBlob, chanBufSize) 112 errchan := make(chan error, 1) 113 114 go func() { 115 scanner := newLsTreeScanner(cmd.Stdout) 116 for scanner.Scan() { 117 if t := scanner.TreeBlob(); t != nil && filter.Allows(t.Filename) { 118 blobs <- *t 119 } 120 } 121 122 stderr, _ := ioutil.ReadAll(cmd.Stderr) 123 err := cmd.Wait() 124 if err != nil { 125 errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr)) 126 } 127 close(blobs) 128 close(errchan) 129 }() 130 131 return NewTreeBlobChannelWrapper(blobs, errchan), nil 132 } 133 134 type lsTreeScanner struct { 135 s *bufio.Scanner 136 tree *TreeBlob 137 } 138 139 func newLsTreeScanner(r io.Reader) *lsTreeScanner { 140 s := bufio.NewScanner(r) 141 s.Split(scanNullLines) 142 return &lsTreeScanner{s: s} 143 } 144 145 func (s *lsTreeScanner) TreeBlob() *TreeBlob { 146 return s.tree 147 } 148 149 func (s *lsTreeScanner) Err() error { 150 return nil 151 } 152 153 func (s *lsTreeScanner) Scan() bool { 154 t, hasNext := s.next() 155 s.tree = t 156 return hasNext 157 } 158 159 func (s *lsTreeScanner) next() (*TreeBlob, bool) { 160 hasNext := s.s.Scan() 161 line := s.s.Text() 162 parts := strings.SplitN(line, "\t", 2) 163 if len(parts) < 2 { 164 return nil, hasNext 165 } 166 167 attrs := strings.SplitN(parts[0], " ", 4) 168 if len(attrs) < 4 { 169 return nil, hasNext 170 } 171 172 if attrs[1] != "blob" { 173 return nil, hasNext 174 } 175 176 sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64) 177 if err != nil { 178 return nil, hasNext 179 } 180 181 if sz < blobSizeCutoff { 182 sha1 := attrs[2] 183 filename := parts[1] 184 return &TreeBlob{Sha1: sha1, Filename: filename}, hasNext 185 } 186 return nil, hasNext 187 } 188 189 func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) { 190 if atEOF && len(data) == 0 { 191 return 0, nil, nil 192 } 193 194 if i := bytes.IndexByte(data, '\000'); i >= 0 { 195 // We have a full null-terminated line. 196 return i + 1, data[0:i], nil 197 } 198 199 // If we're at EOF, we have a final, non-terminated line. Return it. 200 if atEOF { 201 return len(data), data, nil 202 } 203 204 // Request more data. 205 return 0, nil, nil 206 }