github.com/2lambda123/git-lfs@v2.5.2+incompatible/git/rev_list_scanner.go (about) 1 package git 2 3 import ( 4 "bufio" 5 "encoding/hex" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "regexp" 10 "strings" 11 "sync" 12 13 "github.com/git-lfs/git-lfs/errors" 14 "github.com/rubyist/tracerx" 15 ) 16 17 // ScanningMode is a constant type that allows for variation in the range of 18 // commits to scan when given to the `*git.RevListScanner` type. 19 type ScanningMode int 20 21 const ( 22 // ScanRefsMode will scan between two refspecs. 23 ScanRefsMode ScanningMode = iota 24 // ScanAllMode will scan all history. 25 ScanAllMode 26 // ScanLeftToRemoteMode will scan the difference between any included 27 // SHA1s and a remote tracking ref. 28 ScanLeftToRemoteMode 29 ) 30 31 // RevListOrder is a constant type that allows for variation in the ordering of 32 // revisions given by the *RevListScanner below. 33 type RevListOrder int 34 35 const ( 36 // DefaultRevListOrder is the zero-value for this type and yields the 37 // results as given by git-rev-list(1) without any `--<t>-order` 38 // argument given. By default: reverse chronological order. 39 DefaultRevListOrder RevListOrder = iota 40 // DateRevListOrder gives the revisions such that no parents are shown 41 // before children, and otherwise in commit timestamp order. 42 DateRevListOrder 43 // AuthorDateRevListOrder gives the revisions such that no parents are 44 // shown before children, and otherwise in author date timestamp order. 45 AuthorDateRevListOrder 46 // TopoRevListOrder gives the revisions such that they appear in 47 // topological order. 48 TopoRevListOrder 49 ) 50 51 // Flag returns the command-line flag to be passed to git-rev-list(1) in order 52 // to order the output according to the given RevListOrder. It returns both the 53 // flag ("--date-order", "--topo-order", etc) and a bool, whether or not to 54 // append the flag (for instance, DefaultRevListOrder requires no flag). 55 // 56 // Given a type other than those defined above, Flag() will panic(). 57 func (o RevListOrder) Flag() (string, bool) { 58 switch o { 59 case DefaultRevListOrder: 60 return "", false 61 case DateRevListOrder: 62 return "--date-order", true 63 case AuthorDateRevListOrder: 64 return "--author-date-order", true 65 case TopoRevListOrder: 66 return "--topo-order", true 67 default: 68 panic(fmt.Sprintf("git/rev_list_scanner: unknown RevListOrder %d", o)) 69 } 70 } 71 72 // ScanRefsOptions is an "options" type that is used to configure a scan 73 // operation on the `*git.RevListScanner` instance when given to the function 74 // `NewRevListScanner()`. 75 type ScanRefsOptions struct { 76 // Mode is the scan mode to apply, see above. 77 Mode ScanningMode 78 // Remote is the current remote to scan against, if using 79 // ScanLeftToRemoveMode. 80 Remote string 81 // SkipDeletedBlobs specifies whether or not to traverse into commit 82 // ancestry (revealing potentially deleted (unreferenced) blobs, trees, 83 // or commits. 84 SkipDeletedBlobs bool 85 // Order specifies the order in which revisions are yielded from the 86 // output of `git-rev-list(1)`. For more information, see the above 87 // documentation on the RevListOrder type. 88 Order RevListOrder 89 // CommitsOnly specifies whether or not the *RevListScanner should 90 // return only commits, or all objects in range by performing a 91 // traversal of the graph. By default, false: show all objects. 92 CommitsOnly bool 93 // WorkingDir specifies the working directory in which to run 94 // git-rev-list(1). If this is an empty string, (has len(WorkingDir) == 95 // 0), it is equivalent to running in os.Getwd(). 96 WorkingDir string 97 // Reverse specifies whether or not to give the revisions in reverse 98 // order. 99 Reverse bool 100 101 // SkippedRefs provides a list of refs to ignore. 102 SkippedRefs []string 103 // Mutex guards names. 104 Mutex *sync.Mutex 105 // Names maps Git object IDs (encoded as hex using 106 // hex.EncodeString()) to their names, i.e., a directory name 107 // (fully-qualified) for trees, or a pathspec for blob tree entries. 108 Names map[string]string 109 } 110 111 // GetName returns the name associated with a given blob/tree sha and "true" if 112 // it exists, or ("", false) if it doesn't. 113 // 114 // GetName is guarded by a use of o.Mutex, and is goroutine safe. 115 func (o *ScanRefsOptions) GetName(sha string) (string, bool) { 116 o.Mutex.Lock() 117 defer o.Mutex.Unlock() 118 119 name, ok := o.Names[sha] 120 return name, ok 121 } 122 123 // SetName sets the name associated with a given blob/tree sha. 124 // 125 // SetName is guarded by a use of o.Mutex, and is therefore goroutine safe. 126 func (o *ScanRefsOptions) SetName(sha, name string) { 127 o.Mutex.Lock() 128 defer o.Mutex.Unlock() 129 130 o.Names[sha] = name 131 } 132 133 // RevListScanner is a Scanner type that parses through results of the `git 134 // rev-list` command. 135 type RevListScanner struct { 136 // s is a buffered scanner feeding from the output (stdout) of 137 // git-rev-list(1) invocation. 138 s *bufio.Scanner 139 // closeFn is an optional type returning an error yielded by closing any 140 // resources held by an open (running) instance of the *RevListScanner 141 // type. 142 closeFn func() error 143 144 // name is the name of the most recently read object. 145 name string 146 // oid is the oid of the most recently read object. 147 oid []byte 148 // err is the most recently encountered error. 149 err error 150 } 151 152 var ( 153 // ambiguousRegex is a regular expression matching the output of stderr 154 // when ambiguous refnames are encountered. 155 ambiguousRegex = regexp.MustCompile(`warning: refname (.*) is ambiguous`) 156 157 // z40 is a regular expression matching the empty blob/commit/tree 158 // SHA: "0000000000000000000000000000000000000000". 159 z40 = regexp.MustCompile(`\^?0{40}`) 160 ) 161 162 // NewRevListScanner instantiates a new RevListScanner instance scanning all 163 // revisions reachable by refs contained in "include" and not reachable by any 164 // refs included in "excluded", using the *ScanRefsOptions "opt" configuration. 165 // 166 // It returns a new *RevListScanner instance, or an error if one was 167 // encountered. Upon returning, the `git-rev-list(1)` instance is already 168 // running, and Scan() may be called immediately. 169 func NewRevListScanner(include, excluded []string, opt *ScanRefsOptions) (*RevListScanner, error) { 170 stdin, args, err := revListArgs(include, excluded, opt) 171 if err != nil { 172 return nil, err 173 } 174 175 cmd := gitNoLFS(args...).Cmd 176 if len(opt.WorkingDir) > 0 { 177 cmd.Dir = opt.WorkingDir 178 } 179 180 cmd.Stdin = stdin 181 stdout, err := cmd.StdoutPipe() 182 if err != nil { 183 return nil, err 184 } 185 stderr, err := cmd.StderrPipe() 186 if err != nil { 187 return nil, err 188 } 189 190 tracerx.Printf("run_command: git %s", strings.Join(args, " ")) 191 if err := cmd.Start(); err != nil { 192 return nil, err 193 } 194 195 return &RevListScanner{ 196 s: bufio.NewScanner(stdout), 197 closeFn: func() error { 198 msg, _ := ioutil.ReadAll(stderr) 199 200 // First check if there was a non-zero exit code given 201 // when Wait()-ing on the command execution. 202 if err := cmd.Wait(); err != nil { 203 return errors.Errorf("Error in git %s: %v %s", 204 strings.Join(args, " "), err, msg) 205 } 206 207 // If the command exited cleanly, but found an ambiguous 208 // refname, promote that to an error and return it. 209 // 210 // `git-rev-list(1)` does not treat ambiguous refnames 211 // as fatal (non-zero exit status), but we do. 212 if am := ambiguousRegex.FindSubmatch(msg); len(am) > 1 { 213 return errors.Errorf("ref %s is ambiguous", am[1]) 214 } 215 return nil 216 }, 217 }, nil 218 } 219 220 // revListArgs returns the arguments for a given included and excluded set of 221 // SHA1s, and ScanRefsOptions instance. 222 // 223 // In order, it returns the contents of stdin as an io.Reader, the args passed 224 // to git as a []string, and any error encountered in generating those if one 225 // occurred. 226 func revListArgs(include, exclude []string, opt *ScanRefsOptions) (io.Reader, []string, error) { 227 var stdin io.Reader 228 args := []string{"rev-list", "--stdin"} 229 if !opt.CommitsOnly { 230 args = append(args, "--objects") 231 } 232 233 if opt.Reverse { 234 args = append(args, "--reverse") 235 } 236 237 if orderFlag, ok := opt.Order.Flag(); ok { 238 args = append(args, orderFlag) 239 } 240 241 switch opt.Mode { 242 case ScanRefsMode: 243 if opt.SkipDeletedBlobs { 244 args = append(args, "--no-walk") 245 } else { 246 args = append(args, "--do-walk") 247 } 248 249 stdin = strings.NewReader(strings.Join( 250 includeExcludeShas(include, exclude), "\n")) 251 case ScanAllMode: 252 args = append(args, "--all") 253 case ScanLeftToRemoteMode: 254 if len(opt.SkippedRefs) == 0 { 255 args = append(args, "--not", "--remotes="+opt.Remote) 256 stdin = strings.NewReader(strings.Join( 257 includeExcludeShas(include, exclude), "\n")) 258 } else { 259 stdin = strings.NewReader(strings.Join( 260 append(includeExcludeShas(include, exclude), opt.SkippedRefs...), "\n"), 261 ) 262 } 263 default: 264 return nil, nil, errors.Errorf("unknown scan type: %d", opt.Mode) 265 } 266 return stdin, append(args, "--"), nil 267 } 268 269 func includeExcludeShas(include, exclude []string) []string { 270 include = nonZeroShas(include) 271 exclude = nonZeroShas(exclude) 272 273 args := make([]string, 0, len(include)+len(exclude)) 274 275 for _, i := range include { 276 args = append(args, i) 277 } 278 279 for _, x := range exclude { 280 args = append(args, fmt.Sprintf("^%s", x)) 281 } 282 283 return args 284 } 285 286 func nonZeroShas(all []string) []string { 287 nz := make([]string, 0, len(all)) 288 289 for _, sha := range all { 290 if len(sha) > 0 && !z40.MatchString(sha) { 291 nz = append(nz, sha) 292 } 293 } 294 return nz 295 } 296 297 // Name is an optional field that gives the name of the object (if the object is 298 // a tree, blob). 299 // 300 // It can be called before or after Scan(), but will return "" if called 301 // before. 302 func (s *RevListScanner) Name() string { return s.name } 303 304 // OID is the hex-decoded bytes of the object's ID. 305 // 306 // It can be called before or after Scan(), but will return "" if called 307 // before. 308 func (s *RevListScanner) OID() []byte { return s.oid } 309 310 // Err returns the last encountered error (or nil) after a call to Scan(). 311 // 312 // It SHOULD be called, checked and handled after a call to Scan(). 313 func (s *RevListScanner) Err() error { return s.err } 314 315 // Scan scans the next entry given by git-rev-list(1), and returns true/false 316 // indicating if there are more results to scan. 317 func (s *RevListScanner) Scan() bool { 318 var err error 319 s.oid, s.name, err = s.scan() 320 321 if err != nil { 322 if err != io.EOF { 323 s.err = err 324 } 325 return false 326 } 327 return len(s.oid) > 0 328 } 329 330 // Close closes the RevListScanner by freeing any resources held by the 331 // instance while running, and returns any error encountered while doing so. 332 func (s *RevListScanner) Close() error { 333 if s.closeFn == nil { 334 return nil 335 } 336 return s.closeFn() 337 } 338 339 // scan provides the internal implementation of scanning a line of text from the 340 // output of `git-rev-list(1)`. 341 func (s *RevListScanner) scan() ([]byte, string, error) { 342 if !s.s.Scan() { 343 return nil, "", s.s.Err() 344 } 345 346 line := strings.TrimSpace(s.s.Text()) 347 if len(line) < 40 { 348 return nil, "", nil 349 } 350 351 sha1, err := hex.DecodeString(line[:40]) 352 if err != nil { 353 return nil, "", err 354 } 355 356 var name string 357 if len(line) > 40 { 358 name = line[41:] 359 } 360 361 return sha1, name, nil 362 }