github.com/2lambda123/git-lfs@v2.5.2+incompatible/git/rev_list_scanner.go (about)

     1  package git
     2  
     3  import (
     4  	"bufio"
     5  	"encoding/hex"
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"regexp"
    10  	"strings"
    11  	"sync"
    12  
    13  	"github.com/git-lfs/git-lfs/errors"
    14  	"github.com/rubyist/tracerx"
    15  )
    16  
// ScanningMode is a constant type that allows for variation in the range of
// commits to scan when given to the `*git.RevListScanner` type.
//
// The mode is consulted when building the arguments and standard input for
// git-rev-list(1); an unrecognized mode results in an error.
type ScanningMode int

const (
	// ScanRefsMode will scan between two refspecs. The included and
	// excluded revisions are handed to git-rev-list(1) over its standard
	// input.
	ScanRefsMode ScanningMode = iota
	// ScanAllMode will scan all history (git-rev-list(1) is given
	// `--all`).
	ScanAllMode
	// ScanLeftToRemoteMode will scan the difference between any included
	// SHA1s and a remote tracking ref.
	ScanLeftToRemoteMode
)
    30  
// RevListOrder is a constant type that allows for variation in the ordering of
// revisions given by the *RevListScanner below.
//
// Each value maps to at most one git-rev-list(1) ordering flag; see Flag().
type RevListOrder int

const (
	// DefaultRevListOrder is the zero-value for this type and yields the
	// results as given by git-rev-list(1) without any `--<t>-order`
	// argument given. By default: reverse chronological order.
	DefaultRevListOrder RevListOrder = iota
	// DateRevListOrder gives the revisions such that no parents are shown
	// before children, and otherwise in commit timestamp order
	// (`--date-order`).
	DateRevListOrder
	// AuthorDateRevListOrder gives the revisions such that no parents are
	// shown before children, and otherwise in author date timestamp order
	// (`--author-date-order`).
	AuthorDateRevListOrder
	// TopoRevListOrder gives the revisions such that they appear in
	// topological order (`--topo-order`).
	TopoRevListOrder
)
    50  
    51  // Flag returns the command-line flag to be passed to git-rev-list(1) in order
    52  // to order the output according to the given RevListOrder. It returns both the
    53  // flag ("--date-order", "--topo-order", etc) and a bool, whether or not to
    54  // append the flag (for instance, DefaultRevListOrder requires no flag).
    55  //
    56  // Given a type other than those defined above, Flag() will panic().
    57  func (o RevListOrder) Flag() (string, bool) {
    58  	switch o {
    59  	case DefaultRevListOrder:
    60  		return "", false
    61  	case DateRevListOrder:
    62  		return "--date-order", true
    63  	case AuthorDateRevListOrder:
    64  		return "--author-date-order", true
    65  	case TopoRevListOrder:
    66  		return "--topo-order", true
    67  	default:
    68  		panic(fmt.Sprintf("git/rev_list_scanner: unknown RevListOrder %d", o))
    69  	}
    70  }
    71  
// ScanRefsOptions is an "options" type that is used to configure a scan
// operation on the `*git.RevListScanner` instance when given to the function
// `NewRevListScanner()`.
type ScanRefsOptions struct {
	// Mode is the scan mode to apply, see above.
	Mode ScanningMode
	// Remote is the current remote to scan against, if using
	// ScanLeftToRemoteMode.
	Remote string
	// SkipDeletedBlobs specifies whether or not to skip traversing into
	// commit ancestry (which would reveal potentially deleted
	// (unreferenced) blobs, trees, or commits). In ScanRefsMode, true
	// passes `--no-walk` to git-rev-list(1) and false passes `--do-walk`.
	SkipDeletedBlobs bool
	// Order specifies the order in which revisions are yielded from the
	// output of `git-rev-list(1)`. For more information, see the above
	// documentation on the RevListOrder type.
	Order RevListOrder
	// CommitsOnly specifies whether or not the *RevListScanner should
	// return only commits, or all objects in range by performing a
	// traversal of the graph. By default, false: show all objects.
	CommitsOnly bool
	// WorkingDir specifies the working directory in which to run
	// git-rev-list(1). If this is an empty string, (has len(WorkingDir) ==
	// 0), it is equivalent to running in os.Getwd().
	WorkingDir string
	// Reverse specifies whether or not to give the revisions in reverse
	// order.
	Reverse bool

	// SkippedRefs provides a list of refs to ignore. It is only consulted
	// in ScanLeftToRemoteMode: when non-empty, its entries are written to
	// the standard input of git-rev-list(1) after the included/excluded
	// revisions, and the `--not --remotes=<Remote>` arguments are omitted.
	SkippedRefs []string
	// Mutex guards Names. It must be non-nil before GetName() or SetName()
	// is called, since both lock it unconditionally.
	Mutex *sync.Mutex
	// Names maps Git object IDs (encoded as hex using
	// hex.EncodeToString()) to their names, i.e., a directory name
	// (fully-qualified) for trees, or a pathspec for blob tree entries.
	Names map[string]string
}
   110  
   111  // GetName returns the name associated with a given blob/tree sha and "true" if
   112  // it exists, or ("", false) if it doesn't.
   113  //
   114  // GetName is guarded by a use of o.Mutex, and is goroutine safe.
   115  func (o *ScanRefsOptions) GetName(sha string) (string, bool) {
   116  	o.Mutex.Lock()
   117  	defer o.Mutex.Unlock()
   118  
   119  	name, ok := o.Names[sha]
   120  	return name, ok
   121  }
   122  
   123  // SetName sets the name associated with a given blob/tree sha.
   124  //
   125  // SetName is guarded by a use of o.Mutex, and is therefore goroutine safe.
   126  func (o *ScanRefsOptions) SetName(sha, name string) {
   127  	o.Mutex.Lock()
   128  	defer o.Mutex.Unlock()
   129  
   130  	o.Names[sha] = name
   131  }
   132  
// RevListScanner is a Scanner type that parses through results of the `git
// rev-list` command.
//
// Typical use is to call Scan() in a loop, reading OID() and Name() after each
// successful call, then check Err() and finally call Close().
type RevListScanner struct {
	// s is a buffered scanner feeding from the output (stdout) of
	// git-rev-list(1) invocation.
	s *bufio.Scanner
	// closeFn is an optional function that releases any resources held by
	// an open (running) instance of the *RevListScanner type, returning
	// any error encountered while doing so. Close() treats a nil closeFn
	// as a no-op.
	closeFn func() error

	// name is the name of the most recently read object.
	name string
	// oid is the hex-decoded oid of the most recently read object.
	oid []byte
	// err is the most recently encountered error.
	err error
}
   151  
var (
	// ambiguousRegex is a regular expression matching the output of stderr
	// when ambiguous refnames are encountered. Its single capture group
	// holds the ambiguous refname.
	ambiguousRegex = regexp.MustCompile(`warning: refname (.*) is ambiguous`)

	// z40 is a regular expression matching the all-zero (null) object ID,
	// "0000000000000000000000000000000000000000", optionally preceded by
	// a "^". The pattern is not anchored, so it matches any string that
	// contains a run of forty zeros.
	z40 = regexp.MustCompile(`\^?0{40}`)
)
   161  
   162  // NewRevListScanner instantiates a new RevListScanner instance scanning all
   163  // revisions reachable by refs contained in "include" and not reachable by any
   164  // refs included in "excluded", using the *ScanRefsOptions "opt" configuration.
   165  //
   166  // It returns a new *RevListScanner instance, or an error if one was
   167  // encountered. Upon returning, the `git-rev-list(1)` instance is already
   168  // running, and Scan() may be called immediately.
   169  func NewRevListScanner(include, excluded []string, opt *ScanRefsOptions) (*RevListScanner, error) {
   170  	stdin, args, err := revListArgs(include, excluded, opt)
   171  	if err != nil {
   172  		return nil, err
   173  	}
   174  
   175  	cmd := gitNoLFS(args...).Cmd
   176  	if len(opt.WorkingDir) > 0 {
   177  		cmd.Dir = opt.WorkingDir
   178  	}
   179  
   180  	cmd.Stdin = stdin
   181  	stdout, err := cmd.StdoutPipe()
   182  	if err != nil {
   183  		return nil, err
   184  	}
   185  	stderr, err := cmd.StderrPipe()
   186  	if err != nil {
   187  		return nil, err
   188  	}
   189  
   190  	tracerx.Printf("run_command: git %s", strings.Join(args, " "))
   191  	if err := cmd.Start(); err != nil {
   192  		return nil, err
   193  	}
   194  
   195  	return &RevListScanner{
   196  		s: bufio.NewScanner(stdout),
   197  		closeFn: func() error {
   198  			msg, _ := ioutil.ReadAll(stderr)
   199  
   200  			// First check if there was a non-zero exit code given
   201  			// when Wait()-ing on the command execution.
   202  			if err := cmd.Wait(); err != nil {
   203  				return errors.Errorf("Error in git %s: %v %s",
   204  					strings.Join(args, " "), err, msg)
   205  			}
   206  
   207  			// If the command exited cleanly, but found an ambiguous
   208  			// refname, promote that to an error and return it.
   209  			//
   210  			// `git-rev-list(1)` does not treat ambiguous refnames
   211  			// as fatal (non-zero exit status), but we do.
   212  			if am := ambiguousRegex.FindSubmatch(msg); len(am) > 1 {
   213  				return errors.Errorf("ref %s is ambiguous", am[1])
   214  			}
   215  			return nil
   216  		},
   217  	}, nil
   218  }
   219  
   220  // revListArgs returns the arguments for a given included and excluded set of
   221  // SHA1s, and ScanRefsOptions instance.
   222  //
   223  // In order, it returns the contents of stdin as an io.Reader, the args passed
   224  // to git as a []string, and any error encountered in generating those if one
   225  // occurred.
   226  func revListArgs(include, exclude []string, opt *ScanRefsOptions) (io.Reader, []string, error) {
   227  	var stdin io.Reader
   228  	args := []string{"rev-list", "--stdin"}
   229  	if !opt.CommitsOnly {
   230  		args = append(args, "--objects")
   231  	}
   232  
   233  	if opt.Reverse {
   234  		args = append(args, "--reverse")
   235  	}
   236  
   237  	if orderFlag, ok := opt.Order.Flag(); ok {
   238  		args = append(args, orderFlag)
   239  	}
   240  
   241  	switch opt.Mode {
   242  	case ScanRefsMode:
   243  		if opt.SkipDeletedBlobs {
   244  			args = append(args, "--no-walk")
   245  		} else {
   246  			args = append(args, "--do-walk")
   247  		}
   248  
   249  		stdin = strings.NewReader(strings.Join(
   250  			includeExcludeShas(include, exclude), "\n"))
   251  	case ScanAllMode:
   252  		args = append(args, "--all")
   253  	case ScanLeftToRemoteMode:
   254  		if len(opt.SkippedRefs) == 0 {
   255  			args = append(args, "--not", "--remotes="+opt.Remote)
   256  			stdin = strings.NewReader(strings.Join(
   257  				includeExcludeShas(include, exclude), "\n"))
   258  		} else {
   259  			stdin = strings.NewReader(strings.Join(
   260  				append(includeExcludeShas(include, exclude), opt.SkippedRefs...), "\n"),
   261  			)
   262  		}
   263  	default:
   264  		return nil, nil, errors.Errorf("unknown scan type: %d", opt.Mode)
   265  	}
   266  	return stdin, append(args, "--"), nil
   267  }
   268  
   269  func includeExcludeShas(include, exclude []string) []string {
   270  	include = nonZeroShas(include)
   271  	exclude = nonZeroShas(exclude)
   272  
   273  	args := make([]string, 0, len(include)+len(exclude))
   274  
   275  	for _, i := range include {
   276  		args = append(args, i)
   277  	}
   278  
   279  	for _, x := range exclude {
   280  		args = append(args, fmt.Sprintf("^%s", x))
   281  	}
   282  
   283  	return args
   284  }
   285  
   286  func nonZeroShas(all []string) []string {
   287  	nz := make([]string, 0, len(all))
   288  
   289  	for _, sha := range all {
   290  		if len(sha) > 0 && !z40.MatchString(sha) {
   291  			nz = append(nz, sha)
   292  		}
   293  	}
   294  	return nz
   295  }
   296  
   297  // Name is an optional field that gives the name of the object (if the object is
   298  // a tree, blob).
   299  //
   300  // It can be called before or after Scan(), but will return "" if called
   301  // before.
   302  func (s *RevListScanner) Name() string { return s.name }
   303  
   304  // OID is the hex-decoded bytes of the object's ID.
   305  //
   306  // It can be called before or after Scan(), but will return "" if called
   307  // before.
   308  func (s *RevListScanner) OID() []byte { return s.oid }
   309  
   310  // Err returns the last encountered error (or nil) after a call to Scan().
   311  //
   312  // It SHOULD be called, checked and handled after a call to Scan().
   313  func (s *RevListScanner) Err() error { return s.err }
   314  
   315  // Scan scans the next entry given by git-rev-list(1), and returns true/false
   316  // indicating if there are more results to scan.
   317  func (s *RevListScanner) Scan() bool {
   318  	var err error
   319  	s.oid, s.name, err = s.scan()
   320  
   321  	if err != nil {
   322  		if err != io.EOF {
   323  			s.err = err
   324  		}
   325  		return false
   326  	}
   327  	return len(s.oid) > 0
   328  }
   329  
   330  // Close closes the RevListScanner by freeing any resources held by the
   331  // instance while running, and returns any error encountered while doing so.
   332  func (s *RevListScanner) Close() error {
   333  	if s.closeFn == nil {
   334  		return nil
   335  	}
   336  	return s.closeFn()
   337  }
   338  
   339  // scan provides the internal implementation of scanning a line of text from the
   340  // output of `git-rev-list(1)`.
   341  func (s *RevListScanner) scan() ([]byte, string, error) {
   342  	if !s.s.Scan() {
   343  		return nil, "", s.s.Err()
   344  	}
   345  
   346  	line := strings.TrimSpace(s.s.Text())
   347  	if len(line) < 40 {
   348  		return nil, "", nil
   349  	}
   350  
   351  	sha1, err := hex.DecodeString(line[:40])
   352  	if err != nil {
   353  		return nil, "", err
   354  	}
   355  
   356  	var name string
   357  	if len(line) > 40 {
   358  		name = line[41:]
   359  	}
   360  
   361  	return sha1, name, nil
   362  }