github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/go/not-internal/modfetch/codehost/git.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package codehost
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"net/url"
    14  	"os"
    15  	"os/exec"
    16  	"path/filepath"
    17  	"sort"
    18  	"strconv"
    19  	"strings"
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/gagliardetto/golang-go/cmd/go/not-internal/lockedfile"
    24  	"github.com/gagliardetto/golang-go/cmd/go/not-internal/par"
    25  	"github.com/gagliardetto/golang-go/cmd/go/not-internal/web"
    26  
    27  	"golang.org/x/mod/semver"
    28  )
    29  
    30  // GitRepo returns the code repository at the given Git remote reference.
    31  func GitRepo(remote string) (Repo, error) {
    32  	return newGitRepoCached(remote, false)
    33  }
    34  
    35  // LocalGitRepo is like Repo but accepts both Git remote references
    36  // and paths to repositories on the local file system.
    37  func LocalGitRepo(remote string) (Repo, error) {
    38  	return newGitRepoCached(remote, true)
    39  }
    40  
    41  // A notExistError wraps another error to retain its original text
    42  // but makes it opaquely equivalent to os.ErrNotExist.
    43  type notExistError struct {
    44  	err error
    45  }
    46  
    47  func (e notExistError) Error() string   { return e.err.Error() }
    48  func (notExistError) Is(err error) bool { return err == os.ErrNotExist }
    49  
// gitWorkDirType is the type name passed to WorkDir when newGitRepo asks for
// the local working directory (and lock file path) of a remote Git repository.
const gitWorkDirType = "git3"

// gitRepoCache memoizes the outcome of newGitRepo (the Repo and the error)
// for each distinct (remote, localOK) pair; see newGitRepoCached.
var gitRepoCache par.Cache
    53  
    54  func newGitRepoCached(remote string, localOK bool) (Repo, error) {
    55  	type key struct {
    56  		remote  string
    57  		localOK bool
    58  	}
    59  	type cached struct {
    60  		repo Repo
    61  		err  error
    62  	}
    63  
    64  	c := gitRepoCache.Do(key{remote, localOK}, func() interface{} {
    65  		repo, err := newGitRepo(remote, localOK)
    66  		return cached{repo, err}
    67  	}).(cached)
    68  
    69  	return c.repo, c.err
    70  }
    71  
    72  func newGitRepo(remote string, localOK bool) (Repo, error) {
    73  	r := &gitRepo{remote: remote}
    74  	if strings.Contains(remote, "://") {
    75  		// This is a remote path.
    76  		var err error
    77  		r.dir, r.mu.Path, err = WorkDir(gitWorkDirType, r.remote)
    78  		if err != nil {
    79  			return nil, err
    80  		}
    81  
    82  		unlock, err := r.mu.Lock()
    83  		if err != nil {
    84  			return nil, err
    85  		}
    86  		defer unlock()
    87  
    88  		if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil {
    89  			if _, err := Run(r.dir, "git", "init", "--bare"); err != nil {
    90  				os.RemoveAll(r.dir)
    91  				return nil, err
    92  			}
    93  			// We could just say git fetch https://whatever later,
    94  			// but this lets us say git fetch origin instead, which
    95  			// is a little nicer. More importantly, using a named remote
    96  			// avoids a problem with Git LFS. See golang.org/issue/25605.
    97  			if _, err := Run(r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil {
    98  				os.RemoveAll(r.dir)
    99  				return nil, err
   100  			}
   101  		}
   102  		r.remoteURL = r.remote
   103  		r.remote = "origin"
   104  	} else {
   105  		// Local path.
   106  		// Disallow colon (not in ://) because sometimes
   107  		// that's rcp-style host:path syntax and sometimes it's not (c:\work).
   108  		// The go command has always insisted on URL syntax for ssh.
   109  		if strings.Contains(remote, ":") {
   110  			return nil, fmt.Errorf("git remote cannot use host:path syntax")
   111  		}
   112  		if !localOK {
   113  			return nil, fmt.Errorf("git remote must not be local directory")
   114  		}
   115  		r.local = true
   116  		info, err := os.Stat(remote)
   117  		if err != nil {
   118  			return nil, err
   119  		}
   120  		if !info.IsDir() {
   121  			return nil, fmt.Errorf("%s exists but is not a directory", remote)
   122  		}
   123  		r.dir = remote
   124  		r.mu.Path = r.dir + ".lock"
   125  	}
   126  	return r, nil
   127  }
   128  
// A gitRepo is the Repo implementation returned by newGitRepo.
type gitRepo struct {
	// remote is the remote argument given to git commands. For URL remotes
	// newGitRepo sets it to "origin" and stores the original URL in remoteURL;
	// for local repositories it remains the directory path.
	// local reports whether the repository is a local directory.
	// dir is the directory in which git commands are run.
	remote, remoteURL string
	local             bool
	dir               string

	mu lockedfile.Mutex // protects fetchLevel and git repo state

	// fetchLevel is one of fetchNone, fetchSome, or fetchAll.
	fetchLevel int

	// statCache memoizes the results of stat, keyed by the requested rev (see Stat).
	statCache par.Cache

	// refsOnce guards the single call to loadRefs, which fills in
	// either refs or refsErr.
	refsOnce sync.Once
	// refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master")
	// to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6")
	refs    map[string]string
	refsErr error

	// localTagsOnce guards the single call to loadLocalTags, which fills in
	// localTags with the tag names present in the local repository.
	// localTags stays nil if listing the tags fails.
	localTagsOnce sync.Once
	localTags     map[string]bool
}
   149  
// Values of gitRepo.fetchLevel, ordered from least to most fetched.
const (
	// How much have we fetched into the git repo (in this process)?
	fetchNone = iota // nothing yet
	fetchSome        // shallow fetches of individual hashes
	fetchAll         // all remote branches and tags, with full history (see fetchRefsLocked)
)
   156  
   157  // loadLocalTags loads tag references from the local git cache
   158  // into the map r.localTags.
   159  // Should only be called as r.localTagsOnce.Do(r.loadLocalTags).
   160  func (r *gitRepo) loadLocalTags() {
   161  	// The git protocol sends all known refs and ls-remote filters them on the client side,
   162  	// so we might as well record both heads and tags in one shot.
   163  	// Most of the time we only care about tags but sometimes we care about heads too.
   164  	out, err := Run(r.dir, "git", "tag", "-l")
   165  	if err != nil {
   166  		return
   167  	}
   168  
   169  	r.localTags = make(map[string]bool)
   170  	for _, line := range strings.Split(string(out), "\n") {
   171  		if line != "" {
   172  			r.localTags[line] = true
   173  		}
   174  	}
   175  }
   176  
   177  // loadRefs loads heads and tags references from the remote into the map r.refs.
   178  // Should only be called as r.refsOnce.Do(r.loadRefs).
   179  func (r *gitRepo) loadRefs() {
   180  	// The git protocol sends all known refs and ls-remote filters them on the client side,
   181  	// so we might as well record both heads and tags in one shot.
   182  	// Most of the time we only care about tags but sometimes we care about heads too.
   183  	out, gitErr := Run(r.dir, "git", "ls-remote", "-q", r.remote)
   184  	if gitErr != nil {
   185  		if rerr, ok := gitErr.(*RunError); ok {
   186  			if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
   187  				rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
   188  			}
   189  		}
   190  
   191  		// If the remote URL doesn't exist at all, ideally we should treat the whole
   192  		// repository as nonexistent by wrapping the error in a notExistError.
   193  		// For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL
   194  		// ourselves and see what code it serves.
   195  		if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") {
   196  			if _, err := web.GetBytes(u); errors.Is(err, os.ErrNotExist) {
   197  				gitErr = notExistError{gitErr}
   198  			}
   199  		}
   200  
   201  		r.refsErr = gitErr
   202  		return
   203  	}
   204  
   205  	r.refs = make(map[string]string)
   206  	for _, line := range strings.Split(string(out), "\n") {
   207  		f := strings.Fields(line)
   208  		if len(f) != 2 {
   209  			continue
   210  		}
   211  		if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
   212  			r.refs[f[1]] = f[0]
   213  		}
   214  	}
   215  	for ref, hash := range r.refs {
   216  		if strings.HasSuffix(ref, "^{}") { // record unwrapped annotated tag as value of tag
   217  			r.refs[strings.TrimSuffix(ref, "^{}")] = hash
   218  			delete(r.refs, ref)
   219  		}
   220  	}
   221  }
   222  
   223  func (r *gitRepo) Tags(prefix string) ([]string, error) {
   224  	r.refsOnce.Do(r.loadRefs)
   225  	if r.refsErr != nil {
   226  		return nil, r.refsErr
   227  	}
   228  
   229  	tags := []string{}
   230  	for ref := range r.refs {
   231  		if !strings.HasPrefix(ref, "refs/tags/") {
   232  			continue
   233  		}
   234  		tag := ref[len("refs/tags/"):]
   235  		if !strings.HasPrefix(tag, prefix) {
   236  			continue
   237  		}
   238  		tags = append(tags, tag)
   239  	}
   240  	sort.Strings(tags)
   241  	return tags, nil
   242  }
   243  
   244  func (r *gitRepo) Latest() (*RevInfo, error) {
   245  	r.refsOnce.Do(r.loadRefs)
   246  	if r.refsErr != nil {
   247  		return nil, r.refsErr
   248  	}
   249  	if r.refs["HEAD"] == "" {
   250  		return nil, ErrNoCommits
   251  	}
   252  	return r.Stat(r.refs["HEAD"])
   253  }
   254  
   255  // findRef finds some ref name for the given hash,
   256  // for use when the server requires giving a ref instead of a hash.
   257  // There may be multiple ref names for a given hash,
   258  // in which case this returns some name - it doesn't matter which.
   259  func (r *gitRepo) findRef(hash string) (ref string, ok bool) {
   260  	r.refsOnce.Do(r.loadRefs)
   261  	for ref, h := range r.refs {
   262  		if h == hash {
   263  			return ref, true
   264  		}
   265  	}
   266  	return "", false
   267  }
   268  
// minHashDigits is the minimum number of digits to require
// before accepting a hex digit sequence as potentially identifying
// a specific commit in a git repo. (Of course, users can always
// specify more digits, and many will paste in all 40 digits,
// but many of git's commands default to printing short hashes
// as 7 digits.)
//
// stat uses it, together with an upper bound of 40 digits, to decide
// whether a rev might be a commit hash or a prefix of one.
const minHashDigits = 7
   276  
   277  // stat stats the given rev in the local repository,
   278  // or else it fetches more info from the remote repository and tries again.
   279  func (r *gitRepo) stat(rev string) (*RevInfo, error) {
   280  	if r.local {
   281  		return r.statLocal(rev, rev)
   282  	}
   283  
   284  	// Fast path: maybe rev is a hash we already have locally.
   285  	didStatLocal := false
   286  	if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
   287  		if info, err := r.statLocal(rev, rev); err == nil {
   288  			return info, nil
   289  		}
   290  		didStatLocal = true
   291  	}
   292  
   293  	// Maybe rev is a tag we already have locally.
   294  	// (Note that we're excluding branches, which can be stale.)
   295  	r.localTagsOnce.Do(r.loadLocalTags)
   296  	if r.localTags[rev] {
   297  		return r.statLocal(rev, "refs/tags/"+rev)
   298  	}
   299  
   300  	// Maybe rev is the name of a tag or branch on the remote server.
   301  	// Or maybe it's the prefix of a hash of a named ref.
   302  	// Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
   303  	r.refsOnce.Do(r.loadRefs)
   304  	var ref, hash string
   305  	if r.refs["refs/tags/"+rev] != "" {
   306  		ref = "refs/tags/" + rev
   307  		hash = r.refs[ref]
   308  		// Keep rev as is: tags are assumed not to change meaning.
   309  	} else if r.refs["refs/heads/"+rev] != "" {
   310  		ref = "refs/heads/" + rev
   311  		hash = r.refs[ref]
   312  		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
   313  	} else if rev == "HEAD" && r.refs["HEAD"] != "" {
   314  		ref = "HEAD"
   315  		hash = r.refs[ref]
   316  		rev = hash // Replace rev, because meaning of HEAD can change.
   317  	} else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
   318  		// At the least, we have a hash prefix we can look up after the fetch below.
   319  		// Maybe we can map it to a full hash using the known refs.
   320  		prefix := rev
   321  		// Check whether rev is prefix of known ref hash.
   322  		for k, h := range r.refs {
   323  			if strings.HasPrefix(h, prefix) {
   324  				if hash != "" && hash != h {
   325  					// Hash is an ambiguous hash prefix.
   326  					// More information will not change that.
   327  					return nil, fmt.Errorf("ambiguous revision %s", rev)
   328  				}
   329  				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
   330  					ref = k
   331  				}
   332  				rev = h
   333  				hash = h
   334  			}
   335  		}
   336  		if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
   337  			hash = rev
   338  		}
   339  	} else {
   340  		return nil, &UnknownRevisionError{Rev: rev}
   341  	}
   342  
   343  	// Protect r.fetchLevel and the "fetch more and more" sequence.
   344  	unlock, err := r.mu.Lock()
   345  	if err != nil {
   346  		return nil, err
   347  	}
   348  	defer unlock()
   349  
   350  	// Perhaps r.localTags did not have the ref when we loaded local tags,
   351  	// but we've since done fetches that pulled down the hash we need
   352  	// (or already have the hash we need, just without its tag).
   353  	// Either way, try a local stat before falling back to network I/O.
   354  	if !didStatLocal {
   355  		if info, err := r.statLocal(rev, hash); err == nil {
   356  			if strings.HasPrefix(ref, "refs/tags/") {
   357  				// Make sure tag exists, so it will be in localTags next time the go command is run.
   358  				Run(r.dir, "git", "tag", strings.TrimPrefix(ref, "refs/tags/"), hash)
   359  			}
   360  			return info, nil
   361  		}
   362  	}
   363  
   364  	// If we know a specific commit we need and its ref, fetch it.
   365  	// We do NOT fetch arbitrary hashes (when we don't know the ref)
   366  	// because we want to avoid ever importing a commit that isn't
   367  	// reachable from refs/tags/* or refs/heads/* or HEAD.
   368  	// Both Gerrit and GitHub expose every CL/PR as a named ref,
   369  	// and we don't want those commits masquerading as being real
   370  	// pseudo-versions in the main repo.
   371  	if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local {
   372  		r.fetchLevel = fetchSome
   373  		var refspec string
   374  		if ref != "" && ref != "HEAD" {
   375  			// If we do know the ref name, save the mapping locally
   376  			// so that (if it is a tag) it can show up in localTags
   377  			// on a future call. Also, some servers refuse to allow
   378  			// full hashes in ref specs, so prefer a ref name if known.
   379  			refspec = ref + ":" + ref
   380  		} else {
   381  			// Fetch the hash but give it a local name (refs/dummy),
   382  			// because that triggers the fetch behavior of creating any
   383  			// other known remote tags for the hash. We never use
   384  			// refs/dummy (it's not refs/tags/dummy) and it will be
   385  			// overwritten in the next command, and that's fine.
   386  			ref = hash
   387  			refspec = hash + ":refs/dummy"
   388  		}
   389  		_, err := Run(r.dir, "git", "fetch", "-f", "--depth=1", r.remote, refspec)
   390  		if err == nil {
   391  			return r.statLocal(rev, ref)
   392  		}
   393  		// Don't try to be smart about parsing the error.
   394  		// It's too complex and varies too much by git version.
   395  		// No matter what went wrong, fall back to a complete fetch.
   396  	}
   397  
   398  	// Last resort.
   399  	// Fetch all heads and tags and hope the hash we want is in the history.
   400  	if err := r.fetchRefsLocked(); err != nil {
   401  		return nil, err
   402  	}
   403  
   404  	return r.statLocal(rev, rev)
   405  }
   406  
   407  // fetchRefsLocked fetches all heads and tags from the origin, along with the
   408  // ancestors of those commits.
   409  //
   410  // We only fetch heads and tags, not arbitrary other commits: we don't want to
   411  // pull in off-branch commits (such as rejected GitHub pull requests) that the
   412  // server may be willing to provide. (See the comments within the stat method
   413  // for more detail.)
   414  //
   415  // fetchRefsLocked requires that r.mu remain locked for the duration of the call.
   416  func (r *gitRepo) fetchRefsLocked() error {
   417  	if r.fetchLevel < fetchAll {
   418  		// NOTE: To work around a bug affecting Git clients up to at least 2.23.0
   419  		// (2019-08-16), we must first expand the set of local refs, and only then
   420  		// unshallow the repository as a separate fetch operation. (See
   421  		// golang.org/issue/34266 and
   422  		// https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.)
   423  
   424  		if _, err := Run(r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
   425  			return err
   426  		}
   427  
   428  		if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil {
   429  			if _, err := Run(r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil {
   430  				return err
   431  			}
   432  		}
   433  
   434  		r.fetchLevel = fetchAll
   435  	}
   436  	return nil
   437  }
   438  
   439  // statLocal returns a RevInfo describing rev in the local git repository.
   440  // It uses version as info.Version.
   441  func (r *gitRepo) statLocal(version, rev string) (*RevInfo, error) {
   442  	out, err := Run(r.dir, "git", "-c", "log.showsignature=false", "log", "-n1", "--format=format:%H %ct %D", rev, "--")
   443  	if err != nil {
   444  		return nil, &UnknownRevisionError{Rev: rev}
   445  	}
   446  	f := strings.Fields(string(out))
   447  	if len(f) < 2 {
   448  		return nil, fmt.Errorf("unexpected response from git log: %q", out)
   449  	}
   450  	hash := f[0]
   451  	if strings.HasPrefix(hash, version) {
   452  		version = hash // extend to full hash
   453  	}
   454  	t, err := strconv.ParseInt(f[1], 10, 64)
   455  	if err != nil {
   456  		return nil, fmt.Errorf("invalid time from git log: %q", out)
   457  	}
   458  
   459  	info := &RevInfo{
   460  		Name:    hash,
   461  		Short:   ShortenSHA1(hash),
   462  		Time:    time.Unix(t, 0).UTC(),
   463  		Version: hash,
   464  	}
   465  
   466  	// Add tags. Output looks like:
   467  	//	ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD
   468  	for i := 2; i < len(f); i++ {
   469  		if f[i] == "tag:" {
   470  			i++
   471  			if i < len(f) {
   472  				info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ","))
   473  			}
   474  		}
   475  	}
   476  	sort.Strings(info.Tags)
   477  
   478  	// Used hash as info.Version above.
   479  	// Use caller's suggested version if it appears in the tag list
   480  	// (filters out branch names, HEAD).
   481  	for _, tag := range info.Tags {
   482  		if version == tag {
   483  			info.Version = version
   484  		}
   485  	}
   486  
   487  	return info, nil
   488  }
   489  
   490  func (r *gitRepo) Stat(rev string) (*RevInfo, error) {
   491  	if rev == "latest" {
   492  		return r.Latest()
   493  	}
   494  	type cached struct {
   495  		info *RevInfo
   496  		err  error
   497  	}
   498  	c := r.statCache.Do(rev, func() interface{} {
   499  		info, err := r.stat(rev)
   500  		return cached{info, err}
   501  	}).(cached)
   502  	return c.info, c.err
   503  }
   504  
   505  func (r *gitRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) {
   506  	// TODO: Could use git cat-file --batch.
   507  	info, err := r.Stat(rev) // download rev into local git repo
   508  	if err != nil {
   509  		return nil, err
   510  	}
   511  	out, err := Run(r.dir, "git", "cat-file", "blob", info.Name+":"+file)
   512  	if err != nil {
   513  		return nil, os.ErrNotExist
   514  	}
   515  	return out, nil
   516  }
   517  
   518  func (r *gitRepo) ReadFileRevs(revs []string, file string, maxSize int64) (map[string]*FileRev, error) {
   519  	// Create space to hold results.
   520  	files := make(map[string]*FileRev)
   521  	for _, rev := range revs {
   522  		f := &FileRev{Rev: rev}
   523  		files[rev] = f
   524  	}
   525  
   526  	// Collect locally-known revs.
   527  	need, err := r.readFileRevs(revs, file, files)
   528  	if err != nil {
   529  		return nil, err
   530  	}
   531  	if len(need) == 0 {
   532  		return files, nil
   533  	}
   534  
   535  	// Build list of known remote refs that might help.
   536  	var redo []string
   537  	r.refsOnce.Do(r.loadRefs)
   538  	if r.refsErr != nil {
   539  		return nil, r.refsErr
   540  	}
   541  	for _, tag := range need {
   542  		if r.refs["refs/tags/"+tag] != "" {
   543  			redo = append(redo, tag)
   544  		}
   545  	}
   546  	if len(redo) == 0 {
   547  		return files, nil
   548  	}
   549  
   550  	// Protect r.fetchLevel and the "fetch more and more" sequence.
   551  	// See stat method above.
   552  	unlock, err := r.mu.Lock()
   553  	if err != nil {
   554  		return nil, err
   555  	}
   556  	defer unlock()
   557  
   558  	if err := r.fetchRefsLocked(); err != nil {
   559  		return nil, err
   560  	}
   561  
   562  	if _, err := r.readFileRevs(redo, file, files); err != nil {
   563  		return nil, err
   564  	}
   565  
   566  	return files, nil
   567  }
   568  
// readFileRevs reads file at each of the given tags from the local
// repository using a single "git cat-file --batch" invocation.
//
// For every tag it updates the existing fileMap[tag] entry, setting either
// Data (the file contents) or Err. fileMap must already contain an entry
// for each tag. The returned missing slice lists the tags whose
// "refs/tags/<tag>" object is not present locally.
// A non-nil err means git failed or produced output that could not be parsed.
func (r *gitRepo) readFileRevs(tags []string, file string, fileMap map[string]*FileRev) (missing []string, err error) {
	// Ask for two objects per tag: the tag ref itself, then the file within it.
	var stdin bytes.Buffer
	for _, tag := range tags {
		fmt.Fprintf(&stdin, "refs/tags/%s\n", tag)
		fmt.Fprintf(&stdin, "refs/tags/%s:%s\n", tag, file)
	}

	data, err := RunWithStdin(r.dir, &stdin, "git", "cat-file", "--batch")
	if err != nil {
		return nil, err
	}

	// next consumes one response from the front of data.
	// A header line ending in " missing" yields typ "missing" with no body.
	// Otherwise the header must have three fields, with the object type second
	// and the body length in bytes third, followed by that many bytes of body
	// and an optional line terminator.
	// ok is false if the remaining data does not have that shape.
	next := func() (typ string, body []byte, ok bool) {
		var line string
		i := bytes.IndexByte(data, '\n')
		if i < 0 {
			return "", nil, false
		}
		line, data = string(bytes.TrimSpace(data[:i])), data[i+1:]
		if strings.HasSuffix(line, " missing") {
			return "missing", nil, true
		}
		f := strings.Fields(line)
		if len(f) != 3 {
			return "", nil, false
		}
		n, err := strconv.Atoi(f[2])
		if err != nil || n > len(data) {
			return "", nil, false
		}
		body, data = data[:n], data[n:]
		// Skip the line terminator after the body, tolerating "\r\n".
		if len(data) > 0 && data[0] == '\r' {
			data = data[1:]
		}
		if len(data) > 0 && data[0] == '\n' {
			data = data[1:]
		}
		return f[1], body, true
	}

	// badGit returns the error used for any unparseable output.
	badGit := func() ([]string, error) {
		return nil, fmt.Errorf("malformed output from git cat-file --batch")
	}

	// Responses arrive in request order: for each tag, first the tag object,
	// then the file object.
	for _, tag := range tags {
		commitType, _, ok := next()
		if !ok {
			return badGit()
		}
		fileType, fileData, ok := next()
		if !ok {
			return badGit()
		}
		f := fileMap[tag]
		// Clear any result left over from an earlier attempt for this tag.
		f.Data = nil
		f.Err = nil
		switch commitType {
		default:
			f.Err = fmt.Errorf("unexpected non-commit type %q for rev %s", commitType, tag)

		case "missing":
			// Note: f.Err must not satisfy os.IsNotExist. That's reserved for the file not existing in a valid commit.
			f.Err = fmt.Errorf("no such rev %s", tag)
			missing = append(missing, tag)

		case "tag", "commit":
			switch fileType {
			default:
				f.Err = &os.PathError{Path: tag + ":" + file, Op: "read", Err: fmt.Errorf("unexpected non-blob type %q", fileType)}
			case "missing":
				f.Err = &os.PathError{Path: tag + ":" + file, Op: "read", Err: os.ErrNotExist}
			case "blob":
				f.Data = fileData
			}
		}
	}
	// Anything other than whitespace left over means the output was not what we expected.
	if len(bytes.TrimSpace(data)) != 0 {
		return badGit()
	}

	return missing, nil
}
   651  
// RecentTag returns the highest semantic-version tag that starts with
// prefix and is merged into rev. If major is non-empty, only versions with
// that major version are considered. The returned tag includes prefix.
// It returns an empty tag and a nil error when no such tag is found.
//
// If the first local search is not definitive and the remote has tags
// starting with prefix+"v", RecentTag fetches all heads and tags and
// searches once more.
func (r *gitRepo) RecentTag(rev, prefix, major string) (tag string, err error) {
	info, err := r.Stat(rev)
	if err != nil {
		return "", err
	}
	rev = info.Name // expand hash prefixes

	// describe sets tag and err using 'git for-each-ref' and reports whether the
	// result is definitive.
	describe := func() (definitive bool) {
		var out []byte
		out, err = Run(r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev)
		if err != nil {
			return true
		}

		// prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix
		var highest string
		for _, line := range strings.Split(string(out), "\n") {
			line = strings.TrimSpace(line)
			// git does support lstrip in the for-each-ref format, but it was added in v2.13.0.
			// Stripping here instead gives support for git v2.7.0.
			if !strings.HasPrefix(line, "refs/tags/") {
				continue
			}
			line = line[len("refs/tags/"):]

			if !strings.HasPrefix(line, prefix) {
				continue
			}

			semtag := line[len(prefix):]
			// Consider only tags that are valid and complete (not just major.minor prefixes).
			// NOTE: Do not replace the call to semver.Compare with semver.Max.
			// We want to return the actual tag, not a canonicalized version of it,
			// and semver.Max currently canonicalizes (see golang.org/issue/32700).
			if c := semver.Canonical(semtag); c != "" && strings.HasPrefix(semtag, c) && (major == "" || semver.Major(c) == major) && semver.Compare(semtag, highest) > 0 {
				highest = semtag
			}
		}

		if highest != "" {
			tag = prefix + highest
		}

		// A result counts as definitive only if a tag was found and
		// it does not consist solely of hex digits.
		return tag != "" && !AllHex(tag)
	}

	if describe() {
		return tag, err
	}

	// Git didn't find a version tag preceding the requested rev.
	// See whether any plausible tag exists.
	tags, err := r.Tags(prefix + "v")
	if err != nil {
		return "", err
	}
	if len(tags) == 0 {
		return "", nil
	}

	// There are plausible tags, but we don't know if rev is a descendant of any of them.
	// Fetch the history to find out.

	unlock, err := r.mu.Lock()
	if err != nil {
		return "", err
	}
	defer unlock()

	if err := r.fetchRefsLocked(); err != nil {
		return "", err
	}

	// If we've reached this point, we have all of the commits that are reachable
	// from all heads and tags.
	//
	// The only refs we should be missing are those that are no longer reachable
	// (or never were reachable) from any branch or tag, including the master
	// branch, and we don't want to resolve them anyway (they're probably
	// unreachable for a reason).
	//
	// Try one last time in case some other goroutine fetched rev while we were
	// waiting on the lock.
	describe()
	return tag, err
}
   740  
   741  func (r *gitRepo) DescendsFrom(rev, tag string) (bool, error) {
   742  	// The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so
   743  	// this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go
   744  	// already doesn't work with Git 1.7.1, so at least it's not a regression.
   745  	//
   746  	// git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or
   747  	// 1 if not.
   748  	_, err := Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   749  
   750  	// Git reports "is an ancestor" with exit code 0 and "not an ancestor" with
   751  	// exit code 1.
   752  	// Unfortunately, if we've already fetched rev with a shallow history, git
   753  	// merge-base has been observed to report a false-negative, so don't stop yet
   754  	// even if the exit code is 1!
   755  	if err == nil {
   756  		return true, nil
   757  	}
   758  
   759  	// See whether the tag and rev even exist.
   760  	tags, err := r.Tags(tag)
   761  	if err != nil {
   762  		return false, err
   763  	}
   764  	if len(tags) == 0 {
   765  		return false, nil
   766  	}
   767  
   768  	// NOTE: r.stat is very careful not to fetch commits that we shouldn't know
   769  	// about, like rejected GitHub pull requests, so don't try to short-circuit
   770  	// that here.
   771  	if _, err = r.stat(rev); err != nil {
   772  		return false, err
   773  	}
   774  
   775  	// Now fetch history so that git can search for a path.
   776  	unlock, err := r.mu.Lock()
   777  	if err != nil {
   778  		return false, err
   779  	}
   780  	defer unlock()
   781  
   782  	if r.fetchLevel < fetchAll {
   783  		// Fetch the complete history for all refs and heads. It would be more
   784  		// efficient to only fetch the history from rev to tag, but that's much more
   785  		// complicated, and any kind of shallow fetch is fairly likely to trigger
   786  		// bugs in JGit servers and/or the go command anyway.
   787  		if err := r.fetchRefsLocked(); err != nil {
   788  			return false, err
   789  		}
   790  	}
   791  
   792  	_, err = Run(r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev)
   793  	if err == nil {
   794  		return true, nil
   795  	}
   796  	if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 {
   797  		return false, nil
   798  	}
   799  	return false, err
   800  }
   801  
   802  func (r *gitRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) {
   803  	// TODO: Use maxSize or drop it.
   804  	args := []string{}
   805  	if subdir != "" {
   806  		args = append(args, "--", subdir)
   807  	}
   808  	info, err := r.Stat(rev) // download rev into local git repo
   809  	if err != nil {
   810  		return nil, err
   811  	}
   812  
   813  	unlock, err := r.mu.Lock()
   814  	if err != nil {
   815  		return nil, err
   816  	}
   817  	defer unlock()
   818  
   819  	if err := ensureGitAttributes(r.dir); err != nil {
   820  		return nil, err
   821  	}
   822  
   823  	// Incredibly, git produces different archives depending on whether
   824  	// it is running on a Windows system or not, in an attempt to normalize
   825  	// text file line endings. Setting -c core.autocrlf=input means only
   826  	// translate files on the way into the repo, not on the way out (archive).
   827  	// The -c core.eol=lf should be unnecessary but set it anyway.
   828  	archive, err := Run(r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
   829  	if err != nil {
   830  		if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) {
   831  			return nil, os.ErrNotExist
   832  		}
   833  		return nil, err
   834  	}
   835  
   836  	return ioutil.NopCloser(bytes.NewReader(archive)), nil
   837  }
   838  
   839  // ensureGitAttributes makes sure export-subst and export-ignore features are
   840  // disabled for this repo. This is intended to be run prior to running git
   841  // archive so that zip files are generated that produce consistent ziphashes
   842  // for a given revision, independent of variables such as git version and the
   843  // size of the repo.
   844  //
   845  // See: https://github.com/golang/go/issues/27153
   846  func ensureGitAttributes(repoDir string) (err error) {
   847  	const attr = "\n* -export-subst -export-ignore\n"
   848  
   849  	d := repoDir + "/info"
   850  	p := d + "/attributes"
   851  
   852  	if err := os.MkdirAll(d, 0755); err != nil {
   853  		return err
   854  	}
   855  
   856  	f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
   857  	if err != nil {
   858  		return err
   859  	}
   860  	defer func() {
   861  		closeErr := f.Close()
   862  		if closeErr != nil {
   863  			err = closeErr
   864  		}
   865  	}()
   866  
   867  	b, err := ioutil.ReadAll(f)
   868  	if err != nil {
   869  		return err
   870  	}
   871  	if !bytes.HasSuffix(b, []byte(attr)) {
   872  		_, err := f.WriteString(attr)
   873  		return err
   874  	}
   875  
   876  	return nil
   877  }