github.com/gernest/nezuko@v0.1.2/internal/modfetch/codehost/git.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package codehost
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"os"
    13  	"path/filepath"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	"github.com/gernest/nezuko/internal/lockedfile"
    21  	"github.com/gernest/nezuko/internal/par"
    22  )
    23  
    24  // GitRepo returns the code repository at the given Git remote reference.
    25  func GitRepo(remote string) (Repo, error) {
    26  	return newGitRepoCached(remote, false)
    27  }
    28  
    29  // LocalGitRepo is like Repo but accepts both Git remote references
    30  // and paths to repositories on the local file system.
    31  func LocalGitRepo(remote string) (Repo, error) {
    32  	return newGitRepoCached(remote, true)
    33  }
    34  
// gitWorkDirType is the work directory type passed to WorkDir when
// locating the local cache directory for a remote Git repository.
const gitWorkDirType = "git2"

// gitRepoCache memoizes the results of newGitRepo (both the Repo and the
// error), keyed by the remote string and the localOK flag.
var gitRepoCache par.Cache
    38  
    39  func newGitRepoCached(remote string, localOK bool) (Repo, error) {
    40  	type key struct {
    41  		remote  string
    42  		localOK bool
    43  	}
    44  	type cached struct {
    45  		repo Repo
    46  		err  error
    47  	}
    48  
    49  	c := gitRepoCache.Do(key{remote, localOK}, func() interface{} {
    50  		repo, err := newGitRepo(remote, localOK)
    51  		return cached{repo, err}
    52  	}).(cached)
    53  
    54  	return c.repo, c.err
    55  }
    56  
    57  func newGitRepo(remote string, localOK bool) (Repo, error) {
    58  	r := &gitRepo{remote: remote}
    59  	if strings.Contains(remote, "://") {
    60  		// This is a remote path.
    61  		var err error
    62  		r.dir, r.mu.Path, err = WorkDir(gitWorkDirType, r.remote)
    63  		if err != nil {
    64  			return nil, err
    65  		}
    66  
    67  		unlock, err := r.mu.Lock()
    68  		if err != nil {
    69  			return nil, err
    70  		}
    71  		defer unlock()
    72  
    73  		if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil {
    74  			if _, err := Run(r.dir, "git", "init", "--bare"); err != nil {
    75  				os.RemoveAll(r.dir)
    76  				return nil, err
    77  			}
    78  			// We could just say git fetch https://whatever later,
    79  			// but this lets us say git fetch origin instead, which
    80  			// is a little nicer. More importantly, using a named remote
    81  			// avoids a problem with Git LFS. See golang.org/issue/25605.
    82  			if _, err := Run(r.dir, "git", "remote", "add", "origin", r.remote); err != nil {
    83  				os.RemoveAll(r.dir)
    84  				return nil, err
    85  			}
    86  			r.remote = "origin"
    87  		}
    88  	} else {
    89  		// Local path.
    90  		// Disallow colon (not in ://) because sometimes
    91  		// that's rcp-style host:path syntax and sometimes it's not (c:\work).
    92  		// The go command has always insisted on URL syntax for ssh.
    93  		if strings.Contains(remote, ":") {
    94  			return nil, fmt.Errorf("git remote cannot use host:path syntax")
    95  		}
    96  		if !localOK {
    97  			return nil, fmt.Errorf("git remote must not be local directory")
    98  		}
    99  		r.local = true
   100  		info, err := os.Stat(remote)
   101  		if err != nil {
   102  			return nil, err
   103  		}
   104  		if !info.IsDir() {
   105  			return nil, fmt.Errorf("%s exists but is not a directory", remote)
   106  		}
   107  		r.dir = remote
   108  		r.mu.Path = r.dir + ".lock"
   109  	}
   110  	return r, nil
   111  }
   112  
// gitRepo is the Repo implementation returned by newGitRepo. It drives
// the git command line against a repository directory on disk.
type gitRepo struct {
	// remote is what Git commands use to reach the upstream repository:
	// the string passed to newGitRepo, or "origin" once newGitRepo has
	// registered that URL as a named remote.
	remote string
	// local reports whether the repository is a caller-supplied directory
	// on the local file system rather than a cache of a remote.
	local bool
	// dir is the directory in which git commands are run.
	dir string

	mu lockedfile.Mutex // protects fetchLevel and git repo state

	// fetchLevel is one of fetchNone, fetchSome, fetchAll.
	fetchLevel int

	// statCache memoizes the results of stat, keyed by rev (see Stat).
	statCache par.Cache

	// refs maps remote ref names (HEAD, refs/heads/*, refs/tags/*) to
	// commit hashes. It and refsErr are filled in by loadRefs, which is
	// run at most once via refsOnce.
	refsOnce sync.Once
	refs     map[string]string
	refsErr  error

	// localTags is the set of tag names present in the local repository.
	// It is filled in by loadLocalTags, run at most once via localTagsOnce.
	localTagsOnce sync.Once
	localTags     map[string]bool
}
   131  
// Values for gitRepo.fetchLevel. They are ordered from least to most
// fetched, and the code compares them with < and <=.
const (
	// How much have we fetched into the git repo (in this process)?
	fetchNone = iota // nothing yet
	fetchSome        // shallow fetches of individual hashes
	fetchAll         // "fetch -t origin": get all remote branches and tags
)
   138  
   139  // loadLocalTags loads tag references from the local git cache
   140  // into the map r.localTags.
   141  // Should only be called as r.localTagsOnce.Do(r.loadLocalTags).
   142  func (r *gitRepo) loadLocalTags() {
   143  	// The git protocol sends all known refs and ls-remote filters them on the client side,
   144  	// so we might as well record both heads and tags in one shot.
   145  	// Most of the time we only care about tags but sometimes we care about heads too.
   146  	out, err := Run(r.dir, "git", "tag", "-l")
   147  	if err != nil {
   148  		return
   149  	}
   150  
   151  	r.localTags = make(map[string]bool)
   152  	for _, line := range strings.Split(string(out), "\n") {
   153  		if line != "" {
   154  			r.localTags[line] = true
   155  		}
   156  	}
   157  }
   158  
   159  // loadRefs loads heads and tags references from the remote into the map r.refs.
   160  // Should only be called as r.refsOnce.Do(r.loadRefs).
   161  func (r *gitRepo) loadRefs() {
   162  	// The git protocol sends all known refs and ls-remote filters them on the client side,
   163  	// so we might as well record both heads and tags in one shot.
   164  	// Most of the time we only care about tags but sometimes we care about heads too.
   165  	out, err := Run(r.dir, "git", "ls-remote", "-q", r.remote)
   166  	if err != nil {
   167  		if rerr, ok := err.(*RunError); ok {
   168  			if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) {
   169  				rerr.HelpText = "If this is a private repository, see https://golang.org/doc/faq#git_https for additional information."
   170  			}
   171  		}
   172  		r.refsErr = err
   173  		return
   174  	}
   175  
   176  	r.refs = make(map[string]string)
   177  	for _, line := range strings.Split(string(out), "\n") {
   178  		f := strings.Fields(line)
   179  		if len(f) != 2 {
   180  			continue
   181  		}
   182  		if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") {
   183  			r.refs[f[1]] = f[0]
   184  		}
   185  	}
   186  	for ref, hash := range r.refs {
   187  		if strings.HasSuffix(ref, "^{}") { // record unwrapped annotated tag as value of tag
   188  			r.refs[strings.TrimSuffix(ref, "^{}")] = hash
   189  			delete(r.refs, ref)
   190  		}
   191  	}
   192  }
   193  
   194  func (r *gitRepo) Tags(prefix string) ([]string, error) {
   195  	r.refsOnce.Do(r.loadRefs)
   196  	if r.refsErr != nil {
   197  		return nil, r.refsErr
   198  	}
   199  
   200  	tags := []string{}
   201  	for ref := range r.refs {
   202  		if !strings.HasPrefix(ref, "refs/tags/") {
   203  			continue
   204  		}
   205  		tag := ref[len("refs/tags/"):]
   206  		if !strings.HasPrefix(tag, prefix) {
   207  			continue
   208  		}
   209  		tags = append(tags, tag)
   210  	}
   211  	sort.Strings(tags)
   212  	return tags, nil
   213  }
   214  
   215  func (r *gitRepo) Latest() (*RevInfo, error) {
   216  	r.refsOnce.Do(r.loadRefs)
   217  	if r.refsErr != nil {
   218  		return nil, r.refsErr
   219  	}
   220  	if r.refs["HEAD"] == "" {
   221  		return nil, fmt.Errorf("no commits")
   222  	}
   223  	return r.Stat(r.refs["HEAD"])
   224  }
   225  
   226  // findRef finds some ref name for the given hash,
   227  // for use when the server requires giving a ref instead of a hash.
   228  // There may be multiple ref names for a given hash,
   229  // in which case this returns some name - it doesn't matter which.
   230  func (r *gitRepo) findRef(hash string) (ref string, ok bool) {
   231  	r.refsOnce.Do(r.loadRefs)
   232  	for ref, h := range r.refs {
   233  		if h == hash {
   234  			return ref, true
   235  		}
   236  	}
   237  	return "", false
   238  }
   239  
   240  func unshallow(gitDir string) []string {
   241  	if _, err := os.Stat(filepath.Join(gitDir, "shallow")); err == nil {
   242  		return []string{"--unshallow"}
   243  	}
   244  	return []string{}
   245  }
   246  
// minHashDigits is the minimum number of hex digits required before a
// revision string is treated as potentially identifying a specific
// commit in a git repo. (Of course, users can always specify more
// digits, and many will paste in all 40 digits, but many of git's
// commands default to printing short hashes as 7 digits.)
const minHashDigits = 7
   254  
// stat stats the given rev in the local repository,
// or else it fetches more info from the remote repository and tries again.
//
// For a local repository it is just statLocal. For a cached remote it
// escalates step by step:
//  1. rev looks like a hash: try it against the local cache;
//  2. rev is a tag already in the local cache: use it;
//  3. resolve rev against the remote's refs (tag, branch, HEAD, or hash
//     prefix of a known ref);
//  4. holding r.mu, retry locally, then try a shallow fetch of the
//     specific commit, then finally a full fetch of all heads and tags.
//
// Stat wraps this method with a per-rev cache.
func (r *gitRepo) stat(rev string) (*RevInfo, error) {
	if r.local {
		return r.statLocal(rev, rev)
	}

	// Fast path: maybe rev is a hash we already have locally.
	didStatLocal := false
	if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		if info, err := r.statLocal(rev, rev); err == nil {
			return info, nil
		}
		didStatLocal = true
	}

	// Maybe rev is a tag we already have locally.
	// (Note that we're excluding branches, which can be stale.)
	r.localTagsOnce.Do(r.loadLocalTags)
	if r.localTags[rev] {
		return r.statLocal(rev, "refs/tags/"+rev)
	}

	// Maybe rev is the name of a tag or branch on the remote server.
	// Or maybe it's the prefix of a hash of a named ref.
	// Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash.
	// Note that a failure to load the remote refs (r.refsErr) is not checked
	// here; r.refs is then nil and every lookup below simply misses.
	r.refsOnce.Do(r.loadRefs)
	var ref, hash string
	if r.refs["refs/tags/"+rev] != "" {
		ref = "refs/tags/" + rev
		hash = r.refs[ref]
		// Keep rev as is: tags are assumed not to change meaning.
	} else if r.refs["refs/heads/"+rev] != "" {
		ref = "refs/heads/" + rev
		hash = r.refs[ref]
		rev = hash // Replace rev, because meaning of refs/heads/foo can change.
	} else if rev == "HEAD" && r.refs["HEAD"] != "" {
		ref = "HEAD"
		hash = r.refs[ref]
		rev = hash // Replace rev, because meaning of HEAD can change.
	} else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) {
		// At the least, we have a hash prefix we can look up after the fetch below.
		// Maybe we can map it to a full hash using the known refs.
		prefix := rev
		// Check whether rev is prefix of known ref hash.
		for k, h := range r.refs {
			if strings.HasPrefix(h, prefix) {
				if hash != "" && hash != h {
					// Hash is an ambiguous hash prefix.
					// More information will not change that.
					return nil, fmt.Errorf("ambiguous revision %s", rev)
				}
				if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash.
					ref = k
				}
				rev = h
				hash = h
			}
		}
		if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash.
			hash = rev
		}
	} else {
		return nil, fmt.Errorf("unknown revision %s", rev)
	}

	// Protect r.fetchLevel and the "fetch more and more" sequence.
	unlock, err := r.mu.Lock()
	if err != nil {
		return nil, err
	}
	defer unlock()

	// Perhaps r.localTags did not have the ref when we loaded local tags,
	// but we've since done fetches that pulled down the hash we need
	// (or already have the hash we need, just without its tag).
	// Either way, try a local stat before falling back to network I/O.
	if !didStatLocal {
		if info, err := r.statLocal(rev, hash); err == nil {
			if strings.HasPrefix(ref, "refs/tags/") {
				// Make sure tag exists, so it will be in localTags next time the go command is run.
				// This is best effort: the result of the command is deliberately ignored.
				Run(r.dir, "git", "tag", strings.TrimPrefix(ref, "refs/tags/"), hash)
			}
			return info, nil
		}
	}

	// If we know a specific commit we need, fetch it.
	if r.fetchLevel <= fetchSome && hash != "" && !r.local {
		r.fetchLevel = fetchSome
		var refspec string
		if ref != "" && ref != "HEAD" {
			// If we do know the ref name, save the mapping locally
			// so that (if it is a tag) it can show up in localTags
			// on a future call. Also, some servers refuse to allow
			// full hashes in ref specs, so prefer a ref name if known.
			refspec = ref + ":" + ref
		} else {
			// Fetch the hash but give it a local name (refs/dummy),
			// because that triggers the fetch behavior of creating any
			// other known remote tags for the hash. We never use
			// refs/dummy (it's not refs/tags/dummy) and it will be
			// overwritten in the next command, and that's fine.
			ref = hash
			refspec = hash + ":refs/dummy"
		}
		_, err := Run(r.dir, "git", "fetch", "-f", "--depth=1", r.remote, refspec)
		if err == nil {
			return r.statLocal(rev, ref)
		}
		// Don't try to be smart about parsing the error.
		// It's too complex and varies too much by git version.
		// No matter what went wrong, fall back to a complete fetch.
	}

	// Last resort.
	// Fetch all heads and tags and hope the hash we want is in the history.
	if r.fetchLevel < fetchAll {
		// TODO(bcmills): should we wait to upgrade fetchLevel until after we check
		// err? If there is a temporary server error, we want subsequent fetches to
		// try again instead of proceeding with an incomplete repo.
		r.fetchLevel = fetchAll
		if err := r.fetchUnshallow("refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
			return nil, err
		}
	}

	return r.statLocal(rev, rev)
}
   384  
   385  func (r *gitRepo) fetchUnshallow(refSpecs ...string) error {
   386  	// To work around a protocol version 2 bug that breaks --unshallow,
   387  	// add -c protocol.version=0.
   388  	// TODO(rsc): The bug is believed to be server-side, meaning only
   389  	// on Google's Git servers. Once the servers are fixed, drop the
   390  	// protocol.version=0. See Google-internal bug b/110495752.
   391  	var protoFlag []string
   392  	unshallowFlag := unshallow(r.dir)
   393  	if len(unshallowFlag) > 0 {
   394  		protoFlag = []string{"-c", "protocol.version=0"}
   395  	}
   396  	_, err := Run(r.dir, "git", protoFlag, "fetch", unshallowFlag, "-f", r.remote, refSpecs)
   397  	return err
   398  }
   399  
   400  // statLocal returns a RevInfo describing rev in the local git repository.
   401  // It uses version as info.Version.
   402  func (r *gitRepo) statLocal(version, rev string) (*RevInfo, error) {
   403  	out, err := Run(r.dir, "git", "-c", "log.showsignature=false", "log", "-n1", "--format=format:%H %ct %D", rev)
   404  	if err != nil {
   405  		return nil, fmt.Errorf("unknown revision %s", rev)
   406  	}
   407  	f := strings.Fields(string(out))
   408  	if len(f) < 2 {
   409  		return nil, fmt.Errorf("unexpected response from git log: %q", out)
   410  	}
   411  	hash := f[0]
   412  	if strings.HasPrefix(hash, version) {
   413  		version = hash // extend to full hash
   414  	}
   415  	t, err := strconv.ParseInt(f[1], 10, 64)
   416  	if err != nil {
   417  		return nil, fmt.Errorf("invalid time from git log: %q", out)
   418  	}
   419  
   420  	info := &RevInfo{
   421  		Name:    hash,
   422  		Short:   ShortenSHA1(hash),
   423  		Time:    time.Unix(t, 0).UTC(),
   424  		Version: hash,
   425  	}
   426  
   427  	// Add tags. Output looks like:
   428  	//	ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD
   429  	for i := 2; i < len(f); i++ {
   430  		if f[i] == "tag:" {
   431  			i++
   432  			if i < len(f) {
   433  				info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ","))
   434  			}
   435  		}
   436  	}
   437  	sort.Strings(info.Tags)
   438  
   439  	// Used hash as info.Version above.
   440  	// Use caller's suggested version if it appears in the tag list
   441  	// (filters out branch names, HEAD).
   442  	for _, tag := range info.Tags {
   443  		if version == tag {
   444  			info.Version = version
   445  		}
   446  	}
   447  
   448  	return info, nil
   449  }
   450  
   451  func (r *gitRepo) Stat(rev string) (*RevInfo, error) {
   452  	if rev == "latest" {
   453  		return r.Latest()
   454  	}
   455  	type cached struct {
   456  		info *RevInfo
   457  		err  error
   458  	}
   459  	c := r.statCache.Do(rev, func() interface{} {
   460  		info, err := r.stat(rev)
   461  		return cached{info, err}
   462  	}).(cached)
   463  	return c.info, c.err
   464  }
   465  
   466  func (r *gitRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) {
   467  	// TODO: Could use git cat-file --batch.
   468  	info, err := r.Stat(rev) // download rev into local git repo
   469  	if err != nil {
   470  		return nil, err
   471  	}
   472  	out, err := Run(r.dir, "git", "cat-file", "blob", info.Name+":"+file)
   473  	if err != nil {
   474  		return nil, os.ErrNotExist
   475  	}
   476  	return out, nil
   477  }
   478  
// ReadFileRevs reads file at each of the given revs, which are looked up
// as tags (refs/tags/<rev>). The result maps each rev to a FileRev holding
// either the file data or a per-rev error.
//
// Revs already present in the local repository are read directly. Revs
// that are missing locally but exist as tags on the remote are fetched
// and then read again. Revs that are neither keep their "no such rev"
// error in the result.
//
// The maxSize argument is currently not consulted.
func (r *gitRepo) ReadFileRevs(revs []string, file string, maxSize int64) (map[string]*FileRev, error) {
	// Create space to hold results.
	files := make(map[string]*FileRev)
	for _, rev := range revs {
		f := &FileRev{Rev: rev}
		files[rev] = f
	}

	// Collect locally-known revs.
	need, err := r.readFileRevs(revs, file, files)
	if err != nil {
		return nil, err
	}
	if len(need) == 0 {
		return files, nil
	}

	// Build list of known remote refs that might help.
	var redo []string
	r.refsOnce.Do(r.loadRefs)
	if r.refsErr != nil {
		return nil, r.refsErr
	}
	for _, tag := range need {
		if r.refs["refs/tags/"+tag] != "" {
			redo = append(redo, tag)
		}
	}
	if len(redo) == 0 {
		// Nothing missing is known to the remote either; fetching cannot help.
		return files, nil
	}

	// Protect r.fetchLevel and the "fetch more and more" sequence.
	// See stat method above.
	unlock, err := r.mu.Lock()
	if err != nil {
		return nil, err
	}
	defer unlock()

	var refs []string
	var protoFlag []string
	var unshallowFlag []string
	for _, tag := range redo {
		refs = append(refs, "refs/tags/"+tag+":refs/tags/"+tag)
	}
	// Only when more than one tag is being fetched is --unshallow considered.
	if len(refs) > 1 {
		unshallowFlag = unshallow(r.dir)
		if len(unshallowFlag) > 0 {
			// To work around a protocol version 2 bug that breaks --unshallow,
			// add -c protocol.version=0.
			// TODO(rsc): The bug is believed to be server-side, meaning only
			// on Google's Git servers. Once the servers are fixed, drop the
			// protocol.version=0. See Google-internal bug b/110495752.
			protoFlag = []string{"-c", "protocol.version=0"}
		}
	}
	if _, err := Run(r.dir, "git", protoFlag, "fetch", unshallowFlag, "-f", r.remote, refs); err != nil {
		return nil, err
	}

	// TODO(bcmills): after the 1.11 freeze, replace the block above with:
	//	if r.fetchLevel <= fetchSome {
	//		r.fetchLevel = fetchSome
	//		var refs []string
	//		for _, tag := range redo {
	//			refs = append(refs, "refs/tags/"+tag+":refs/tags/"+tag)
	//		}
	//		if _, err := Run(r.dir, "git", "fetch", "--update-shallow", "-f", r.remote, refs); err != nil {
	//			return nil, err
	//		}
	//	}

	// Re-read the tags that were just fetched; this overwrites their
	// earlier "no such rev" entries in files.
	if _, err := r.readFileRevs(redo, file, files); err != nil {
		return nil, err
	}

	return files, nil
}
   558  
   559  func (r *gitRepo) readFileRevs(tags []string, file string, fileMap map[string]*FileRev) (missing []string, err error) {
   560  	var stdin bytes.Buffer
   561  	for _, tag := range tags {
   562  		fmt.Fprintf(&stdin, "refs/tags/%s\n", tag)
   563  		fmt.Fprintf(&stdin, "refs/tags/%s:%s\n", tag, file)
   564  	}
   565  
   566  	data, err := RunWithStdin(r.dir, &stdin, "git", "cat-file", "--batch")
   567  	if err != nil {
   568  		return nil, err
   569  	}
   570  
   571  	next := func() (typ string, body []byte, ok bool) {
   572  		var line string
   573  		i := bytes.IndexByte(data, '\n')
   574  		if i < 0 {
   575  			return "", nil, false
   576  		}
   577  		line, data = string(bytes.TrimSpace(data[:i])), data[i+1:]
   578  		if strings.HasSuffix(line, " missing") {
   579  			return "missing", nil, true
   580  		}
   581  		f := strings.Fields(line)
   582  		if len(f) != 3 {
   583  			return "", nil, false
   584  		}
   585  		n, err := strconv.Atoi(f[2])
   586  		if err != nil || n > len(data) {
   587  			return "", nil, false
   588  		}
   589  		body, data = data[:n], data[n:]
   590  		if len(data) > 0 && data[0] == '\r' {
   591  			data = data[1:]
   592  		}
   593  		if len(data) > 0 && data[0] == '\n' {
   594  			data = data[1:]
   595  		}
   596  		return f[1], body, true
   597  	}
   598  
   599  	badGit := func() ([]string, error) {
   600  		return nil, fmt.Errorf("malformed output from git cat-file --batch")
   601  	}
   602  
   603  	for _, tag := range tags {
   604  		commitType, _, ok := next()
   605  		if !ok {
   606  			return badGit()
   607  		}
   608  		fileType, fileData, ok := next()
   609  		if !ok {
   610  			return badGit()
   611  		}
   612  		f := fileMap[tag]
   613  		f.Data = nil
   614  		f.Err = nil
   615  		switch commitType {
   616  		default:
   617  			f.Err = fmt.Errorf("unexpected non-commit type %q for rev %s", commitType, tag)
   618  
   619  		case "missing":
   620  			// Note: f.Err must not satisfy os.IsNotExist. That's reserved for the file not existing in a valid commit.
   621  			f.Err = fmt.Errorf("no such rev %s", tag)
   622  			missing = append(missing, tag)
   623  
   624  		case "tag", "commit":
   625  			switch fileType {
   626  			default:
   627  				f.Err = &os.PathError{Path: tag + ":" + file, Op: "read", Err: fmt.Errorf("unexpected non-blob type %q", fileType)}
   628  			case "missing":
   629  				f.Err = &os.PathError{Path: tag + ":" + file, Op: "read", Err: os.ErrNotExist}
   630  			case "blob":
   631  				f.Data = fileData
   632  			}
   633  		}
   634  	}
   635  	if len(bytes.TrimSpace(data)) != 0 {
   636  		return badGit()
   637  	}
   638  
   639  	return missing, nil
   640  }
   641  
// RecentTag returns the tag that "git describe --first-parent --tags"
// reports for rev, restricted to tags matching the glob
// prefix+"v[0-9]*.[0-9]*.[0-9]*".
//
// If the local history does not yield such a tag but the remote has tags
// beginning with prefix+"v", the full history of all heads and tags is
// fetched and the describe is repeated. If the remote has no such tags,
// RecentTag returns "" and a nil error.
func (r *gitRepo) RecentTag(rev, prefix string) (tag string, err error) {
	info, err := r.Stat(rev)
	if err != nil {
		return "", err
	}
	rev = info.Name // expand hash prefixes

	// describe sets tag and err using 'git describe' and reports whether the
	// result is definitive.
	// Note that it assigns to the named results tag and err of RecentTag.
	describe := func() (definitive bool) {
		var out []byte
		out, err = Run(r.dir, "git", "describe", "--first-parent", "--always", "--abbrev=0", "--match", prefix+"v[0-9]*.[0-9]*.[0-9]*", "--tags", rev)
		if err != nil {
			return true // Because we use "--always", describe should never fail.
		}

		// An all-hex result is treated as a bare commit hash (the --always
		// fallback), i.e. no matching tag was found.
		tag = string(bytes.TrimSpace(out))
		return tag != "" && !AllHex(tag)
	}

	if describe() {
		return tag, err
	}

	// Git didn't find a version tag preceding the requested rev.
	// See whether any plausible tag exists.
	tags, err := r.Tags(prefix + "v")
	if err != nil {
		return "", err
	}
	if len(tags) == 0 {
		return "", nil
	}

	// There are plausible tags, but we don't know if rev is a descendent of any of them.
	// Fetch the history to find out.

	unlock, err := r.mu.Lock()
	if err != nil {
		return "", err
	}
	defer unlock()

	if r.fetchLevel < fetchAll {
		// Fetch all heads and tags and see if that gives us enough history.
		if err := r.fetchUnshallow("refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil {
			return "", err
		}
		r.fetchLevel = fetchAll
	}

	// If we've reached this point, we have all of the commits that are reachable
	// from all heads and tags.
	//
	// The only refs we should be missing are those that are no longer reachable
	// (or never were reachable) from any branch or tag, including the master
	// branch, and we don't want to resolve them anyway (they're probably
	// unreachable for a reason).
	//
	// Try one last time in case some other goroutine fetched rev while we were
	// waiting on the lock.
	describe()
	return tag, err
}
   706  
   707  func (r *gitRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, actualSubdir string, err error) {
   708  	// TODO: Use maxSize or drop it.
   709  	args := []string{}
   710  	if subdir != "" {
   711  		args = append(args, "--", subdir)
   712  	}
   713  	info, err := r.Stat(rev) // download rev into local git repo
   714  	if err != nil {
   715  		return nil, "", err
   716  	}
   717  
   718  	unlock, err := r.mu.Lock()
   719  	if err != nil {
   720  		return nil, "", err
   721  	}
   722  	defer unlock()
   723  
   724  	if err := ensureGitAttributes(r.dir); err != nil {
   725  		return nil, "", err
   726  	}
   727  
   728  	// Incredibly, git produces different archives depending on whether
   729  	// it is running on a Windows system or not, in an attempt to normalize
   730  	// text file line endings. Setting -c core.autocrlf=input means only
   731  	// translate files on the way into the repo, not on the way out (archive).
   732  	// The -c core.eol=lf should be unnecessary but set it anyway.
   733  	archive, err := Run(r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args)
   734  	if err != nil {
   735  		if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) {
   736  			return nil, "", os.ErrNotExist
   737  		}
   738  		return nil, "", err
   739  	}
   740  
   741  	return ioutil.NopCloser(bytes.NewReader(archive)), "", nil
   742  }
   743  
   744  // ensureGitAttributes makes sure export-subst and export-ignore features are
   745  // disabled for this repo. This is intended to be run prior to running git
   746  // archive so that zip files are generated that produce consistent ziphashes
   747  // for a given revision, independent of variables such as git version and the
   748  // size of the repo.
   749  //
   750  // See: https://github.com/golang/go/issues/27153
   751  func ensureGitAttributes(repoDir string) (err error) {
   752  	const attr = "\n* -export-subst -export-ignore\n"
   753  
   754  	d := repoDir + "/info"
   755  	p := d + "/attributes"
   756  
   757  	if err := os.MkdirAll(d, 0755); err != nil {
   758  		return err
   759  	}
   760  
   761  	f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
   762  	if err != nil {
   763  		return err
   764  	}
   765  	defer func() {
   766  		closeErr := f.Close()
   767  		if closeErr != nil {
   768  			err = closeErr
   769  		}
   770  	}()
   771  
   772  	b, err := ioutil.ReadAll(f)
   773  	if err != nil {
   774  		return err
   775  	}
   776  	if !bytes.HasSuffix(b, []byte(attr)) {
   777  		_, err := f.WriteString(attr)
   778  		return err
   779  	}
   780  
   781  	return nil
   782  }