github.com/wmuizelaar/kpt@v0.0.0-20221018115725-bd564717b2ed/internal/gitutil/gitutil.go (about)

     1  // Copyright 2019 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gitutil
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"context"
    21  	"crypto/md5"
    22  	"encoding/base32"
    23  	"fmt"
    24  	"io"
    25  	"os"
    26  	"os/exec"
    27  	"path/filepath"
    28  	"regexp"
    29  	"strings"
    30  	"time"
    31  
    32  	"github.com/GoogleContainerTools/kpt/internal/errors"
    33  	"github.com/GoogleContainerTools/kpt/internal/printer"
    34  )
    35  
// RepoCacheDirEnv is the name of the environment variable that controls the cache directory
// for remote repos. When the variable is unset or empty, the cache directory
// defaults to UserHomeDir/.kpt/repos.
const RepoCacheDirEnv = "KPT_CACHE_DIR"
    39  
    40  // NewLocalGitRunner returns a new GitLocalRunner for a local package.
    41  func NewLocalGitRunner(pkg string) (*GitLocalRunner, error) {
    42  	const op errors.Op = "gitutil.NewLocalGitRunner"
    43  	p, err := exec.LookPath("git")
    44  	if err != nil {
    45  		return nil, errors.E(op, errors.Git, &GitExecError{
    46  			Type: GitExecutableNotFound,
    47  			Err:  err,
    48  		})
    49  	}
    50  
    51  	return &GitLocalRunner{
    52  		gitPath: p,
    53  		Dir:     pkg,
    54  		Debug:   false,
    55  	}, nil
    56  }
    57  
// GitLocalRunner runs git commands in a local git repo.
// Create instances with NewLocalGitRunner so that gitPath is populated.
type GitLocalRunner struct {
	// Path to the git executable, as resolved by exec.LookPath in
	// NewLocalGitRunner.
	gitPath string

	// Dir is the directory the commands are run in. It is used as the
	// working directory of every git process started by the runner.
	Dir string

	// Debug enables output of debug information to stderr. When set, the
	// command line and the duration of each git invocation are printed.
	Debug bool
}
    69  
// RunResult holds the captured output of a successfully completed git command.
type RunResult struct {
	// Stdout is everything the command wrote to its standard output.
	Stdout string
	// Stderr is everything the command wrote to its standard error.
	Stderr string
}
    74  
// Run runs a git command and captures its output without echoing it.
// Omit the 'git' part of the command.
// On success, the first return value contains the output written to Stdout
// and Stderr by the command. On failure the RunResult is empty and the
// captured output is carried by the GitExecError wrapped in the returned error.
func (g *GitLocalRunner) Run(ctx context.Context, command string, args ...string) (RunResult, error) {
	return g.run(ctx, false, command, args...)
}
    82  
// RunVerbose runs a git command like Run, but additionally copies the
// command's stdout and stderr to the output and error streams of the printer
// stored in ctx while the command is running.
// Omit the 'git' part of the command.
// On success, the first return value contains the output written to Stdout
// and Stderr by the command. On failure the RunResult is empty and the
// captured output is carried by the GitExecError wrapped in the returned error.
func (g *GitLocalRunner) RunVerbose(ctx context.Context, command string, args ...string) (RunResult, error) {
	return g.run(ctx, true, command, args...)
}
    90  
    91  // run runs a git command.
    92  // Omit the 'git' part of the command.
    93  // The first return value contains the output to Stdout and Stderr when
    94  // running the command.
    95  func (g *GitLocalRunner) run(ctx context.Context, verbose bool, command string, args ...string) (RunResult, error) {
    96  	const op errors.Op = "gitutil.run"
    97  
    98  	fullArgs := append([]string{command}, args...)
    99  	cmd := exec.CommandContext(ctx, g.gitPath, fullArgs...)
   100  	cmd.Dir = g.Dir
   101  	// Disable git prompting the user for credentials.
   102  	cmd.Env = append(os.Environ(),
   103  		"GIT_TERMINAL_PROMPT=0")
   104  	pr := printer.FromContextOrDie(ctx)
   105  	cmdStdout := &bytes.Buffer{}
   106  	cmdStderr := &bytes.Buffer{}
   107  	if verbose {
   108  		cmd.Stdout = io.MultiWriter(cmdStdout, pr.OutStream())
   109  		cmd.Stderr = io.MultiWriter(cmdStderr, pr.ErrStream())
   110  	} else {
   111  		cmd.Stdout = cmdStdout
   112  		cmd.Stderr = cmdStderr
   113  	}
   114  
   115  	if g.Debug {
   116  		_, _ = fmt.Fprintf(os.Stderr, "[git -C %s %s]\n", g.Dir, strings.Join(fullArgs, " "))
   117  	}
   118  	start := time.Now()
   119  	err := cmd.Run()
   120  	duration := time.Since(start)
   121  	if g.Debug {
   122  		_, _ = fmt.Fprintf(os.Stderr, "duration: %v\n", duration)
   123  	}
   124  	if err != nil {
   125  		return RunResult{}, errors.E(op, errors.Git, &GitExecError{
   126  			Type:    determineErrorType(cmdStderr.String()),
   127  			Args:    args,
   128  			Command: command,
   129  			Err:     err,
   130  			StdOut:  cmdStdout.String(),
   131  			StdErr:  cmdStderr.String(),
   132  		})
   133  	}
   134  	return RunResult{
   135  		Stdout: cmdStdout.String(),
   136  		Stderr: cmdStderr.String(),
   137  	}, nil
   138  }
   139  
// NewGitUpstreamRepoOption is a functional option that customizes a
// GitUpstreamRepo while it is being constructed by NewGitUpstreamRepo.
type NewGitUpstreamRepoOption func(*GitUpstreamRepo)
   141  
// WithFetchedRefs returns an option that seeds the GitUpstreamRepo's record of
// already-fetched refs with a. The map is stored as-is rather than copied, so
// refs fetched later through the repo are recorded in the caller's map too.
// If a is nil, NewGitUpstreamRepo substitutes an empty map.
func WithFetchedRefs(a map[string]bool) NewGitUpstreamRepoOption {
	return func(g *GitUpstreamRepo) {
		g.fetchedRefs = a
	}
}
   147  
   148  // NewGitUpstreamRepo returns a new GitUpstreamRepo for an upstream package.
   149  func NewGitUpstreamRepo(ctx context.Context, uri string, opts ...NewGitUpstreamRepoOption) (*GitUpstreamRepo, error) {
   150  	const op errors.Op = "gitutil.NewGitUpstreamRepo"
   151  	g := &GitUpstreamRepo{
   152  		URI: uri,
   153  	}
   154  	for _, opt := range opts {
   155  		opt(g)
   156  	}
   157  	if g.fetchedRefs == nil {
   158  		g.fetchedRefs = map[string]bool{}
   159  	}
   160  	if err := g.updateRefs(ctx); err != nil {
   161  		return nil, errors.E(op, errors.Repo(uri), err)
   162  	}
   163  	return g, nil
   164  }
   165  
// GitUpstreamRepo represents an upstream git repository identified by URI,
// together with the branch and tag refs discovered in it and a record of the
// refs that have already been fetched into the local cache.
type GitUpstreamRepo struct {
	// URI is the location of the upstream repository; it is used as the
	// "origin" remote of the cached repo.
	URI string

	// Heads contains all head refs in the upstream repo, keyed by branch
	// name (without the "refs/heads/" prefix), mapped to the hash each of
	// them is referencing.
	Heads map[string]string

	// Tags contains all tag refs in the upstream repo, keyed by tag name
	// (without the "refs/tags/" prefix), mapped to the hash each of them
	// is referencing.
	Tags map[string]string

	// fetchedRefs keeps track of refs already fetched from remote
	fetchedRefs map[string]bool
}
   181  
   182  func (gur *GitUpstreamRepo) GetFetchedRefs() []string {
   183  	fetchedRefs := make([]string, 0, len(gur.fetchedRefs))
   184  	for ref := range gur.fetchedRefs {
   185  		fetchedRefs = append(fetchedRefs, ref)
   186  	}
   187  	return fetchedRefs
   188  }
   189  
   190  // updateRefs fetches all refs from the upstream git repo, parses the results
   191  // and caches all refs and the commit they reference. Not that this doesn't
   192  // download any objects, only refs.
   193  func (gur *GitUpstreamRepo) updateRefs(ctx context.Context) error {
   194  	const op errors.Op = "gitutil.updateRefs"
   195  	repoCacheDir, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{})
   196  	if err != nil {
   197  		return errors.E(op, errors.Repo(gur.URI), err)
   198  	}
   199  
   200  	gitRunner, err := NewLocalGitRunner(repoCacheDir)
   201  	if err != nil {
   202  		return errors.E(op, errors.Repo(gur.URI), err)
   203  	}
   204  
   205  	rr, err := gitRunner.Run(ctx, "ls-remote", "--heads", "--tags", "--refs", "origin")
   206  	if err != nil {
   207  		AmendGitExecError(err, func(e *GitExecError) {
   208  			e.Repo = gur.URI
   209  		})
   210  		// TODO: This should only fail if we can't connect to the repo. We should
   211  		// consider exposing the error message from git to the user here.
   212  		return errors.E(op, errors.Repo(gur.URI), err)
   213  	}
   214  
   215  	heads := make(map[string]string)
   216  	tags := make(map[string]string)
   217  
   218  	re := regexp.MustCompile(`^([a-z0-9]+)\s+refs/(heads|tags)/(.+)$`)
   219  	scanner := bufio.NewScanner(bytes.NewBufferString(rr.Stdout))
   220  	for scanner.Scan() {
   221  		txt := scanner.Text()
   222  		res := re.FindStringSubmatch(txt)
   223  		if len(res) == 0 {
   224  			continue
   225  		}
   226  		switch res[2] {
   227  		case "heads":
   228  			heads[res[3]] = res[1]
   229  		case "tags":
   230  			tags[res[3]] = res[1]
   231  		}
   232  	}
   233  	if err := scanner.Err(); err != nil {
   234  		return errors.E(op, errors.Repo(gur.URI), errors.Git,
   235  			fmt.Errorf("error parsing response from git: %w", err))
   236  	}
   237  	gur.Heads = heads
   238  	gur.Tags = tags
   239  	return nil
   240  }
   241  
   242  // GetRepo fetches all the provided refs and the objects. It will fetch it
   243  // to the cache repo and returns the path to the local git clone in the cache
   244  // directory.
   245  func (gur *GitUpstreamRepo) GetRepo(ctx context.Context, refs []string) (string, error) {
   246  	const op errors.Op = "gitutil.GetRepo"
   247  	dir, err := gur.cacheRepo(ctx, gur.URI, refs, []string{})
   248  	if err != nil {
   249  		return "", errors.E(op, errors.Repo(gur.URI), err)
   250  	}
   251  	return dir, nil
   252  }
   253  
   254  // GetDefaultBranch returns the name of the branch pointed to by the
   255  // HEAD symref. This is the default branch of the repository.
   256  func (gur *GitUpstreamRepo) GetDefaultBranch(ctx context.Context) (string, error) {
   257  	const op errors.Op = "gitutil.GetDefaultBranch"
   258  	cacheRepo, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{})
   259  	if err != nil {
   260  		return "", errors.E(op, errors.Repo(gur.URI), err)
   261  	}
   262  
   263  	gitRunner, err := NewLocalGitRunner(cacheRepo)
   264  	if err != nil {
   265  		return "", errors.E(op, errors.Repo(gur.URI), err)
   266  	}
   267  
   268  	rr, err := gitRunner.Run(ctx, "ls-remote", "--symref", "origin", "HEAD")
   269  	if err != nil {
   270  		AmendGitExecError(err, func(e *GitExecError) {
   271  			e.Repo = gur.URI
   272  		})
   273  		return "", errors.E(op, errors.Repo(gur.URI), err)
   274  	}
   275  	if rr.Stdout == "" {
   276  		return "", errors.E(op, errors.Repo(gur.URI),
   277  			fmt.Errorf("unable to detect default branch in repo"))
   278  	}
   279  
   280  	re := regexp.MustCompile(`ref: refs/heads/([^\s/]+)\s*HEAD`)
   281  	match := re.FindStringSubmatch(rr.Stdout)
   282  	if len(match) != 2 {
   283  		return "", errors.E(op, errors.Repo(gur.URI), errors.Git,
   284  			fmt.Errorf("unexpected response from git when determining default branch: %s", rr.Stdout))
   285  	}
   286  	return match[1], nil
   287  }
   288  
   289  // ResolveBranch resolves the branch to a commit SHA. This happens based on the
   290  // cached information about refs in the upstream repo. If the branch doesn't exist
   291  // in the upstream repo, the last return value will be false.
   292  func (gur *GitUpstreamRepo) ResolveBranch(branch string) (string, bool) {
   293  	branch = strings.TrimPrefix(branch, "refs/heads/")
   294  	for head, commit := range gur.Heads {
   295  		if head == branch {
   296  			return commit, true
   297  		}
   298  	}
   299  	return "", false
   300  }
   301  
   302  // ResolveTag resolves the tag to a commit SHA. This happens based on the
   303  // cached information about refs in the upstream repo. If the tag doesn't exist
   304  // in the upstream repo, the last return value will be false.
   305  func (gur *GitUpstreamRepo) ResolveTag(tag string) (string, bool) {
   306  	tag = strings.TrimPrefix(tag, "refs/tags/")
   307  	for t, commit := range gur.Tags {
   308  		if t == tag {
   309  			return commit, true
   310  		}
   311  	}
   312  	return "", false
   313  }
   314  
   315  // ResolveRef resolves the ref (either tag or branch) to a commit SHA. If the
   316  // ref doesn't exist in the upstream repo, the last return value will be false.
   317  func (gur *GitUpstreamRepo) ResolveRef(ref string) (string, bool) {
   318  	commit, found := gur.ResolveBranch(ref)
   319  	if found {
   320  		return commit, true
   321  	}
   322  	return gur.ResolveTag(ref)
   323  }
   324  
   325  // getRepoDir returns the cache directory name for a remote repo
   326  // This takes the md5 hash of the repo uri and then base32 encodes it to make
   327  // sure it doesn't contain characters that isn't legal in directory names.
   328  func (gur *GitUpstreamRepo) getRepoDir(uri string) string {
   329  	return strings.ToLower(base32.StdEncoding.EncodeToString(md5.New().Sum([]byte(uri))))
   330  }
   331  
   332  // getRepoCacheDir
   333  func (gur *GitUpstreamRepo) getRepoCacheDir() (string, error) {
   334  	const op errors.Op = "gitutil.getRepoCacheDir"
   335  	var err error
   336  	dir := os.Getenv(RepoCacheDirEnv)
   337  	if dir != "" {
   338  		return dir, nil
   339  	}
   340  
   341  	// cache location unspecified, use UserHomeDir/.kpt/repos
   342  	dir, err = os.UserHomeDir()
   343  	if err != nil {
   344  		return "", errors.E(op, errors.IO, fmt.Errorf(
   345  			"error looking up user home dir: %w", err))
   346  	}
   347  	return filepath.Join(dir, ".kpt", "repos"), nil
   348  }
   349  
// cacheRepo fetches a remote repo to a cache location, and fetches the provided refs.
//
// The cache location is a directory named by getRepoDir(uri) inside the
// directory returned by getRepoCacheDir. If that directory does not exist yet,
// it is created with `git init` and uri is added as the "origin" remote.
//
// Every ref in requiredRefs must end up available: refs already recorded in
// gur.fetchedRefs are skipped, refs that are known branches/tags or full commit
// SHAs are fetched individually with depth 1, and anything else triggers one
// full fetch of origin. Successfully fetched refs are recorded in
// gur.fetchedRefs. For optionalRefs, individual fetch failures are ignored, but
// if optionalRefs is non-empty at least one of them must be fetched
// successfully.
//
// On success the path of the cached repo directory is returned.
func (gur *GitUpstreamRepo) cacheRepo(ctx context.Context, uri string, requiredRefs []string, optionalRefs []string) (string, error) {
	const op errors.Op = "gitutil.cacheRepo"
	kptCacheDir, err := gur.getRepoCacheDir()
	if err != nil {
		return "", errors.E(op, err)
	}
	if err := os.MkdirAll(kptCacheDir, 0700); err != nil {
		return "", errors.E(op, errors.IO, fmt.Errorf(
			"error creating cache directory for repo: %w", err))
	}

	// create the repo directory if it doesn't exist yet
	// The runner starts out in the cache root so that `git init <name>` creates
	// the repo directory there; afterwards it is pointed at the repo itself.
	gitRunner, err := NewLocalGitRunner(kptCacheDir)
	if err != nil {
		return "", errors.E(op, errors.Repo(uri), err)
	}
	uriSha := gur.getRepoDir(uri)
	repoCacheDir := filepath.Join(kptCacheDir, uriSha)
	// Only a "does not exist" result triggers initialization; any other Stat
	// outcome (including other errors) is treated as the repo being present.
	if _, err := os.Stat(repoCacheDir); os.IsNotExist(err) {
		if _, err := gitRunner.Run(ctx, "init", uriSha); err != nil {
			AmendGitExecError(err, func(e *GitExecError) {
				e.Repo = uri
			})
			return "", errors.E(op, errors.Git, fmt.Errorf("error running `git init`: %w", err))
		}
		gitRunner.Dir = repoCacheDir
		if _, err = gitRunner.Run(ctx, "remote", "add", "origin", uri); err != nil {
			AmendGitExecError(err, func(e *GitExecError) {
				e.Repo = uri
			})
			return "", errors.E(op, errors.Git, fmt.Errorf("error adding origin remote: %w", err))
		}
	} else {
		gitRunner.Dir = repoCacheDir
	}

loop:
	for i := range requiredRefs {
		s := requiredRefs[i]
		// Check if we can verify the ref. This will output a full commit sha if
		// either the ref (short commit, tag, branch) can be resolved to a full
		// commit sha, or if the provided ref is already a valid full commit sha (note
		// that this will happen even if the commit doesn't exist in the local repo).
		// We ignore the error here since an error just means the ref didn't exist,
		// which we detect by checking the output to stdout (Run returns an empty
		// RunResult on failure, so Stdout is "" in that case).
		rr, _ := gitRunner.Run(ctx, "rev-parse", "--verify", "-q", s)
		// If the output is the same as the ref, then the ref was already a full
		// commit sha.
		validFullSha := s == strings.TrimSpace(rr.Stdout)
		// resolved reports whether s names a branch or tag known from the
		// upstream ref listing.
		_, resolved := gur.ResolveRef(s)
		// check if ref was previously fetched
		// we use the ref s as the cache key
		_, fetched := gur.fetchedRefs[s]
		switch {
		case fetched:
			// skip refetching if previously fetched
			// (this break only leaves the switch; the loop continues with
			// the next ref)
			break
		case resolved || validFullSha:
			// If the ref references a branch or a tag, or is a valid commit
			// sha and has not already been fetched, we can fetch just a single commit.
			if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin", "--depth=1", s); err != nil {
				AmendGitExecError(err, func(e *GitExecError) {
					e.Repo = uri
					e.Command = "fetch"
					e.Ref = s
				})
				return "", errors.E(op, errors.Git, fmt.Errorf(
					"error running `git fetch` for ref %q: %w", s, err))
			}
			gur.fetchedRefs[s] = true
		default:
			// In other situations (like a short commit sha), we have to do
			// a full fetch from the remote.
			if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin"); err != nil {
				AmendGitExecError(err, func(e *GitExecError) {
					e.Repo = uri
					e.Command = "fetch"
				})
				return "", errors.E(op, errors.Git, fmt.Errorf(
					"error running `git fetch` for origin: %w", err))
			}
			// Confirm that the ref is actually available after the full fetch.
			if _, err = gitRunner.Run(ctx, "show", s); err != nil {
				AmendGitExecError(err, func(e *GitExecError) {
					e.Repo = uri
					e.Ref = s
				})
				return "", errors.E(op, errors.Git, fmt.Errorf(
					"error verifying results from fetch: %w", err))
			}
			gur.fetchedRefs[s] = true
			// If we did a full fetch, we already have all refs, so we can just
			// exit the loop. Note that the remaining required refs are neither
			// verified nor recorded in fetchedRefs.
			break loop
		}
	}

	// Optional refs are fetched on a best-effort basis: individual failures
	// are ignored, but at least one of them has to succeed.
	var found bool
	for _, s := range optionalRefs {
		if _, err := gitRunner.Run(ctx, "fetch", "origin", s); err == nil {
			found = true
		}
	}
	if !found && len(optionalRefs) > 0 {
		return "", errors.E(op, errors.Git, fmt.Errorf("unable to find any refs %s",
			strings.Join(optionalRefs, ",")))
	}
	return repoCacheDir, nil
}