github.com/SamarSidharth/kpt@v0.0.0-20231122062228-c7d747ae3ace/internal/gitutil/gitutil.go (about)

     1  // Copyright 2019 The kpt Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gitutil
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"context"
    21  	"crypto/md5"
    22  	"encoding/base32"
    23  	"encoding/hex"
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"os/exec"
    28  	"path/filepath"
    29  	"regexp"
    30  	"runtime"
    31  	"strings"
    32  	"time"
    33  
    34  	"github.com/GoogleContainerTools/kpt/internal/errors"
    35  	"github.com/GoogleContainerTools/kpt/pkg/printer"
    36  )
    37  
// RepoCacheDirEnv is the name of the environment variable that overrides the
// directory in which remote repos are cached. When the variable is unset or
// empty, the cache directory is UserHomeDir/.kpt/repos.
const RepoCacheDirEnv = "KPT_CACHE_DIR"
    41  
    42  // NewLocalGitRunner returns a new GitLocalRunner for a local package.
    43  func NewLocalGitRunner(pkg string) (*GitLocalRunner, error) {
    44  	const op errors.Op = "gitutil.NewLocalGitRunner"
    45  	p, err := exec.LookPath("git")
    46  	if err != nil {
    47  		return nil, errors.E(op, errors.Git, &GitExecError{
    48  			Type: GitExecutableNotFound,
    49  			Err:  err,
    50  		})
    51  	}
    52  
    53  	return &GitLocalRunner{
    54  		gitPath: p,
    55  		Dir:     pkg,
    56  		Debug:   false,
    57  	}, nil
    58  }
    59  
// GitLocalRunner runs git commands in a local git repo.
type GitLocalRunner struct {
	// gitPath is the path to the git executable used to run every command.
	gitPath string

	// Dir is the directory the commands are run in.
	Dir string

	// Debug enables output of debug information (the command line and how
	// long it took to run) to stderr.
	Debug bool
}
    71  
// RunResult holds the output captured from running a git command.
type RunResult struct {
	// Stdout is everything the command wrote to standard output.
	Stdout string
	// Stderr is everything the command wrote to standard error.
	Stderr string
}
    76  
    77  // Run runs a git command.
    78  // Omit the 'git' part of the command.
    79  // The first return value contains the output to Stdout and Stderr when
    80  // running the command.
    81  func (g *GitLocalRunner) Run(ctx context.Context, command string, args ...string) (RunResult, error) {
    82  	return g.run(ctx, false, command, args...)
    83  }
    84  
    85  // RunVerbose runs a git command.
    86  // Omit the 'git' part of the command.
    87  // The first return value contains the output to Stdout and Stderr when
    88  // running the command.
    89  func (g *GitLocalRunner) RunVerbose(ctx context.Context, command string, args ...string) (RunResult, error) {
    90  	return g.run(ctx, true, command, args...)
    91  }
    92  
    93  // run runs a git command.
    94  // Omit the 'git' part of the command.
    95  // The first return value contains the output to Stdout and Stderr when
    96  // running the command.
    97  func (g *GitLocalRunner) run(ctx context.Context, verbose bool, command string, args ...string) (RunResult, error) {
    98  	const op errors.Op = "gitutil.run"
    99  
   100  	fullArgs := append([]string{command}, args...)
   101  	cmd := exec.CommandContext(ctx, g.gitPath, fullArgs...)
   102  	cmd.Dir = g.Dir
   103  	// Disable git prompting the user for credentials.
   104  	cmd.Env = append(os.Environ(),
   105  		"GIT_TERMINAL_PROMPT=0")
   106  	pr := printer.FromContextOrDie(ctx)
   107  	cmdStdout := &bytes.Buffer{}
   108  	cmdStderr := &bytes.Buffer{}
   109  	if verbose {
   110  		cmd.Stdout = io.MultiWriter(cmdStdout, pr.OutStream())
   111  		cmd.Stderr = io.MultiWriter(cmdStderr, pr.ErrStream())
   112  	} else {
   113  		cmd.Stdout = cmdStdout
   114  		cmd.Stderr = cmdStderr
   115  	}
   116  
   117  	if g.Debug {
   118  		_, _ = fmt.Fprintf(os.Stderr, "[git -C %s %s]\n", g.Dir, strings.Join(fullArgs, " "))
   119  	}
   120  	start := time.Now()
   121  	err := cmd.Run()
   122  	duration := time.Since(start)
   123  	if g.Debug {
   124  		_, _ = fmt.Fprintf(os.Stderr, "duration: %v\n", duration)
   125  	}
   126  	if err != nil {
   127  		return RunResult{}, errors.E(op, errors.Git, &GitExecError{
   128  			Type:    determineErrorType(cmdStderr.String()),
   129  			Args:    args,
   130  			Command: command,
   131  			Err:     err,
   132  			StdOut:  cmdStdout.String(),
   133  			StdErr:  cmdStderr.String(),
   134  		})
   135  	}
   136  	return RunResult{
   137  		Stdout: cmdStdout.String(),
   138  		Stderr: cmdStderr.String(),
   139  	}, nil
   140  }
   141  
// NewGitUpstreamRepoOption customizes a GitUpstreamRepo while it is being
// constructed by NewGitUpstreamRepo.
type NewGitUpstreamRepoOption func(*GitUpstreamRepo)
   143  
   144  func WithFetchedRefs(a map[string]bool) NewGitUpstreamRepoOption {
   145  	return func(g *GitUpstreamRepo) {
   146  		g.fetchedRefs = a
   147  	}
   148  }
   149  
   150  // NewGitUpstreamRepo returns a new GitUpstreamRepo for an upstream package.
   151  func NewGitUpstreamRepo(ctx context.Context, uri string, opts ...NewGitUpstreamRepoOption) (*GitUpstreamRepo, error) {
   152  	const op errors.Op = "gitutil.NewGitUpstreamRepo"
   153  	g := &GitUpstreamRepo{
   154  		URI: uri,
   155  	}
   156  	for _, opt := range opts {
   157  		opt(g)
   158  	}
   159  	if g.fetchedRefs == nil {
   160  		g.fetchedRefs = map[string]bool{}
   161  	}
   162  	if err := g.updateRefs(ctx); err != nil {
   163  		return nil, errors.E(op, errors.Repo(uri), err)
   164  	}
   165  	return g, nil
   166  }
   167  
// GitUpstreamRepo represents an upstream git repo. It holds the branch and
// tag refs advertised by the upstream and fetches refs into a local cache
// clone on demand.
type GitUpstreamRepo struct {
	// URI is the location of the upstream repo; it is used as the URL of the
	// cache clone's origin remote.
	URI string

	// Heads contains all head refs in the upstream repo, keyed by branch name
	// (without the refs/heads/ prefix), as well as the commit each of them
	// is referencing.
	Heads map[string]string

	// Tags contains all tag refs in the upstream repo, keyed by tag name
	// (without the refs/tags/ prefix), as well as the commit each of them
	// is referencing.
	Tags map[string]string

	// fetchedRefs keeps track of refs already fetched from remote
	fetchedRefs map[string]bool
}
   183  
   184  func (gur *GitUpstreamRepo) GetFetchedRefs() []string {
   185  	fetchedRefs := make([]string, 0, len(gur.fetchedRefs))
   186  	for ref := range gur.fetchedRefs {
   187  		fetchedRefs = append(fetchedRefs, ref)
   188  	}
   189  	return fetchedRefs
   190  }
   191  
   192  // updateRefs fetches all refs from the upstream git repo, parses the results
   193  // and caches all refs and the commit they reference. Not that this doesn't
   194  // download any objects, only refs.
   195  func (gur *GitUpstreamRepo) updateRefs(ctx context.Context) error {
   196  	const op errors.Op = "gitutil.updateRefs"
   197  	repoCacheDir, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{})
   198  	if err != nil {
   199  		return errors.E(op, errors.Repo(gur.URI), err)
   200  	}
   201  
   202  	gitRunner, err := NewLocalGitRunner(repoCacheDir)
   203  	if err != nil {
   204  		return errors.E(op, errors.Repo(gur.URI), err)
   205  	}
   206  
   207  	rr, err := gitRunner.Run(ctx, "ls-remote", "--heads", "--tags", "--refs", "origin")
   208  	if err != nil {
   209  		AmendGitExecError(err, func(e *GitExecError) {
   210  			e.Repo = gur.URI
   211  		})
   212  		// TODO: This should only fail if we can't connect to the repo. We should
   213  		// consider exposing the error message from git to the user here.
   214  		return errors.E(op, errors.Repo(gur.URI), err)
   215  	}
   216  
   217  	heads := make(map[string]string)
   218  	tags := make(map[string]string)
   219  
   220  	re := regexp.MustCompile(`^([a-z0-9]+)\s+refs/(heads|tags)/(.+)$`)
   221  	scanner := bufio.NewScanner(bytes.NewBufferString(rr.Stdout))
   222  	for scanner.Scan() {
   223  		txt := scanner.Text()
   224  		res := re.FindStringSubmatch(txt)
   225  		if len(res) == 0 {
   226  			continue
   227  		}
   228  		switch res[2] {
   229  		case "heads":
   230  			heads[res[3]] = res[1]
   231  		case "tags":
   232  			tags[res[3]] = res[1]
   233  		}
   234  	}
   235  	if err := scanner.Err(); err != nil {
   236  		return errors.E(op, errors.Repo(gur.URI), errors.Git,
   237  			fmt.Errorf("error parsing response from git: %w", err))
   238  	}
   239  	gur.Heads = heads
   240  	gur.Tags = tags
   241  	return nil
   242  }
   243  
   244  // GetRepo fetches all the provided refs and the objects. It will fetch it
   245  // to the cache repo and returns the path to the local git clone in the cache
   246  // directory.
   247  func (gur *GitUpstreamRepo) GetRepo(ctx context.Context, refs []string) (string, error) {
   248  	const op errors.Op = "gitutil.GetRepo"
   249  	dir, err := gur.cacheRepo(ctx, gur.URI, refs, []string{})
   250  	if err != nil {
   251  		return "", errors.E(op, errors.Repo(gur.URI), err)
   252  	}
   253  	return dir, nil
   254  }
   255  
   256  // GetDefaultBranch returns the name of the branch pointed to by the
   257  // HEAD symref. This is the default branch of the repository.
   258  func (gur *GitUpstreamRepo) GetDefaultBranch(ctx context.Context) (string, error) {
   259  	const op errors.Op = "gitutil.GetDefaultBranch"
   260  	cacheRepo, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{})
   261  	if err != nil {
   262  		return "", errors.E(op, errors.Repo(gur.URI), err)
   263  	}
   264  
   265  	gitRunner, err := NewLocalGitRunner(cacheRepo)
   266  	if err != nil {
   267  		return "", errors.E(op, errors.Repo(gur.URI), err)
   268  	}
   269  
   270  	rr, err := gitRunner.Run(ctx, "ls-remote", "--symref", "origin", "HEAD")
   271  	if err != nil {
   272  		AmendGitExecError(err, func(e *GitExecError) {
   273  			e.Repo = gur.URI
   274  		})
   275  		return "", errors.E(op, errors.Repo(gur.URI), err)
   276  	}
   277  	if rr.Stdout == "" {
   278  		return "", errors.E(op, errors.Repo(gur.URI),
   279  			fmt.Errorf("unable to detect default branch in repo"))
   280  	}
   281  
   282  	re := regexp.MustCompile(`ref: refs/heads/([^\s/]+)\s*HEAD`)
   283  	match := re.FindStringSubmatch(rr.Stdout)
   284  	if len(match) != 2 {
   285  		return "", errors.E(op, errors.Repo(gur.URI), errors.Git,
   286  			fmt.Errorf("unexpected response from git when determining default branch: %s", rr.Stdout))
   287  	}
   288  	return match[1], nil
   289  }
   290  
   291  // ResolveBranch resolves the branch to a commit SHA. This happens based on the
   292  // cached information about refs in the upstream repo. If the branch doesn't exist
   293  // in the upstream repo, the last return value will be false.
   294  func (gur *GitUpstreamRepo) ResolveBranch(branch string) (string, bool) {
   295  	branch = strings.TrimPrefix(branch, "refs/heads/")
   296  	for head, commit := range gur.Heads {
   297  		if head == branch {
   298  			return commit, true
   299  		}
   300  	}
   301  	return "", false
   302  }
   303  
   304  // ResolveTag resolves the tag to a commit SHA. This happens based on the
   305  // cached information about refs in the upstream repo. If the tag doesn't exist
   306  // in the upstream repo, the last return value will be false.
   307  func (gur *GitUpstreamRepo) ResolveTag(tag string) (string, bool) {
   308  	tag = strings.TrimPrefix(tag, "refs/tags/")
   309  	for t, commit := range gur.Tags {
   310  		if t == tag {
   311  			return commit, true
   312  		}
   313  	}
   314  	return "", false
   315  }
   316  
   317  // ResolveRef resolves the ref (either tag or branch) to a commit SHA. If the
   318  // ref doesn't exist in the upstream repo, the last return value will be false.
   319  func (gur *GitUpstreamRepo) ResolveRef(ref string) (string, bool) {
   320  	commit, found := gur.ResolveBranch(ref)
   321  	if found {
   322  		return commit, true
   323  	}
   324  	return gur.ResolveTag(ref)
   325  }
   326  
   327  // getRepoDir returns the cache directory name for a remote repo
   328  // This takes the md5 hash of the repo uri and then base32 (or hex for Windows to shorten dir)
   329  // encodes it to make sure it doesn't contain characters that isn't legal in directory names.
   330  func (gur *GitUpstreamRepo) getRepoDir(uri string) string {
   331  	if runtime.GOOS == "windows" {
   332  		var hash = md5.Sum([]byte(uri))
   333  		return strings.ToLower(hex.EncodeToString(hash[:]))	
   334  	}
   335  	return strings.ToLower(base32.StdEncoding.EncodeToString(md5.New().Sum([]byte(uri))))
   336  }
   337  
   338  // getRepoCacheDir
   339  func (gur *GitUpstreamRepo) getRepoCacheDir() (string, error) {
   340  	const op errors.Op = "gitutil.getRepoCacheDir"
   341  	var err error
   342  	dir := os.Getenv(RepoCacheDirEnv)
   343  	if dir != "" {
   344  		return dir, nil
   345  	}
   346  
   347  	// cache location unspecified, use UserHomeDir/.kpt/repos
   348  	dir, err = os.UserHomeDir()
   349  	if err != nil {
   350  		return "", errors.E(op, errors.IO, fmt.Errorf(
   351  			"error looking up user home dir: %w", err))
   352  	}
   353  	return filepath.Join(dir, ".kpt", "repos"), nil
   354  }
   355  
// cacheRepo fetches a remote repo to a cache location, and fetches the provided refs.
//
// The cache location is <cache dir>/<getRepoDir(uri)>. If that directory does
// not exist yet, it is created with `git init` and uri is added as the origin
// remote. Each ref in requiredRefs that has not been fetched before is then
// fetched from origin, and every successfully fetched ref is recorded in
// gur.fetchedRefs. Each ref in optionalRefs is fetched on a best-effort
// basis; when optionalRefs is non-empty, at least one of them must be
// fetched successfully.
//
// It returns the path of the cache clone, or an empty path and an error if
// the cache directory cannot be prepared or a fetch fails.
func (gur *GitUpstreamRepo) cacheRepo(ctx context.Context, uri string, requiredRefs []string, optionalRefs []string) (string, error) {
	const op errors.Op = "gitutil.cacheRepo"
	kptCacheDir, err := gur.getRepoCacheDir()
	if err != nil {
		return "", errors.E(op, err)
	}
	// The cache directory is created readable and writable by the owner only.
	if err := os.MkdirAll(kptCacheDir, 0700); err != nil {
		return "", errors.E(op, errors.IO, fmt.Errorf(
			"error creating cache directory for repo: %w", err))
	}

	// create the repo directory if it doesn't exist yet
	gitRunner, err := NewLocalGitRunner(kptCacheDir)
	if err != nil {
		return "", errors.E(op, errors.Repo(uri), err)
	}
	uriSha := gur.getRepoDir(uri)
	repoCacheDir := filepath.Join(kptCacheDir, uriSha)
	// Only a "does not exist" result triggers initialization. Any other
	// outcome of Stat, including other errors, is treated as "already there".
	if _, err := os.Stat(repoCacheDir); os.IsNotExist(err) {
		// `git init <name>` runs in the cache directory and creates the repo
		// directory itself.
		if _, err := gitRunner.Run(ctx, "init", uriSha); err != nil {
			AmendGitExecError(err, func(e *GitExecError) {
				e.Repo = uri
			})
			return "", errors.E(op, errors.Git, fmt.Errorf("error running `git init`: %w", err))
		}
		// From here on all git commands run inside the cache clone.
		gitRunner.Dir = repoCacheDir
		if _, err = gitRunner.Run(ctx, "remote", "add", "origin", uri); err != nil {
			AmendGitExecError(err, func(e *GitExecError) {
				e.Repo = uri
			})
			return "", errors.E(op, errors.Git, fmt.Errorf("error adding origin remote: %w", err))
		}
	} else {
		gitRunner.Dir = repoCacheDir
	}

loop:
	for i := range requiredRefs {
		s := requiredRefs[i]
		// Check if we can verify the ref. This will output a full commit sha if
		// either the ref (short commit, tag, branch) can be resolved to a full
		// commit sha, or if the provided ref is already a valid full commit sha (note
		// that this will happen even if the commit doesn't exist in the local repo).
		// We ignore the error here since an error just means the ref didn't exist,
		// which we detect by checking the output to stdout.
		rr, _ := gitRunner.Run(ctx, "rev-parse", "--verify", "-q", s)
		// If the output is the same as the ref, then the ref was already a full
		// commit sha.
		validFullSha := s == strings.TrimSpace(rr.Stdout)
		// resolved reports whether the ref names a known upstream branch or tag.
		_, resolved := gur.ResolveRef(s)
		// check if ref was previously fetched
		// we use the ref s as the cache key
		// Only the presence of the key matters here; the stored value is ignored.
		_, fetched := gur.fetchedRefs[s]
		switch {
		case fetched:
			// skip refetching if previously fetched
			// This break only leaves the switch; the loop moves on to the next ref.
			break
		case resolved || validFullSha:
			// If the ref references a branch or a tag, or is a valid commit
			// sha and has not already been fetched, we can fetch just a single commit.
			if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin", "--depth=1", s); err != nil {
				AmendGitExecError(err, func(e *GitExecError) {
					e.Repo = uri
					e.Command = "fetch"
					e.Ref = s
				})
				return "", errors.E(op, errors.Git, fmt.Errorf(
					"error running `git fetch` for ref %q: %w", s, err))
			}
			gur.fetchedRefs[s] = true
		default:
			// In other situations (like a short commit sha), we have to do
			// a full fetch from the remote.
			if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin"); err != nil {
				AmendGitExecError(err, func(e *GitExecError) {
					e.Repo = uri
					e.Command = "fetch"
				})
				return "", errors.E(op, errors.Git, fmt.Errorf(
					"error running `git fetch` for origin: %w", err))
			}
			// Confirm that the ref can actually be found after the full fetch.
			if _, err = gitRunner.Run(ctx, "show", s); err != nil {
				AmendGitExecError(err, func(e *GitExecError) {
					e.Repo = uri
					e.Ref = s
				})
				return "", errors.E(op, errors.Git, fmt.Errorf(
					"error verifying results from fetch: %w", err))
			}
			gur.fetchedRefs[s] = true
			// If we did a full fetch, we already have all refs, so we can just
			// exit the loop.
			break loop
		}
	}

	// Optional refs: individual fetch failures are ignored, but if any optional
	// refs were requested at least one of them has to be fetched successfully.
	var found bool
	for _, s := range optionalRefs {
		if _, err := gitRunner.Run(ctx, "fetch", "origin", s); err == nil {
			found = true
		}
	}
	if !found && len(optionalRefs) > 0 {
		return "", errors.E(op, errors.Git, fmt.Errorf("unable to find any refs %s",
			strings.Join(optionalRefs, ",")))
	}
	return repoCacheDir, nil
}