github.com/SamarSidharth/kpt@v0.0.0-20231122062228-c7d747ae3ace/internal/gitutil/gitutil.go (about) 1 // Copyright 2019 The kpt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gitutil 16 17 import ( 18 "bufio" 19 "bytes" 20 "context" 21 "crypto/md5" 22 "encoding/base32" 23 "encoding/hex" 24 "fmt" 25 "io" 26 "os" 27 "os/exec" 28 "path/filepath" 29 "regexp" 30 "runtime" 31 "strings" 32 "time" 33 34 "github.com/GoogleContainerTools/kpt/internal/errors" 35 "github.com/GoogleContainerTools/kpt/pkg/printer" 36 ) 37 38 // RepoCacheDirEnv is the name of the environment variable that controls the cache directory 39 // for remote repos. Defaults to UserHomeDir/.kpt/repos if unspecified. 40 const RepoCacheDirEnv = "KPT_CACHE_DIR" 41 42 // NewLocalGitRunner returns a new GitLocalRunner for a local package. 43 func NewLocalGitRunner(pkg string) (*GitLocalRunner, error) { 44 const op errors.Op = "gitutil.NewLocalGitRunner" 45 p, err := exec.LookPath("git") 46 if err != nil { 47 return nil, errors.E(op, errors.Git, &GitExecError{ 48 Type: GitExecutableNotFound, 49 Err: err, 50 }) 51 } 52 53 return &GitLocalRunner{ 54 gitPath: p, 55 Dir: pkg, 56 Debug: false, 57 }, nil 58 } 59 60 // GitLocalRunner runs git commands in a local git repo. 61 type GitLocalRunner struct { 62 // Path to the git executable. 63 gitPath string 64 65 // Dir is the directory the commands are run in. 66 Dir string 67 68 // Debug enables output of debug information to stderr. 69 Debug bool 70 } 71 72 type RunResult struct { 73 Stdout string 74 Stderr string 75 } 76 77 // Run runs a git command. 78 // Omit the 'git' part of the command. 79 // The first return value contains the output to Stdout and Stderr when 80 // running the command. 81 func (g *GitLocalRunner) Run(ctx context.Context, command string, args ...string) (RunResult, error) { 82 return g.run(ctx, false, command, args...) 83 } 84 85 // RunVerbose runs a git command. 86 // Omit the 'git' part of the command. 87 // The first return value contains the output to Stdout and Stderr when 88 // running the command. 89 func (g *GitLocalRunner) RunVerbose(ctx context.Context, command string, args ...string) (RunResult, error) { 90 return g.run(ctx, true, command, args...) 91 } 92 93 // run runs a git command. 94 // Omit the 'git' part of the command. 95 // The first return value contains the output to Stdout and Stderr when 96 // running the command. 97 func (g *GitLocalRunner) run(ctx context.Context, verbose bool, command string, args ...string) (RunResult, error) { 98 const op errors.Op = "gitutil.run" 99 100 fullArgs := append([]string{command}, args...) 101 cmd := exec.CommandContext(ctx, g.gitPath, fullArgs...) 102 cmd.Dir = g.Dir 103 // Disable git prompting the user for credentials. 104 cmd.Env = append(os.Environ(), 105 "GIT_TERMINAL_PROMPT=0") 106 pr := printer.FromContextOrDie(ctx) 107 cmdStdout := &bytes.Buffer{} 108 cmdStderr := &bytes.Buffer{} 109 if verbose { 110 cmd.Stdout = io.MultiWriter(cmdStdout, pr.OutStream()) 111 cmd.Stderr = io.MultiWriter(cmdStderr, pr.ErrStream()) 112 } else { 113 cmd.Stdout = cmdStdout 114 cmd.Stderr = cmdStderr 115 } 116 117 if g.Debug { 118 _, _ = fmt.Fprintf(os.Stderr, "[git -C %s %s]\n", g.Dir, strings.Join(fullArgs, " ")) 119 } 120 start := time.Now() 121 err := cmd.Run() 122 duration := time.Since(start) 123 if g.Debug { 124 _, _ = fmt.Fprintf(os.Stderr, "duration: %v\n", duration) 125 } 126 if err != nil { 127 return RunResult{}, errors.E(op, errors.Git, &GitExecError{ 128 Type: determineErrorType(cmdStderr.String()), 129 Args: args, 130 Command: command, 131 Err: err, 132 StdOut: cmdStdout.String(), 133 StdErr: cmdStderr.String(), 134 }) 135 } 136 return RunResult{ 137 Stdout: cmdStdout.String(), 138 Stderr: cmdStderr.String(), 139 }, nil 140 } 141 142 type NewGitUpstreamRepoOption func(*GitUpstreamRepo) 143 144 func WithFetchedRefs(a map[string]bool) NewGitUpstreamRepoOption { 145 return func(g *GitUpstreamRepo) { 146 g.fetchedRefs = a 147 } 148 } 149 150 // NewGitUpstreamRepo returns a new GitUpstreamRepo for an upstream package. 151 func NewGitUpstreamRepo(ctx context.Context, uri string, opts ...NewGitUpstreamRepoOption) (*GitUpstreamRepo, error) { 152 const op errors.Op = "gitutil.NewGitUpstreamRepo" 153 g := &GitUpstreamRepo{ 154 URI: uri, 155 } 156 for _, opt := range opts { 157 opt(g) 158 } 159 if g.fetchedRefs == nil { 160 g.fetchedRefs = map[string]bool{} 161 } 162 if err := g.updateRefs(ctx); err != nil { 163 return nil, errors.E(op, errors.Repo(uri), err) 164 } 165 return g, nil 166 } 167 168 // GitUpstreamRepo runs git commands in a local git repo. 169 type GitUpstreamRepo struct { 170 URI string 171 172 // Heads contains all head refs in the upstream repo as well as the 173 // each of the are referencing. 174 Heads map[string]string 175 176 // Tags contains all tag refs in the upstream repo as well as the 177 // each of the are referencing. 178 Tags map[string]string 179 180 // fetchedRefs keeps track of refs already fetched from remote 181 fetchedRefs map[string]bool 182 } 183 184 func (gur *GitUpstreamRepo) GetFetchedRefs() []string { 185 fetchedRefs := make([]string, 0, len(gur.fetchedRefs)) 186 for ref := range gur.fetchedRefs { 187 fetchedRefs = append(fetchedRefs, ref) 188 } 189 return fetchedRefs 190 } 191 192 // updateRefs fetches all refs from the upstream git repo, parses the results 193 // and caches all refs and the commit they reference. Not that this doesn't 194 // download any objects, only refs. 195 func (gur *GitUpstreamRepo) updateRefs(ctx context.Context) error { 196 const op errors.Op = "gitutil.updateRefs" 197 repoCacheDir, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{}) 198 if err != nil { 199 return errors.E(op, errors.Repo(gur.URI), err) 200 } 201 202 gitRunner, err := NewLocalGitRunner(repoCacheDir) 203 if err != nil { 204 return errors.E(op, errors.Repo(gur.URI), err) 205 } 206 207 rr, err := gitRunner.Run(ctx, "ls-remote", "--heads", "--tags", "--refs", "origin") 208 if err != nil { 209 AmendGitExecError(err, func(e *GitExecError) { 210 e.Repo = gur.URI 211 }) 212 // TODO: This should only fail if we can't connect to the repo. We should 213 // consider exposing the error message from git to the user here. 214 return errors.E(op, errors.Repo(gur.URI), err) 215 } 216 217 heads := make(map[string]string) 218 tags := make(map[string]string) 219 220 re := regexp.MustCompile(`^([a-z0-9]+)\s+refs/(heads|tags)/(.+)$`) 221 scanner := bufio.NewScanner(bytes.NewBufferString(rr.Stdout)) 222 for scanner.Scan() { 223 txt := scanner.Text() 224 res := re.FindStringSubmatch(txt) 225 if len(res) == 0 { 226 continue 227 } 228 switch res[2] { 229 case "heads": 230 heads[res[3]] = res[1] 231 case "tags": 232 tags[res[3]] = res[1] 233 } 234 } 235 if err := scanner.Err(); err != nil { 236 return errors.E(op, errors.Repo(gur.URI), errors.Git, 237 fmt.Errorf("error parsing response from git: %w", err)) 238 } 239 gur.Heads = heads 240 gur.Tags = tags 241 return nil 242 } 243 244 // GetRepo fetches all the provided refs and the objects. It will fetch it 245 // to the cache repo and returns the path to the local git clone in the cache 246 // directory. 247 func (gur *GitUpstreamRepo) GetRepo(ctx context.Context, refs []string) (string, error) { 248 const op errors.Op = "gitutil.GetRepo" 249 dir, err := gur.cacheRepo(ctx, gur.URI, refs, []string{}) 250 if err != nil { 251 return "", errors.E(op, errors.Repo(gur.URI), err) 252 } 253 return dir, nil 254 } 255 256 // GetDefaultBranch returns the name of the branch pointed to by the 257 // HEAD symref. This is the default branch of the repository. 258 func (gur *GitUpstreamRepo) GetDefaultBranch(ctx context.Context) (string, error) { 259 const op errors.Op = "gitutil.GetDefaultBranch" 260 cacheRepo, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{}) 261 if err != nil { 262 return "", errors.E(op, errors.Repo(gur.URI), err) 263 } 264 265 gitRunner, err := NewLocalGitRunner(cacheRepo) 266 if err != nil { 267 return "", errors.E(op, errors.Repo(gur.URI), err) 268 } 269 270 rr, err := gitRunner.Run(ctx, "ls-remote", "--symref", "origin", "HEAD") 271 if err != nil { 272 AmendGitExecError(err, func(e *GitExecError) { 273 e.Repo = gur.URI 274 }) 275 return "", errors.E(op, errors.Repo(gur.URI), err) 276 } 277 if rr.Stdout == "" { 278 return "", errors.E(op, errors.Repo(gur.URI), 279 fmt.Errorf("unable to detect default branch in repo")) 280 } 281 282 re := regexp.MustCompile(`ref: refs/heads/([^\s/]+)\s*HEAD`) 283 match := re.FindStringSubmatch(rr.Stdout) 284 if len(match) != 2 { 285 return "", errors.E(op, errors.Repo(gur.URI), errors.Git, 286 fmt.Errorf("unexpected response from git when determining default branch: %s", rr.Stdout)) 287 } 288 return match[1], nil 289 } 290 291 // ResolveBranch resolves the branch to a commit SHA. This happens based on the 292 // cached information about refs in the upstream repo. If the branch doesn't exist 293 // in the upstream repo, the last return value will be false. 294 func (gur *GitUpstreamRepo) ResolveBranch(branch string) (string, bool) { 295 branch = strings.TrimPrefix(branch, "refs/heads/") 296 for head, commit := range gur.Heads { 297 if head == branch { 298 return commit, true 299 } 300 } 301 return "", false 302 } 303 304 // ResolveTag resolves the tag to a commit SHA. This happens based on the 305 // cached information about refs in the upstream repo. If the tag doesn't exist 306 // in the upstream repo, the last return value will be false. 307 func (gur *GitUpstreamRepo) ResolveTag(tag string) (string, bool) { 308 tag = strings.TrimPrefix(tag, "refs/tags/") 309 for t, commit := range gur.Tags { 310 if t == tag { 311 return commit, true 312 } 313 } 314 return "", false 315 } 316 317 // ResolveRef resolves the ref (either tag or branch) to a commit SHA. If the 318 // ref doesn't exist in the upstream repo, the last return value will be false. 319 func (gur *GitUpstreamRepo) ResolveRef(ref string) (string, bool) { 320 commit, found := gur.ResolveBranch(ref) 321 if found { 322 return commit, true 323 } 324 return gur.ResolveTag(ref) 325 } 326 327 // getRepoDir returns the cache directory name for a remote repo 328 // This takes the md5 hash of the repo uri and then base32 (or hex for Windows to shorten dir) 329 // encodes it to make sure it doesn't contain characters that isn't legal in directory names. 330 func (gur *GitUpstreamRepo) getRepoDir(uri string) string { 331 if runtime.GOOS == "windows" { 332 var hash = md5.Sum([]byte(uri)) 333 return strings.ToLower(hex.EncodeToString(hash[:])) 334 } 335 return strings.ToLower(base32.StdEncoding.EncodeToString(md5.New().Sum([]byte(uri)))) 336 } 337 338 // getRepoCacheDir 339 func (gur *GitUpstreamRepo) getRepoCacheDir() (string, error) { 340 const op errors.Op = "gitutil.getRepoCacheDir" 341 var err error 342 dir := os.Getenv(RepoCacheDirEnv) 343 if dir != "" { 344 return dir, nil 345 } 346 347 // cache location unspecified, use UserHomeDir/.kpt/repos 348 dir, err = os.UserHomeDir() 349 if err != nil { 350 return "", errors.E(op, errors.IO, fmt.Errorf( 351 "error looking up user home dir: %w", err)) 352 } 353 return filepath.Join(dir, ".kpt", "repos"), nil 354 } 355 356 // cacheRepo fetches a remote repo to a cache location, and fetches the provided refs. 357 func (gur *GitUpstreamRepo) cacheRepo(ctx context.Context, uri string, requiredRefs []string, optionalRefs []string) (string, error) { 358 const op errors.Op = "gitutil.cacheRepo" 359 kptCacheDir, err := gur.getRepoCacheDir() 360 if err != nil { 361 return "", errors.E(op, err) 362 } 363 if err := os.MkdirAll(kptCacheDir, 0700); err != nil { 364 return "", errors.E(op, errors.IO, fmt.Errorf( 365 "error creating cache directory for repo: %w", err)) 366 } 367 368 // create the repo directory if it doesn't exist yet 369 gitRunner, err := NewLocalGitRunner(kptCacheDir) 370 if err != nil { 371 return "", errors.E(op, errors.Repo(uri), err) 372 } 373 uriSha := gur.getRepoDir(uri) 374 repoCacheDir := filepath.Join(kptCacheDir, uriSha) 375 if _, err := os.Stat(repoCacheDir); os.IsNotExist(err) { 376 if _, err := gitRunner.Run(ctx, "init", uriSha); err != nil { 377 AmendGitExecError(err, func(e *GitExecError) { 378 e.Repo = uri 379 }) 380 return "", errors.E(op, errors.Git, fmt.Errorf("error running `git init`: %w", err)) 381 } 382 gitRunner.Dir = repoCacheDir 383 if _, err = gitRunner.Run(ctx, "remote", "add", "origin", uri); err != nil { 384 AmendGitExecError(err, func(e *GitExecError) { 385 e.Repo = uri 386 }) 387 return "", errors.E(op, errors.Git, fmt.Errorf("error adding origin remote: %w", err)) 388 } 389 } else { 390 gitRunner.Dir = repoCacheDir 391 } 392 393 loop: 394 for i := range requiredRefs { 395 s := requiredRefs[i] 396 // Check if we can verify the ref. This will output a full commit sha if 397 // either the ref (short commit, tag, branch) can be resolved to a full 398 // commit sha, or if the provided ref is already a valid full commit sha (note 399 // that this will happen even if the commit doesn't exist in the local repo). 400 // We ignore the error here since an error just means the ref didn't exist, 401 // which we detect by checking the output to stdout. 402 rr, _ := gitRunner.Run(ctx, "rev-parse", "--verify", "-q", s) 403 // If the output is the same as the ref, then the ref was already a full 404 // commit sha. 405 validFullSha := s == strings.TrimSpace(rr.Stdout) 406 _, resolved := gur.ResolveRef(s) 407 // check if ref was previously fetched 408 // we use the ref s as the cache key 409 _, fetched := gur.fetchedRefs[s] 410 switch { 411 case fetched: 412 // skip refetching if previously fetched 413 break 414 case resolved || validFullSha: 415 // If the ref references a branch or a tag, or is a valid commit 416 // sha and has not already been fetched, we can fetch just a single commit. 417 if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin", "--depth=1", s); err != nil { 418 AmendGitExecError(err, func(e *GitExecError) { 419 e.Repo = uri 420 e.Command = "fetch" 421 e.Ref = s 422 }) 423 return "", errors.E(op, errors.Git, fmt.Errorf( 424 "error running `git fetch` for ref %q: %w", s, err)) 425 } 426 gur.fetchedRefs[s] = true 427 default: 428 // In other situations (like a short commit sha), we have to do 429 // a full fetch from the remote. 430 if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin"); err != nil { 431 AmendGitExecError(err, func(e *GitExecError) { 432 e.Repo = uri 433 e.Command = "fetch" 434 }) 435 return "", errors.E(op, errors.Git, fmt.Errorf( 436 "error running `git fetch` for origin: %w", err)) 437 } 438 if _, err = gitRunner.Run(ctx, "show", s); err != nil { 439 AmendGitExecError(err, func(e *GitExecError) { 440 e.Repo = uri 441 e.Ref = s 442 }) 443 return "", errors.E(op, errors.Git, fmt.Errorf( 444 "error verifying results from fetch: %w", err)) 445 } 446 gur.fetchedRefs[s] = true 447 // If we did a full fetch, we already have all refs, so we can just 448 // exit the loop. 449 break loop 450 } 451 } 452 453 var found bool 454 for _, s := range optionalRefs { 455 if _, err := gitRunner.Run(ctx, "fetch", "origin", s); err == nil { 456 found = true 457 } 458 } 459 if !found && len(optionalRefs) > 0 { 460 return "", errors.E(op, errors.Git, fmt.Errorf("unable to find any refs %s", 461 strings.Join(optionalRefs, ","))) 462 } 463 return repoCacheDir, nil 464 }