github.com/wmuizelaar/kpt@v0.0.0-20221018115725-bd564717b2ed/internal/gitutil/gitutil.go (about) 1 // Copyright 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gitutil 16 17 import ( 18 "bufio" 19 "bytes" 20 "context" 21 "crypto/md5" 22 "encoding/base32" 23 "fmt" 24 "io" 25 "os" 26 "os/exec" 27 "path/filepath" 28 "regexp" 29 "strings" 30 "time" 31 32 "github.com/GoogleContainerTools/kpt/internal/errors" 33 "github.com/GoogleContainerTools/kpt/internal/printer" 34 ) 35 36 // RepoCacheDirEnv is the name of the environment variable that controls the cache directory 37 // for remote repos. Defaults to UserHomeDir/.kpt/repos if unspecified. 38 const RepoCacheDirEnv = "KPT_CACHE_DIR" 39 40 // NewLocalGitRunner returns a new GitLocalRunner for a local package. 41 func NewLocalGitRunner(pkg string) (*GitLocalRunner, error) { 42 const op errors.Op = "gitutil.NewLocalGitRunner" 43 p, err := exec.LookPath("git") 44 if err != nil { 45 return nil, errors.E(op, errors.Git, &GitExecError{ 46 Type: GitExecutableNotFound, 47 Err: err, 48 }) 49 } 50 51 return &GitLocalRunner{ 52 gitPath: p, 53 Dir: pkg, 54 Debug: false, 55 }, nil 56 } 57 58 // GitLocalRunner runs git commands in a local git repo. 59 type GitLocalRunner struct { 60 // Path to the git executable. 61 gitPath string 62 63 // Dir is the directory the commands are run in. 64 Dir string 65 66 // Debug enables output of debug information to stderr. 67 Debug bool 68 } 69 70 type RunResult struct { 71 Stdout string 72 Stderr string 73 } 74 75 // Run runs a git command. 76 // Omit the 'git' part of the command. 77 // The first return value contains the output to Stdout and Stderr when 78 // running the command. 79 func (g *GitLocalRunner) Run(ctx context.Context, command string, args ...string) (RunResult, error) { 80 return g.run(ctx, false, command, args...) 81 } 82 83 // RunVerbose runs a git command. 84 // Omit the 'git' part of the command. 85 // The first return value contains the output to Stdout and Stderr when 86 // running the command. 87 func (g *GitLocalRunner) RunVerbose(ctx context.Context, command string, args ...string) (RunResult, error) { 88 return g.run(ctx, true, command, args...) 89 } 90 91 // run runs a git command. 92 // Omit the 'git' part of the command. 93 // The first return value contains the output to Stdout and Stderr when 94 // running the command. 95 func (g *GitLocalRunner) run(ctx context.Context, verbose bool, command string, args ...string) (RunResult, error) { 96 const op errors.Op = "gitutil.run" 97 98 fullArgs := append([]string{command}, args...) 99 cmd := exec.CommandContext(ctx, g.gitPath, fullArgs...) 100 cmd.Dir = g.Dir 101 // Disable git prompting the user for credentials. 102 cmd.Env = append(os.Environ(), 103 "GIT_TERMINAL_PROMPT=0") 104 pr := printer.FromContextOrDie(ctx) 105 cmdStdout := &bytes.Buffer{} 106 cmdStderr := &bytes.Buffer{} 107 if verbose { 108 cmd.Stdout = io.MultiWriter(cmdStdout, pr.OutStream()) 109 cmd.Stderr = io.MultiWriter(cmdStderr, pr.ErrStream()) 110 } else { 111 cmd.Stdout = cmdStdout 112 cmd.Stderr = cmdStderr 113 } 114 115 if g.Debug { 116 _, _ = fmt.Fprintf(os.Stderr, "[git -C %s %s]\n", g.Dir, strings.Join(fullArgs, " ")) 117 } 118 start := time.Now() 119 err := cmd.Run() 120 duration := time.Since(start) 121 if g.Debug { 122 _, _ = fmt.Fprintf(os.Stderr, "duration: %v\n", duration) 123 } 124 if err != nil { 125 return RunResult{}, errors.E(op, errors.Git, &GitExecError{ 126 Type: determineErrorType(cmdStderr.String()), 127 Args: args, 128 Command: command, 129 Err: err, 130 StdOut: cmdStdout.String(), 131 StdErr: cmdStderr.String(), 132 }) 133 } 134 return RunResult{ 135 Stdout: cmdStdout.String(), 136 Stderr: cmdStderr.String(), 137 }, nil 138 } 139 140 type NewGitUpstreamRepoOption func(*GitUpstreamRepo) 141 142 func WithFetchedRefs(a map[string]bool) NewGitUpstreamRepoOption { 143 return func(g *GitUpstreamRepo) { 144 g.fetchedRefs = a 145 } 146 } 147 148 // NewGitUpstreamRepo returns a new GitUpstreamRepo for an upstream package. 149 func NewGitUpstreamRepo(ctx context.Context, uri string, opts ...NewGitUpstreamRepoOption) (*GitUpstreamRepo, error) { 150 const op errors.Op = "gitutil.NewGitUpstreamRepo" 151 g := &GitUpstreamRepo{ 152 URI: uri, 153 } 154 for _, opt := range opts { 155 opt(g) 156 } 157 if g.fetchedRefs == nil { 158 g.fetchedRefs = map[string]bool{} 159 } 160 if err := g.updateRefs(ctx); err != nil { 161 return nil, errors.E(op, errors.Repo(uri), err) 162 } 163 return g, nil 164 } 165 166 // GitUpstreamRepo runs git commands in a local git repo. 167 type GitUpstreamRepo struct { 168 URI string 169 170 // Heads contains all head refs in the upstream repo as well as the 171 // each of the are referencing. 172 Heads map[string]string 173 174 // Tags contains all tag refs in the upstream repo as well as the 175 // each of the are referencing. 176 Tags map[string]string 177 178 // fetchedRefs keeps track of refs already fetched from remote 179 fetchedRefs map[string]bool 180 } 181 182 func (gur *GitUpstreamRepo) GetFetchedRefs() []string { 183 fetchedRefs := make([]string, 0, len(gur.fetchedRefs)) 184 for ref := range gur.fetchedRefs { 185 fetchedRefs = append(fetchedRefs, ref) 186 } 187 return fetchedRefs 188 } 189 190 // updateRefs fetches all refs from the upstream git repo, parses the results 191 // and caches all refs and the commit they reference. Not that this doesn't 192 // download any objects, only refs. 193 func (gur *GitUpstreamRepo) updateRefs(ctx context.Context) error { 194 const op errors.Op = "gitutil.updateRefs" 195 repoCacheDir, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{}) 196 if err != nil { 197 return errors.E(op, errors.Repo(gur.URI), err) 198 } 199 200 gitRunner, err := NewLocalGitRunner(repoCacheDir) 201 if err != nil { 202 return errors.E(op, errors.Repo(gur.URI), err) 203 } 204 205 rr, err := gitRunner.Run(ctx, "ls-remote", "--heads", "--tags", "--refs", "origin") 206 if err != nil { 207 AmendGitExecError(err, func(e *GitExecError) { 208 e.Repo = gur.URI 209 }) 210 // TODO: This should only fail if we can't connect to the repo. We should 211 // consider exposing the error message from git to the user here. 212 return errors.E(op, errors.Repo(gur.URI), err) 213 } 214 215 heads := make(map[string]string) 216 tags := make(map[string]string) 217 218 re := regexp.MustCompile(`^([a-z0-9]+)\s+refs/(heads|tags)/(.+)$`) 219 scanner := bufio.NewScanner(bytes.NewBufferString(rr.Stdout)) 220 for scanner.Scan() { 221 txt := scanner.Text() 222 res := re.FindStringSubmatch(txt) 223 if len(res) == 0 { 224 continue 225 } 226 switch res[2] { 227 case "heads": 228 heads[res[3]] = res[1] 229 case "tags": 230 tags[res[3]] = res[1] 231 } 232 } 233 if err := scanner.Err(); err != nil { 234 return errors.E(op, errors.Repo(gur.URI), errors.Git, 235 fmt.Errorf("error parsing response from git: %w", err)) 236 } 237 gur.Heads = heads 238 gur.Tags = tags 239 return nil 240 } 241 242 // GetRepo fetches all the provided refs and the objects. It will fetch it 243 // to the cache repo and returns the path to the local git clone in the cache 244 // directory. 245 func (gur *GitUpstreamRepo) GetRepo(ctx context.Context, refs []string) (string, error) { 246 const op errors.Op = "gitutil.GetRepo" 247 dir, err := gur.cacheRepo(ctx, gur.URI, refs, []string{}) 248 if err != nil { 249 return "", errors.E(op, errors.Repo(gur.URI), err) 250 } 251 return dir, nil 252 } 253 254 // GetDefaultBranch returns the name of the branch pointed to by the 255 // HEAD symref. This is the default branch of the repository. 256 func (gur *GitUpstreamRepo) GetDefaultBranch(ctx context.Context) (string, error) { 257 const op errors.Op = "gitutil.GetDefaultBranch" 258 cacheRepo, err := gur.cacheRepo(ctx, gur.URI, []string{}, []string{}) 259 if err != nil { 260 return "", errors.E(op, errors.Repo(gur.URI), err) 261 } 262 263 gitRunner, err := NewLocalGitRunner(cacheRepo) 264 if err != nil { 265 return "", errors.E(op, errors.Repo(gur.URI), err) 266 } 267 268 rr, err := gitRunner.Run(ctx, "ls-remote", "--symref", "origin", "HEAD") 269 if err != nil { 270 AmendGitExecError(err, func(e *GitExecError) { 271 e.Repo = gur.URI 272 }) 273 return "", errors.E(op, errors.Repo(gur.URI), err) 274 } 275 if rr.Stdout == "" { 276 return "", errors.E(op, errors.Repo(gur.URI), 277 fmt.Errorf("unable to detect default branch in repo")) 278 } 279 280 re := regexp.MustCompile(`ref: refs/heads/([^\s/]+)\s*HEAD`) 281 match := re.FindStringSubmatch(rr.Stdout) 282 if len(match) != 2 { 283 return "", errors.E(op, errors.Repo(gur.URI), errors.Git, 284 fmt.Errorf("unexpected response from git when determining default branch: %s", rr.Stdout)) 285 } 286 return match[1], nil 287 } 288 289 // ResolveBranch resolves the branch to a commit SHA. This happens based on the 290 // cached information about refs in the upstream repo. If the branch doesn't exist 291 // in the upstream repo, the last return value will be false. 292 func (gur *GitUpstreamRepo) ResolveBranch(branch string) (string, bool) { 293 branch = strings.TrimPrefix(branch, "refs/heads/") 294 for head, commit := range gur.Heads { 295 if head == branch { 296 return commit, true 297 } 298 } 299 return "", false 300 } 301 302 // ResolveTag resolves the tag to a commit SHA. This happens based on the 303 // cached information about refs in the upstream repo. If the tag doesn't exist 304 // in the upstream repo, the last return value will be false. 305 func (gur *GitUpstreamRepo) ResolveTag(tag string) (string, bool) { 306 tag = strings.TrimPrefix(tag, "refs/tags/") 307 for t, commit := range gur.Tags { 308 if t == tag { 309 return commit, true 310 } 311 } 312 return "", false 313 } 314 315 // ResolveRef resolves the ref (either tag or branch) to a commit SHA. If the 316 // ref doesn't exist in the upstream repo, the last return value will be false. 317 func (gur *GitUpstreamRepo) ResolveRef(ref string) (string, bool) { 318 commit, found := gur.ResolveBranch(ref) 319 if found { 320 return commit, true 321 } 322 return gur.ResolveTag(ref) 323 } 324 325 // getRepoDir returns the cache directory name for a remote repo 326 // This takes the md5 hash of the repo uri and then base32 encodes it to make 327 // sure it doesn't contain characters that isn't legal in directory names. 328 func (gur *GitUpstreamRepo) getRepoDir(uri string) string { 329 return strings.ToLower(base32.StdEncoding.EncodeToString(md5.New().Sum([]byte(uri)))) 330 } 331 332 // getRepoCacheDir 333 func (gur *GitUpstreamRepo) getRepoCacheDir() (string, error) { 334 const op errors.Op = "gitutil.getRepoCacheDir" 335 var err error 336 dir := os.Getenv(RepoCacheDirEnv) 337 if dir != "" { 338 return dir, nil 339 } 340 341 // cache location unspecified, use UserHomeDir/.kpt/repos 342 dir, err = os.UserHomeDir() 343 if err != nil { 344 return "", errors.E(op, errors.IO, fmt.Errorf( 345 "error looking up user home dir: %w", err)) 346 } 347 return filepath.Join(dir, ".kpt", "repos"), nil 348 } 349 350 // cacheRepo fetches a remote repo to a cache location, and fetches the provided refs. 351 func (gur *GitUpstreamRepo) cacheRepo(ctx context.Context, uri string, requiredRefs []string, optionalRefs []string) (string, error) { 352 const op errors.Op = "gitutil.cacheRepo" 353 kptCacheDir, err := gur.getRepoCacheDir() 354 if err != nil { 355 return "", errors.E(op, err) 356 } 357 if err := os.MkdirAll(kptCacheDir, 0700); err != nil { 358 return "", errors.E(op, errors.IO, fmt.Errorf( 359 "error creating cache directory for repo: %w", err)) 360 } 361 362 // create the repo directory if it doesn't exist yet 363 gitRunner, err := NewLocalGitRunner(kptCacheDir) 364 if err != nil { 365 return "", errors.E(op, errors.Repo(uri), err) 366 } 367 uriSha := gur.getRepoDir(uri) 368 repoCacheDir := filepath.Join(kptCacheDir, uriSha) 369 if _, err := os.Stat(repoCacheDir); os.IsNotExist(err) { 370 if _, err := gitRunner.Run(ctx, "init", uriSha); err != nil { 371 AmendGitExecError(err, func(e *GitExecError) { 372 e.Repo = uri 373 }) 374 return "", errors.E(op, errors.Git, fmt.Errorf("error running `git init`: %w", err)) 375 } 376 gitRunner.Dir = repoCacheDir 377 if _, err = gitRunner.Run(ctx, "remote", "add", "origin", uri); err != nil { 378 AmendGitExecError(err, func(e *GitExecError) { 379 e.Repo = uri 380 }) 381 return "", errors.E(op, errors.Git, fmt.Errorf("error adding origin remote: %w", err)) 382 } 383 } else { 384 gitRunner.Dir = repoCacheDir 385 } 386 387 loop: 388 for i := range requiredRefs { 389 s := requiredRefs[i] 390 // Check if we can verify the ref. This will output a full commit sha if 391 // either the ref (short commit, tag, branch) can be resolved to a full 392 // commit sha, or if the provided ref is already a valid full commit sha (note 393 // that this will happen even if the commit doesn't exist in the local repo). 394 // We ignore the error here since an error just means the ref didn't exist, 395 // which we detect by checking the output to stdout. 396 rr, _ := gitRunner.Run(ctx, "rev-parse", "--verify", "-q", s) 397 // If the output is the same as the ref, then the ref was already a full 398 // commit sha. 399 validFullSha := s == strings.TrimSpace(rr.Stdout) 400 _, resolved := gur.ResolveRef(s) 401 // check if ref was previously fetched 402 // we use the ref s as the cache key 403 _, fetched := gur.fetchedRefs[s] 404 switch { 405 case fetched: 406 // skip refetching if previously fetched 407 break 408 case resolved || validFullSha: 409 // If the ref references a branch or a tag, or is a valid commit 410 // sha and has not already been fetched, we can fetch just a single commit. 411 if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin", "--depth=1", s); err != nil { 412 AmendGitExecError(err, func(e *GitExecError) { 413 e.Repo = uri 414 e.Command = "fetch" 415 e.Ref = s 416 }) 417 return "", errors.E(op, errors.Git, fmt.Errorf( 418 "error running `git fetch` for ref %q: %w", s, err)) 419 } 420 gur.fetchedRefs[s] = true 421 default: 422 // In other situations (like a short commit sha), we have to do 423 // a full fetch from the remote. 424 if _, err := gitRunner.RunVerbose(ctx, "fetch", "origin"); err != nil { 425 AmendGitExecError(err, func(e *GitExecError) { 426 e.Repo = uri 427 e.Command = "fetch" 428 }) 429 return "", errors.E(op, errors.Git, fmt.Errorf( 430 "error running `git fetch` for origin: %w", err)) 431 } 432 if _, err = gitRunner.Run(ctx, "show", s); err != nil { 433 AmendGitExecError(err, func(e *GitExecError) { 434 e.Repo = uri 435 e.Ref = s 436 }) 437 return "", errors.E(op, errors.Git, fmt.Errorf( 438 "error verifying results from fetch: %w", err)) 439 } 440 gur.fetchedRefs[s] = true 441 // If we did a full fetch, we already have all refs, so we can just 442 // exit the loop. 443 break loop 444 } 445 } 446 447 var found bool 448 for _, s := range optionalRefs { 449 if _, err := gitRunner.Run(ctx, "fetch", "origin", s); err == nil { 450 found = true 451 } 452 } 453 if !found && len(optionalRefs) > 0 { 454 return "", errors.E(op, errors.Git, fmt.Errorf("unable to find any refs %s", 455 strings.Join(optionalRefs, ","))) 456 } 457 return repoCacheDir, nil 458 }