github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/modfetch/codehost/git.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package codehost 6 7 import ( 8 "bytes" 9 "context" 10 "crypto/sha256" 11 "encoding/base64" 12 "errors" 13 "fmt" 14 "io" 15 "io/fs" 16 "net/url" 17 "os" 18 "os/exec" 19 "path/filepath" 20 "runtime" 21 "slices" 22 "sort" 23 "strconv" 24 "strings" 25 "sync" 26 "time" 27 28 "github.com/go-asm/go/cmd/go/base" 29 "github.com/go-asm/go/cmd/go/lockedfile" 30 "github.com/go-asm/go/cmd/go/par" 31 "github.com/go-asm/go/cmd/go/web" 32 33 "golang.org/x/mod/semver" 34 ) 35 36 // LocalGitRepo is like Repo but accepts both Git remote references 37 // and paths to repositories on the local file system. 38 func LocalGitRepo(ctx context.Context, remote string) (Repo, error) { 39 return newGitRepoCached(ctx, remote, true) 40 } 41 42 // A notExistError wraps another error to retain its original text 43 // but makes it opaquely equivalent to fs.ErrNotExist. 44 type notExistError struct { 45 err error 46 } 47 48 func (e notExistError) Error() string { return e.err.Error() } 49 func (notExistError) Is(err error) bool { return err == fs.ErrNotExist } 50 51 const gitWorkDirType = "git3" 52 53 var gitRepoCache par.ErrCache[gitCacheKey, Repo] 54 55 type gitCacheKey struct { 56 remote string 57 localOK bool 58 } 59 60 func newGitRepoCached(ctx context.Context, remote string, localOK bool) (Repo, error) { 61 return gitRepoCache.Do(gitCacheKey{remote, localOK}, func() (Repo, error) { 62 return newGitRepo(ctx, remote, localOK) 63 }) 64 } 65 66 func newGitRepo(ctx context.Context, remote string, localOK bool) (Repo, error) { 67 r := &gitRepo{remote: remote} 68 if strings.Contains(remote, "://") { 69 // This is a remote path. 70 var err error 71 r.dir, r.mu.Path, err = WorkDir(ctx, gitWorkDirType, r.remote) 72 if err != nil { 73 return nil, err 74 } 75 76 unlock, err := r.mu.Lock() 77 if err != nil { 78 return nil, err 79 } 80 defer unlock() 81 82 if _, err := os.Stat(filepath.Join(r.dir, "objects")); err != nil { 83 if _, err := Run(ctx, r.dir, "git", "init", "--bare"); err != nil { 84 os.RemoveAll(r.dir) 85 return nil, err 86 } 87 // We could just say git fetch https://whatever later, 88 // but this lets us say git fetch origin instead, which 89 // is a little nicer. More importantly, using a named remote 90 // avoids a problem with Git LFS. See golang.org/issue/25605. 91 if _, err := Run(ctx, r.dir, "git", "remote", "add", "origin", "--", r.remote); err != nil { 92 os.RemoveAll(r.dir) 93 return nil, err 94 } 95 if runtime.GOOS == "windows" { 96 // Git for Windows by default does not support paths longer than 97 // MAX_PATH (260 characters) because that may interfere with navigation 98 // in some Windows programs. However, cmd/go should be able to handle 99 // long paths just fine, and we expect people to use 'go clean' to 100 // manipulate the module cache, so it should be harmless to set here, 101 // and in some cases may be necessary in order to download modules with 102 // long branch names. 103 // 104 // See https://github.com/git-for-windows/git/wiki/Git-cannot-create-a-file-or-directory-with-a-long-path. 105 if _, err := Run(ctx, r.dir, "git", "config", "core.longpaths", "true"); err != nil { 106 os.RemoveAll(r.dir) 107 return nil, err 108 } 109 } 110 } 111 r.remoteURL = r.remote 112 r.remote = "origin" 113 } else { 114 // Local path. 115 // Disallow colon (not in ://) because sometimes 116 // that's rcp-style host:path syntax and sometimes it's not (c:\work). 117 // The go command has always insisted on URL syntax for ssh. 118 if strings.Contains(remote, ":") { 119 return nil, fmt.Errorf("git remote cannot use host:path syntax") 120 } 121 if !localOK { 122 return nil, fmt.Errorf("git remote must not be local directory") 123 } 124 r.local = true 125 info, err := os.Stat(remote) 126 if err != nil { 127 return nil, err 128 } 129 if !info.IsDir() { 130 return nil, fmt.Errorf("%s exists but is not a directory", remote) 131 } 132 r.dir = remote 133 r.mu.Path = r.dir + ".lock" 134 } 135 return r, nil 136 } 137 138 type gitRepo struct { 139 ctx context.Context 140 141 remote, remoteURL string 142 local bool 143 dir string 144 145 mu lockedfile.Mutex // protects fetchLevel and git repo state 146 147 fetchLevel int 148 149 statCache par.ErrCache[string, *RevInfo] 150 151 refsOnce sync.Once 152 // refs maps branch and tag refs (e.g., "HEAD", "refs/heads/master") 153 // to commits (e.g., "37ffd2e798afde829a34e8955b716ab730b2a6d6") 154 refs map[string]string 155 refsErr error 156 157 localTagsOnce sync.Once 158 localTags sync.Map // map[string]bool 159 } 160 161 const ( 162 // How much have we fetched into the git repo (in this process)? 163 fetchNone = iota // nothing yet 164 fetchSome // shallow fetches of individual hashes 165 fetchAll // "fetch -t origin": get all remote branches and tags 166 ) 167 168 // loadLocalTags loads tag references from the local git cache 169 // into the map r.localTags. 170 func (r *gitRepo) loadLocalTags(ctx context.Context) { 171 // The git protocol sends all known refs and ls-remote filters them on the client side, 172 // so we might as well record both heads and tags in one shot. 173 // Most of the time we only care about tags but sometimes we care about heads too. 174 out, err := Run(ctx, r.dir, "git", "tag", "-l") 175 if err != nil { 176 return 177 } 178 179 for _, line := range strings.Split(string(out), "\n") { 180 if line != "" { 181 r.localTags.Store(line, true) 182 } 183 } 184 } 185 186 func (r *gitRepo) CheckReuse(ctx context.Context, old *Origin, subdir string) error { 187 if old == nil { 188 return fmt.Errorf("missing origin") 189 } 190 if old.VCS != "git" || old.URL != r.remoteURL { 191 return fmt.Errorf("origin moved from %v %q to %v %q", old.VCS, old.URL, "git", r.remoteURL) 192 } 193 if old.Subdir != subdir { 194 return fmt.Errorf("origin moved from %v %q %q to %v %q %q", old.VCS, old.URL, old.Subdir, "git", r.remoteURL, subdir) 195 } 196 197 // Note: Can have Hash with no Ref and no TagSum and no RepoSum, 198 // meaning the Hash simply has to remain in the repo. 199 // In that case we assume it does in the absence of any real way to check. 200 // But if neither Hash nor TagSum is present, we have nothing to check, 201 // which we take to mean we didn't record enough information to be sure. 202 if old.Hash == "" && old.TagSum == "" && old.RepoSum == "" { 203 return fmt.Errorf("non-specific origin") 204 } 205 206 r.loadRefs(ctx) 207 if r.refsErr != nil { 208 return r.refsErr 209 } 210 211 if old.Ref != "" { 212 hash, ok := r.refs[old.Ref] 213 if !ok { 214 return fmt.Errorf("ref %q deleted", old.Ref) 215 } 216 if hash != old.Hash { 217 return fmt.Errorf("ref %q moved from %s to %s", old.Ref, old.Hash, hash) 218 } 219 } 220 if old.TagSum != "" { 221 tags, err := r.Tags(ctx, old.TagPrefix) 222 if err != nil { 223 return err 224 } 225 if tags.Origin.TagSum != old.TagSum { 226 return fmt.Errorf("tags changed") 227 } 228 } 229 if old.RepoSum != "" { 230 if r.repoSum(r.refs) != old.RepoSum { 231 return fmt.Errorf("refs changed") 232 } 233 } 234 return nil 235 } 236 237 // loadRefs loads heads and tags references from the remote into the map r.refs. 238 // The result is cached in memory. 239 func (r *gitRepo) loadRefs(ctx context.Context) (map[string]string, error) { 240 r.refsOnce.Do(func() { 241 // The git protocol sends all known refs and ls-remote filters them on the client side, 242 // so we might as well record both heads and tags in one shot. 243 // Most of the time we only care about tags but sometimes we care about heads too. 244 release, err := base.AcquireNet() 245 if err != nil { 246 r.refsErr = err 247 return 248 } 249 out, gitErr := Run(ctx, r.dir, "git", "ls-remote", "-q", r.remote) 250 release() 251 252 if gitErr != nil { 253 if rerr, ok := gitErr.(*RunError); ok { 254 if bytes.Contains(rerr.Stderr, []byte("fatal: could not read Username")) { 255 rerr.HelpText = "Confirm the import path was entered correctly.\nIf this is a private repository, see https://golang.org/doc/faq#git_https for additional information." 256 } 257 } 258 259 // If the remote URL doesn't exist at all, ideally we should treat the whole 260 // repository as nonexistent by wrapping the error in a notExistError. 261 // For HTTP and HTTPS, that's easy to detect: we'll try to fetch the URL 262 // ourselves and see what code it serves. 263 if u, err := url.Parse(r.remoteURL); err == nil && (u.Scheme == "http" || u.Scheme == "https") { 264 if _, err := web.GetBytes(u); errors.Is(err, fs.ErrNotExist) { 265 gitErr = notExistError{gitErr} 266 } 267 } 268 269 r.refsErr = gitErr 270 return 271 } 272 273 refs := make(map[string]string) 274 for _, line := range strings.Split(string(out), "\n") { 275 f := strings.Fields(line) 276 if len(f) != 2 { 277 continue 278 } 279 if f[1] == "HEAD" || strings.HasPrefix(f[1], "refs/heads/") || strings.HasPrefix(f[1], "refs/tags/") { 280 refs[f[1]] = f[0] 281 } 282 } 283 for ref, hash := range refs { 284 if k, found := strings.CutSuffix(ref, "^{}"); found { // record unwrapped annotated tag as value of tag 285 refs[k] = hash 286 delete(refs, ref) 287 } 288 } 289 r.refs = refs 290 }) 291 return r.refs, r.refsErr 292 } 293 294 func (r *gitRepo) Tags(ctx context.Context, prefix string) (*Tags, error) { 295 refs, err := r.loadRefs(ctx) 296 if err != nil { 297 return nil, err 298 } 299 300 tags := &Tags{ 301 Origin: &Origin{ 302 VCS: "git", 303 URL: r.remoteURL, 304 TagPrefix: prefix, 305 }, 306 List: []Tag{}, 307 } 308 for ref, hash := range refs { 309 if !strings.HasPrefix(ref, "refs/tags/") { 310 continue 311 } 312 tag := ref[len("refs/tags/"):] 313 if !strings.HasPrefix(tag, prefix) { 314 continue 315 } 316 tags.List = append(tags.List, Tag{tag, hash}) 317 } 318 sort.Slice(tags.List, func(i, j int) bool { 319 return tags.List[i].Name < tags.List[j].Name 320 }) 321 322 dir := prefix[:strings.LastIndex(prefix, "/")+1] 323 h := sha256.New() 324 for _, tag := range tags.List { 325 if isOriginTag(strings.TrimPrefix(tag.Name, dir)) { 326 fmt.Fprintf(h, "%q %s\n", tag.Name, tag.Hash) 327 } 328 } 329 tags.Origin.TagSum = "t1:" + base64.StdEncoding.EncodeToString(h.Sum(nil)) 330 return tags, nil 331 } 332 333 // repoSum returns a checksum of the entire repo state, 334 // which can be checked (as Origin.RepoSum) to cache 335 // the absence of a specific module version. 336 // The caller must supply refs, the result of a successful r.loadRefs. 337 func (r *gitRepo) repoSum(refs map[string]string) string { 338 var list []string 339 for ref := range refs { 340 list = append(list, ref) 341 } 342 sort.Strings(list) 343 h := sha256.New() 344 for _, ref := range list { 345 fmt.Fprintf(h, "%q %s\n", ref, refs[ref]) 346 } 347 return "r1:" + base64.StdEncoding.EncodeToString(h.Sum(nil)) 348 } 349 350 // unknownRevisionInfo returns a RevInfo containing an Origin containing a RepoSum of refs, 351 // for use when returning an UnknownRevisionError. 352 func (r *gitRepo) unknownRevisionInfo(refs map[string]string) *RevInfo { 353 return &RevInfo{ 354 Origin: &Origin{ 355 VCS: "git", 356 URL: r.remoteURL, 357 RepoSum: r.repoSum(refs), 358 }, 359 } 360 } 361 362 func (r *gitRepo) Latest(ctx context.Context) (*RevInfo, error) { 363 refs, err := r.loadRefs(ctx) 364 if err != nil { 365 return nil, err 366 } 367 if refs["HEAD"] == "" { 368 return nil, ErrNoCommits 369 } 370 statInfo, err := r.Stat(ctx, refs["HEAD"]) 371 if err != nil { 372 return nil, err 373 } 374 375 // Stat may return cached info, so make a copy to modify here. 376 info := new(RevInfo) 377 *info = *statInfo 378 info.Origin = new(Origin) 379 if statInfo.Origin != nil { 380 *info.Origin = *statInfo.Origin 381 } 382 info.Origin.Ref = "HEAD" 383 info.Origin.Hash = refs["HEAD"] 384 385 return info, nil 386 } 387 388 // findRef finds some ref name for the given hash, 389 // for use when the server requires giving a ref instead of a hash. 390 // There may be multiple ref names for a given hash, 391 // in which case this returns some name - it doesn't matter which. 392 func (r *gitRepo) findRef(ctx context.Context, hash string) (ref string, ok bool) { 393 refs, err := r.loadRefs(ctx) 394 if err != nil { 395 return "", false 396 } 397 for ref, h := range refs { 398 if h == hash { 399 return ref, true 400 } 401 } 402 return "", false 403 } 404 405 // minHashDigits is the minimum number of digits to require 406 // before accepting a hex digit sequence as potentially identifying 407 // a specific commit in a git repo. (Of course, users can always 408 // specify more digits, and many will paste in all 40 digits, 409 // but many of git's commands default to printing short hashes 410 // as 7 digits.) 411 const minHashDigits = 7 412 413 // stat stats the given rev in the local repository, 414 // or else it fetches more info from the remote repository and tries again. 415 func (r *gitRepo) stat(ctx context.Context, rev string) (info *RevInfo, err error) { 416 if r.local { 417 return r.statLocal(ctx, rev, rev) 418 } 419 420 // Fast path: maybe rev is a hash we already have locally. 421 didStatLocal := false 422 if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) { 423 if info, err := r.statLocal(ctx, rev, rev); err == nil { 424 return info, nil 425 } 426 didStatLocal = true 427 } 428 429 // Maybe rev is a tag we already have locally. 430 // (Note that we're excluding branches, which can be stale.) 431 r.localTagsOnce.Do(func() { r.loadLocalTags(ctx) }) 432 if _, ok := r.localTags.Load(rev); ok { 433 return r.statLocal(ctx, rev, "refs/tags/"+rev) 434 } 435 436 // Maybe rev is the name of a tag or branch on the remote server. 437 // Or maybe it's the prefix of a hash of a named ref. 438 // Try to resolve to both a ref (git name) and full (40-hex-digit) commit hash. 439 refs, err := r.loadRefs(ctx) 440 if err != nil { 441 return nil, err 442 } 443 // loadRefs may return an error if git fails, for example segfaults, or 444 // could not load a private repo, but defer checking to the else block 445 // below, in case we already have the rev in question in the local cache. 446 var ref, hash string 447 if refs["refs/tags/"+rev] != "" { 448 ref = "refs/tags/" + rev 449 hash = refs[ref] 450 // Keep rev as is: tags are assumed not to change meaning. 451 } else if refs["refs/heads/"+rev] != "" { 452 ref = "refs/heads/" + rev 453 hash = refs[ref] 454 rev = hash // Replace rev, because meaning of refs/heads/foo can change. 455 } else if rev == "HEAD" && refs["HEAD"] != "" { 456 ref = "HEAD" 457 hash = refs[ref] 458 rev = hash // Replace rev, because meaning of HEAD can change. 459 } else if len(rev) >= minHashDigits && len(rev) <= 40 && AllHex(rev) { 460 // At the least, we have a hash prefix we can look up after the fetch below. 461 // Maybe we can map it to a full hash using the known refs. 462 prefix := rev 463 // Check whether rev is prefix of known ref hash. 464 for k, h := range refs { 465 if strings.HasPrefix(h, prefix) { 466 if hash != "" && hash != h { 467 // Hash is an ambiguous hash prefix. 468 // More information will not change that. 469 return nil, fmt.Errorf("ambiguous revision %s", rev) 470 } 471 if ref == "" || ref > k { // Break ties deterministically when multiple refs point at same hash. 472 ref = k 473 } 474 rev = h 475 hash = h 476 } 477 } 478 if hash == "" && len(rev) == 40 { // Didn't find a ref, but rev is a full hash. 479 hash = rev 480 } 481 } else { 482 return r.unknownRevisionInfo(refs), &UnknownRevisionError{Rev: rev} 483 } 484 485 defer func() { 486 if info != nil { 487 info.Origin.Hash = info.Name 488 // There's a ref = hash below; don't write that hash down as Origin.Ref. 489 if ref != info.Origin.Hash { 490 info.Origin.Ref = ref 491 } 492 } 493 }() 494 495 // Protect r.fetchLevel and the "fetch more and more" sequence. 496 unlock, err := r.mu.Lock() 497 if err != nil { 498 return nil, err 499 } 500 defer unlock() 501 502 // Perhaps r.localTags did not have the ref when we loaded local tags, 503 // but we've since done fetches that pulled down the hash we need 504 // (or already have the hash we need, just without its tag). 505 // Either way, try a local stat before falling back to network I/O. 506 if !didStatLocal { 507 if info, err := r.statLocal(ctx, rev, hash); err == nil { 508 tag, fromTag := strings.CutPrefix(ref, "refs/tags/") 509 if fromTag && !slices.Contains(info.Tags, tag) { 510 // The local repo includes the commit hash we want, but it is missing 511 // the corresponding tag. Add that tag and try again. 512 _, err := Run(ctx, r.dir, "git", "tag", tag, hash) 513 if err != nil { 514 return nil, err 515 } 516 r.localTags.Store(tag, true) 517 return r.statLocal(ctx, rev, ref) 518 } 519 return info, err 520 } 521 } 522 523 // If we know a specific commit we need and its ref, fetch it. 524 // We do NOT fetch arbitrary hashes (when we don't know the ref) 525 // because we want to avoid ever importing a commit that isn't 526 // reachable from refs/tags/* or refs/heads/* or HEAD. 527 // Both Gerrit and GitHub expose every CL/PR as a named ref, 528 // and we don't want those commits masquerading as being real 529 // pseudo-versions in the main repo. 530 if r.fetchLevel <= fetchSome && ref != "" && hash != "" && !r.local { 531 r.fetchLevel = fetchSome 532 var refspec string 533 if ref == "HEAD" { 534 // Fetch the hash but give it a local name (refs/dummy), 535 // because that triggers the fetch behavior of creating any 536 // other known remote tags for the hash. We never use 537 // refs/dummy (it's not refs/tags/dummy) and it will be 538 // overwritten in the next command, and that's fine. 539 ref = hash 540 refspec = hash + ":refs/dummy" 541 } else { 542 // If we do know the ref name, save the mapping locally 543 // so that (if it is a tag) it can show up in localTags 544 // on a future call. Also, some servers refuse to allow 545 // full hashes in ref specs, so prefer a ref name if known. 546 refspec = ref + ":" + ref 547 } 548 549 release, err := base.AcquireNet() 550 if err != nil { 551 return nil, err 552 } 553 // We explicitly set protocol.version=2 for this command to work around 554 // an apparent Git bug introduced in Git 2.21 (commit 61c771), 555 // which causes the handler for protocol version 1 to sometimes miss 556 // tags that point to the requested commit (see https://go.dev/issue/56881). 557 _, err = Run(ctx, r.dir, "git", "fetch", "-f", "-c", "protocol.version=2", "--depth=1", r.remote, refspec) 558 release() 559 560 if err == nil { 561 return r.statLocal(ctx, rev, ref) 562 } 563 // Don't try to be smart about parsing the error. 564 // It's too complex and varies too much by git version. 565 // No matter what went wrong, fall back to a complete fetch. 566 } 567 568 // Last resort. 569 // Fetch all heads and tags and hope the hash we want is in the history. 570 if err := r.fetchRefsLocked(ctx); err != nil { 571 return nil, err 572 } 573 574 return r.statLocal(ctx, rev, rev) 575 } 576 577 // fetchRefsLocked fetches all heads and tags from the origin, along with the 578 // ancestors of those commits. 579 // 580 // We only fetch heads and tags, not arbitrary other commits: we don't want to 581 // pull in off-branch commits (such as rejected GitHub pull requests) that the 582 // server may be willing to provide. (See the comments within the stat method 583 // for more detail.) 584 // 585 // fetchRefsLocked requires that r.mu remain locked for the duration of the call. 586 func (r *gitRepo) fetchRefsLocked(ctx context.Context) error { 587 if r.fetchLevel < fetchAll { 588 // NOTE: To work around a bug affecting Git clients up to at least 2.23.0 589 // (2019-08-16), we must first expand the set of local refs, and only then 590 // unshallow the repository as a separate fetch operation. (See 591 // golang.org/issue/34266 and 592 // https://github.com/git/git/blob/4c86140027f4a0d2caaa3ab4bd8bfc5ce3c11c8a/transport.c#L1303-L1309.) 593 594 release, err := base.AcquireNet() 595 if err != nil { 596 return err 597 } 598 defer release() 599 600 if _, err := Run(ctx, r.dir, "git", "fetch", "-f", r.remote, "refs/heads/*:refs/heads/*", "refs/tags/*:refs/tags/*"); err != nil { 601 return err 602 } 603 604 if _, err := os.Stat(filepath.Join(r.dir, "shallow")); err == nil { 605 if _, err := Run(ctx, r.dir, "git", "fetch", "--unshallow", "-f", r.remote); err != nil { 606 return err 607 } 608 } 609 610 r.fetchLevel = fetchAll 611 } 612 return nil 613 } 614 615 // statLocal returns a new RevInfo describing rev in the local git repository. 616 // It uses version as info.Version. 617 func (r *gitRepo) statLocal(ctx context.Context, version, rev string) (*RevInfo, error) { 618 out, err := Run(ctx, r.dir, "git", "-c", "log.showsignature=false", "log", "--no-decorate", "-n1", "--format=format:%H %ct %D", rev, "--") 619 if err != nil { 620 // Return info with Origin.RepoSum if possible to allow caching of negative lookup. 621 var info *RevInfo 622 if refs, err := r.loadRefs(ctx); err == nil { 623 info = r.unknownRevisionInfo(refs) 624 } 625 return info, &UnknownRevisionError{Rev: rev} 626 } 627 f := strings.Fields(string(out)) 628 if len(f) < 2 { 629 return nil, fmt.Errorf("unexpected response from git log: %q", out) 630 } 631 hash := f[0] 632 if strings.HasPrefix(hash, version) { 633 version = hash // extend to full hash 634 } 635 t, err := strconv.ParseInt(f[1], 10, 64) 636 if err != nil { 637 return nil, fmt.Errorf("invalid time from git log: %q", out) 638 } 639 640 info := &RevInfo{ 641 Origin: &Origin{ 642 VCS: "git", 643 URL: r.remoteURL, 644 Hash: hash, 645 }, 646 Name: hash, 647 Short: ShortenSHA1(hash), 648 Time: time.Unix(t, 0).UTC(), 649 Version: hash, 650 } 651 if !strings.HasPrefix(hash, rev) { 652 info.Origin.Ref = rev 653 } 654 655 // Add tags. Output looks like: 656 // ede458df7cd0fdca520df19a33158086a8a68e81 1523994202 HEAD -> master, tag: v1.2.4-annotated, tag: v1.2.3, origin/master, origin/HEAD 657 for i := 2; i < len(f); i++ { 658 if f[i] == "tag:" { 659 i++ 660 if i < len(f) { 661 info.Tags = append(info.Tags, strings.TrimSuffix(f[i], ",")) 662 } 663 } 664 } 665 sort.Strings(info.Tags) 666 667 // Used hash as info.Version above. 668 // Use caller's suggested version if it appears in the tag list 669 // (filters out branch names, HEAD). 670 for _, tag := range info.Tags { 671 if version == tag { 672 info.Version = version 673 } 674 } 675 676 return info, nil 677 } 678 679 func (r *gitRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) { 680 if rev == "latest" { 681 return r.Latest(ctx) 682 } 683 return r.statCache.Do(rev, func() (*RevInfo, error) { 684 return r.stat(ctx, rev) 685 }) 686 } 687 688 func (r *gitRepo) ReadFile(ctx context.Context, rev, file string, maxSize int64) ([]byte, error) { 689 // TODO: Could use git cat-file --batch. 690 info, err := r.Stat(ctx, rev) // download rev into local git repo 691 if err != nil { 692 return nil, err 693 } 694 out, err := Run(ctx, r.dir, "git", "cat-file", "blob", info.Name+":"+file) 695 if err != nil { 696 return nil, fs.ErrNotExist 697 } 698 return out, nil 699 } 700 701 func (r *gitRepo) RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error) { 702 info, err := r.Stat(ctx, rev) 703 if err != nil { 704 return "", err 705 } 706 rev = info.Name // expand hash prefixes 707 708 // describe sets tag and err using 'git for-each-ref' and reports whether the 709 // result is definitive. 710 describe := func() (definitive bool) { 711 var out []byte 712 out, err = Run(ctx, r.dir, "git", "for-each-ref", "--format", "%(refname)", "refs/tags", "--merged", rev) 713 if err != nil { 714 return true 715 } 716 717 // prefixed tags aren't valid semver tags so compare without prefix, but only tags with correct prefix 718 var highest string 719 for _, line := range strings.Split(string(out), "\n") { 720 line = strings.TrimSpace(line) 721 // git do support lstrip in for-each-ref format, but it was added in v2.13.0. Stripping here 722 // instead gives support for git v2.7.0. 723 if !strings.HasPrefix(line, "refs/tags/") { 724 continue 725 } 726 line = line[len("refs/tags/"):] 727 728 if !strings.HasPrefix(line, prefix) { 729 continue 730 } 731 if !allowed(line) { 732 continue 733 } 734 735 semtag := line[len(prefix):] 736 if semver.Compare(semtag, highest) > 0 { 737 highest = semtag 738 } 739 } 740 741 if highest != "" { 742 tag = prefix + highest 743 } 744 745 return tag != "" && !AllHex(tag) 746 } 747 748 if describe() { 749 return tag, err 750 } 751 752 // Git didn't find a version tag preceding the requested rev. 753 // See whether any plausible tag exists. 754 tags, err := r.Tags(ctx, prefix+"v") 755 if err != nil { 756 return "", err 757 } 758 if len(tags.List) == 0 { 759 return "", nil 760 } 761 762 // There are plausible tags, but we don't know if rev is a descendent of any of them. 763 // Fetch the history to find out. 764 765 unlock, err := r.mu.Lock() 766 if err != nil { 767 return "", err 768 } 769 defer unlock() 770 771 if err := r.fetchRefsLocked(ctx); err != nil { 772 return "", err 773 } 774 775 // If we've reached this point, we have all of the commits that are reachable 776 // from all heads and tags. 777 // 778 // The only refs we should be missing are those that are no longer reachable 779 // (or never were reachable) from any branch or tag, including the master 780 // branch, and we don't want to resolve them anyway (they're probably 781 // unreachable for a reason). 782 // 783 // Try one last time in case some other goroutine fetched rev while we were 784 // waiting on the lock. 785 describe() 786 return tag, err 787 } 788 789 func (r *gitRepo) DescendsFrom(ctx context.Context, rev, tag string) (bool, error) { 790 // The "--is-ancestor" flag was added to "git merge-base" in version 1.8.0, so 791 // this won't work with Git 1.7.1. According to golang.org/issue/28550, cmd/go 792 // already doesn't work with Git 1.7.1, so at least it's not a regression. 793 // 794 // git merge-base --is-ancestor exits with status 0 if rev is an ancestor, or 795 // 1 if not. 796 _, err := Run(ctx, r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) 797 798 // Git reports "is an ancestor" with exit code 0 and "not an ancestor" with 799 // exit code 1. 800 // Unfortunately, if we've already fetched rev with a shallow history, git 801 // merge-base has been observed to report a false-negative, so don't stop yet 802 // even if the exit code is 1! 803 if err == nil { 804 return true, nil 805 } 806 807 // See whether the tag and rev even exist. 808 tags, err := r.Tags(ctx, tag) 809 if err != nil { 810 return false, err 811 } 812 if len(tags.List) == 0 { 813 return false, nil 814 } 815 816 // NOTE: r.stat is very careful not to fetch commits that we shouldn't know 817 // about, like rejected GitHub pull requests, so don't try to short-circuit 818 // that here. 819 if _, err = r.stat(ctx, rev); err != nil { 820 return false, err 821 } 822 823 // Now fetch history so that git can search for a path. 824 unlock, err := r.mu.Lock() 825 if err != nil { 826 return false, err 827 } 828 defer unlock() 829 830 if r.fetchLevel < fetchAll { 831 // Fetch the complete history for all refs and heads. It would be more 832 // efficient to only fetch the history from rev to tag, but that's much more 833 // complicated, and any kind of shallow fetch is fairly likely to trigger 834 // bugs in JGit servers and/or the go command anyway. 835 if err := r.fetchRefsLocked(ctx); err != nil { 836 return false, err 837 } 838 } 839 840 _, err = Run(ctx, r.dir, "git", "merge-base", "--is-ancestor", "--", tag, rev) 841 if err == nil { 842 return true, nil 843 } 844 if ee, ok := err.(*RunError).Err.(*exec.ExitError); ok && ee.ExitCode() == 1 { 845 return false, nil 846 } 847 return false, err 848 } 849 850 func (r *gitRepo) ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) { 851 // TODO: Use maxSize or drop it. 852 args := []string{} 853 if subdir != "" { 854 args = append(args, "--", subdir) 855 } 856 info, err := r.Stat(ctx, rev) // download rev into local git repo 857 if err != nil { 858 return nil, err 859 } 860 861 unlock, err := r.mu.Lock() 862 if err != nil { 863 return nil, err 864 } 865 defer unlock() 866 867 if err := ensureGitAttributes(r.dir); err != nil { 868 return nil, err 869 } 870 871 // Incredibly, git produces different archives depending on whether 872 // it is running on a Windows system or not, in an attempt to normalize 873 // text file line endings. Setting -c core.autocrlf=input means only 874 // translate files on the way into the repo, not on the way out (archive). 875 // The -c core.eol=lf should be unnecessary but set it anyway. 876 archive, err := Run(ctx, r.dir, "git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", info.Name, args) 877 if err != nil { 878 if bytes.Contains(err.(*RunError).Stderr, []byte("did not match any files")) { 879 return nil, fs.ErrNotExist 880 } 881 return nil, err 882 } 883 884 return io.NopCloser(bytes.NewReader(archive)), nil 885 } 886 887 // ensureGitAttributes makes sure export-subst and export-ignore features are 888 // disabled for this repo. This is intended to be run prior to running git 889 // archive so that zip files are generated that produce consistent ziphashes 890 // for a given revision, independent of variables such as git version and the 891 // size of the repo. 892 // 893 // See: https://github.com/golang/go/issues/27153 894 func ensureGitAttributes(repoDir string) (err error) { 895 const attr = "\n* -export-subst -export-ignore\n" 896 897 d := repoDir + "/info" 898 p := d + "/attributes" 899 900 if err := os.MkdirAll(d, 0755); err != nil { 901 return err 902 } 903 904 f, err := os.OpenFile(p, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666) 905 if err != nil { 906 return err 907 } 908 defer func() { 909 closeErr := f.Close() 910 if closeErr != nil { 911 err = closeErr 912 } 913 }() 914 915 b, err := io.ReadAll(f) 916 if err != nil { 917 return err 918 } 919 if !bytes.HasSuffix(b, []byte(attr)) { 920 _, err := f.WriteString(attr) 921 return err 922 } 923 924 return nil 925 }