github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/go/not-internal/get/vcs.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package get 6 7 import ( 8 "encoding/json" 9 "errors" 10 "fmt" 11 "github.com/gagliardetto/golang-go/not-internal/lazyregexp" 12 "github.com/gagliardetto/golang-go/not-internal/singleflight" 13 "log" 14 urlpkg "net/url" 15 "os" 16 "os/exec" 17 "path/filepath" 18 "regexp" 19 "strings" 20 "sync" 21 22 "github.com/gagliardetto/golang-go/cmd/go/not-internal/base" 23 "github.com/gagliardetto/golang-go/cmd/go/not-internal/cfg" 24 "github.com/gagliardetto/golang-go/cmd/go/not-internal/load" 25 "github.com/gagliardetto/golang-go/cmd/go/not-internal/web" 26 ) 27 28 // A vcsCmd describes how to use a version control system 29 // like Mercurial, Git, or Subversion. 30 type vcsCmd struct { 31 name string 32 cmd string // name of binary to invoke command 33 34 createCmd []string // commands to download a fresh copy of a repository 35 downloadCmd []string // commands to download updates into an existing repository 36 37 tagCmd []tagCmd // commands to list tags 38 tagLookupCmd []tagCmd // commands to lookup tags before running tagSyncCmd 39 tagSyncCmd []string // commands to sync to specific tag 40 tagSyncDefault []string // commands to sync to default tag 41 42 scheme []string 43 pingCmd string 44 45 remoteRepo func(v *vcsCmd, rootDir string) (remoteRepo string, err error) 46 resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) 47 } 48 49 var defaultSecureScheme = map[string]bool{ 50 "https": true, 51 "git+ssh": true, 52 "bzr+ssh": true, 53 "svn+ssh": true, 54 "ssh": true, 55 } 56 57 func (v *vcsCmd) isSecure(repo string) bool { 58 u, err := urlpkg.Parse(repo) 59 if err != nil { 60 // If repo is not a URL, it's not secure. 61 return false 62 } 63 return v.isSecureScheme(u.Scheme) 64 } 65 66 func (v *vcsCmd) isSecureScheme(scheme string) bool { 67 switch v.cmd { 68 case "git": 69 // GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a 70 // colon-separated list of schemes that are allowed to be used with git 71 // fetch/clone. Any scheme not mentioned will be considered insecure. 72 if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" { 73 for _, s := range strings.Split(allow, ":") { 74 if s == scheme { 75 return true 76 } 77 } 78 return false 79 } 80 } 81 return defaultSecureScheme[scheme] 82 } 83 84 // A tagCmd describes a command to list available tags 85 // that can be passed to tagSyncCmd. 86 type tagCmd struct { 87 cmd string // command to list tags 88 pattern string // regexp to extract tags from list 89 } 90 91 // vcsList lists the known version control systems 92 var vcsList = []*vcsCmd{ 93 vcsHg, 94 vcsGit, 95 vcsSvn, 96 vcsBzr, 97 vcsFossil, 98 } 99 100 // vcsByCmd returns the version control system for the given 101 // command name (hg, git, svn, bzr). 102 func vcsByCmd(cmd string) *vcsCmd { 103 for _, vcs := range vcsList { 104 if vcs.cmd == cmd { 105 return vcs 106 } 107 } 108 return nil 109 } 110 111 // vcsHg describes how to use Mercurial. 112 var vcsHg = &vcsCmd{ 113 name: "Mercurial", 114 cmd: "hg", 115 116 createCmd: []string{"clone -U -- {repo} {dir}"}, 117 downloadCmd: []string{"pull"}, 118 119 // We allow both tag and branch names as 'tags' 120 // for selecting a version. This lets people have 121 // a go.release.r60 branch and a go1 branch 122 // and make changes in both, without constantly 123 // editing .hgtags. 124 tagCmd: []tagCmd{ 125 {"tags", `^(\S+)`}, 126 {"branches", `^(\S+)`}, 127 }, 128 tagSyncCmd: []string{"update -r {tag}"}, 129 tagSyncDefault: []string{"update default"}, 130 131 scheme: []string{"https", "http", "ssh"}, 132 pingCmd: "identify -- {scheme}://{repo}", 133 remoteRepo: hgRemoteRepo, 134 } 135 136 func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) { 137 out, err := vcsHg.runOutput(rootDir, "paths default") 138 if err != nil { 139 return "", err 140 } 141 return strings.TrimSpace(string(out)), nil 142 } 143 144 // vcsGit describes how to use Git. 145 var vcsGit = &vcsCmd{ 146 name: "Git", 147 cmd: "git", 148 149 createCmd: []string{"clone -- {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"}, 150 downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"}, 151 152 tagCmd: []tagCmd{ 153 // tags/xxx matches a git tag named xxx 154 // origin/xxx matches a git branch named xxx on the default remote repository 155 {"show-ref", `(?:tags|origin)/(\S+)$`}, 156 }, 157 tagLookupCmd: []tagCmd{ 158 {"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`}, 159 }, 160 tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"}, 161 // both createCmd and downloadCmd update the working dir. 162 // No need to do more here. We used to 'checkout master' 163 // but that doesn't work if the default branch is not named master. 164 // DO NOT add 'checkout master' here. 165 // See golang.org/issue/9032. 166 tagSyncDefault: []string{"submodule update --init --recursive"}, 167 168 scheme: []string{"git", "https", "http", "git+ssh", "ssh"}, 169 170 // Leave out the '--' separator in the ls-remote command: git 2.7.4 does not 171 // support such a separator for that command, and this use should be safe 172 // without it because the {scheme} value comes from the predefined list above. 173 // See golang.org/issue/33836. 174 pingCmd: "ls-remote {scheme}://{repo}", 175 176 remoteRepo: gitRemoteRepo, 177 } 178 179 // scpSyntaxRe matches the SCP-like addresses used by Git to access 180 // repositories by SSH. 181 var scpSyntaxRe = lazyregexp.New(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 182 183 func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) { 184 cmd := "config remote.origin.url" 185 errParse := errors.New("unable to parse output of git " + cmd) 186 errRemoteOriginNotFound := errors.New("remote origin not found") 187 outb, err := vcsGit.run1(rootDir, cmd, nil, false) 188 if err != nil { 189 // if it doesn't output any message, it means the config argument is correct, 190 // but the config value itself doesn't exist 191 if outb != nil && len(outb) == 0 { 192 return "", errRemoteOriginNotFound 193 } 194 return "", err 195 } 196 out := strings.TrimSpace(string(outb)) 197 198 var repoURL *urlpkg.URL 199 if m := scpSyntaxRe.FindStringSubmatch(out); m != nil { 200 // Match SCP-like syntax and convert it to a URL. 201 // Eg, "git@github.com:user/repo" becomes 202 // "ssh://git@github.com/user/repo". 203 repoURL = &urlpkg.URL{ 204 Scheme: "ssh", 205 User: urlpkg.User(m[1]), 206 Host: m[2], 207 Path: m[3], 208 } 209 } else { 210 repoURL, err = urlpkg.Parse(out) 211 if err != nil { 212 return "", err 213 } 214 } 215 216 // Iterate over insecure schemes too, because this function simply 217 // reports the state of the repo. If we can't see insecure schemes then 218 // we can't report the actual repo URL. 219 for _, s := range vcsGit.scheme { 220 if repoURL.Scheme == s { 221 return repoURL.String(), nil 222 } 223 } 224 return "", errParse 225 } 226 227 // vcsBzr describes how to use Bazaar. 228 var vcsBzr = &vcsCmd{ 229 name: "Bazaar", 230 cmd: "bzr", 231 232 createCmd: []string{"branch -- {repo} {dir}"}, 233 234 // Without --overwrite bzr will not pull tags that changed. 235 // Replace by --overwrite-tags after http://pad.lv/681792 goes in. 236 downloadCmd: []string{"pull --overwrite"}, 237 238 tagCmd: []tagCmd{{"tags", `^(\S+)`}}, 239 tagSyncCmd: []string{"update -r {tag}"}, 240 tagSyncDefault: []string{"update -r revno:-1"}, 241 242 scheme: []string{"https", "http", "bzr", "bzr+ssh"}, 243 pingCmd: "info -- {scheme}://{repo}", 244 remoteRepo: bzrRemoteRepo, 245 resolveRepo: bzrResolveRepo, 246 } 247 248 func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) { 249 outb, err := vcsBzr.runOutput(rootDir, "config parent_location") 250 if err != nil { 251 return "", err 252 } 253 return strings.TrimSpace(string(outb)), nil 254 } 255 256 func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) { 257 outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo) 258 if err != nil { 259 return "", err 260 } 261 out := string(outb) 262 263 // Expect: 264 // ... 265 // (branch root|repository branch): <URL> 266 // ... 267 268 found := false 269 for _, prefix := range []string{"\n branch root: ", "\n repository branch: "} { 270 i := strings.Index(out, prefix) 271 if i >= 0 { 272 out = out[i+len(prefix):] 273 found = true 274 break 275 } 276 } 277 if !found { 278 return "", fmt.Errorf("unable to parse output of bzr info") 279 } 280 281 i := strings.Index(out, "\n") 282 if i < 0 { 283 return "", fmt.Errorf("unable to parse output of bzr info") 284 } 285 out = out[:i] 286 return strings.TrimSpace(out), nil 287 } 288 289 // vcsSvn describes how to use Subversion. 290 var vcsSvn = &vcsCmd{ 291 name: "Subversion", 292 cmd: "svn", 293 294 createCmd: []string{"checkout -- {repo} {dir}"}, 295 downloadCmd: []string{"update"}, 296 297 // There is no tag command in subversion. 298 // The branch information is all in the path names. 299 300 scheme: []string{"https", "http", "svn", "svn+ssh"}, 301 pingCmd: "info -- {scheme}://{repo}", 302 remoteRepo: svnRemoteRepo, 303 } 304 305 func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) { 306 outb, err := vcsSvn.runOutput(rootDir, "info") 307 if err != nil { 308 return "", err 309 } 310 out := string(outb) 311 312 // Expect: 313 // 314 // ... 315 // URL: <URL> 316 // ... 317 // 318 // Note that we're not using the Repository Root line, 319 // because svn allows checking out subtrees. 320 // The URL will be the URL of the subtree (what we used with 'svn co') 321 // while the Repository Root may be a much higher parent. 322 i := strings.Index(out, "\nURL: ") 323 if i < 0 { 324 return "", fmt.Errorf("unable to parse output of svn info") 325 } 326 out = out[i+len("\nURL: "):] 327 i = strings.Index(out, "\n") 328 if i < 0 { 329 return "", fmt.Errorf("unable to parse output of svn info") 330 } 331 out = out[:i] 332 return strings.TrimSpace(out), nil 333 } 334 335 // fossilRepoName is the name go get associates with a fossil repository. In the 336 // real world the file can be named anything. 337 const fossilRepoName = ".fossil" 338 339 // vcsFossil describes how to use Fossil (fossil-scm.org) 340 var vcsFossil = &vcsCmd{ 341 name: "Fossil", 342 cmd: "fossil", 343 344 createCmd: []string{"-go-internal-mkdir {dir} clone -- {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"}, 345 downloadCmd: []string{"up"}, 346 347 tagCmd: []tagCmd{{"tag ls", `(.*)`}}, 348 tagSyncCmd: []string{"up tag:{tag}"}, 349 tagSyncDefault: []string{"up trunk"}, 350 351 scheme: []string{"https", "http"}, 352 remoteRepo: fossilRemoteRepo, 353 } 354 355 func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) { 356 out, err := vcsFossil.runOutput(rootDir, "remote-url") 357 if err != nil { 358 return "", err 359 } 360 return strings.TrimSpace(string(out)), nil 361 } 362 363 func (v *vcsCmd) String() string { 364 return v.name 365 } 366 367 // run runs the command line cmd in the given directory. 368 // keyval is a list of key, value pairs. run expands 369 // instances of {key} in cmd into value, but only after 370 // splitting cmd into individual arguments. 371 // If an error occurs, run prints the command line and the 372 // command's combined stdout+stderr to standard error. 373 // Otherwise run discards the command's output. 374 func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error { 375 _, err := v.run1(dir, cmd, keyval, true) 376 return err 377 } 378 379 // runVerboseOnly is like run but only generates error output to standard error in verbose mode. 380 func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error { 381 _, err := v.run1(dir, cmd, keyval, false) 382 return err 383 } 384 385 // runOutput is like run but returns the output of the command. 386 func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) { 387 return v.run1(dir, cmd, keyval, true) 388 } 389 390 // run1 is the generalized implementation of run and runOutput. 391 func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) { 392 m := make(map[string]string) 393 for i := 0; i < len(keyval); i += 2 { 394 m[keyval[i]] = keyval[i+1] 395 } 396 args := strings.Fields(cmdline) 397 for i, arg := range args { 398 args[i] = expand(m, arg) 399 } 400 401 if len(args) >= 2 && args[0] == "-go-internal-mkdir" { 402 var err error 403 if filepath.IsAbs(args[1]) { 404 err = os.Mkdir(args[1], os.ModePerm) 405 } else { 406 err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm) 407 } 408 if err != nil { 409 return nil, err 410 } 411 args = args[2:] 412 } 413 414 if len(args) >= 2 && args[0] == "-go-internal-cd" { 415 if filepath.IsAbs(args[1]) { 416 dir = args[1] 417 } else { 418 dir = filepath.Join(dir, args[1]) 419 } 420 args = args[2:] 421 } 422 423 _, err := exec.LookPath(v.cmd) 424 if err != nil { 425 fmt.Fprintf(os.Stderr, 426 "go: missing %s command. See https://golang.org/s/gogetcmd\n", 427 v.name) 428 return nil, err 429 } 430 431 cmd := exec.Command(v.cmd, args...) 432 cmd.Dir = dir 433 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 434 if cfg.BuildX { 435 fmt.Fprintf(os.Stderr, "cd %s\n", dir) 436 fmt.Fprintf(os.Stderr, "%s %s\n", v.cmd, strings.Join(args, " ")) 437 } 438 out, err := cmd.Output() 439 if err != nil { 440 if verbose || cfg.BuildV { 441 fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " ")) 442 if ee, ok := err.(*exec.ExitError); ok && len(ee.Stderr) > 0 { 443 os.Stderr.Write(ee.Stderr) 444 } else { 445 fmt.Fprintf(os.Stderr, err.Error()) 446 } 447 } 448 } 449 return out, err 450 } 451 452 // ping pings to determine scheme to use. 453 func (v *vcsCmd) ping(scheme, repo string) error { 454 return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo) 455 } 456 457 // create creates a new copy of repo in dir. 458 // The parent of dir must exist; dir must not. 459 func (v *vcsCmd) create(dir, repo string) error { 460 for _, cmd := range v.createCmd { 461 if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil { 462 return err 463 } 464 } 465 return nil 466 } 467 468 // download downloads any new changes for the repo in dir. 469 func (v *vcsCmd) download(dir string) error { 470 for _, cmd := range v.downloadCmd { 471 if err := v.run(dir, cmd); err != nil { 472 return err 473 } 474 } 475 return nil 476 } 477 478 // tags returns the list of available tags for the repo in dir. 479 func (v *vcsCmd) tags(dir string) ([]string, error) { 480 var tags []string 481 for _, tc := range v.tagCmd { 482 out, err := v.runOutput(dir, tc.cmd) 483 if err != nil { 484 return nil, err 485 } 486 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 487 for _, m := range re.FindAllStringSubmatch(string(out), -1) { 488 tags = append(tags, m[1]) 489 } 490 } 491 return tags, nil 492 } 493 494 // tagSync syncs the repo in dir to the named tag, 495 // which either is a tag returned by tags or is v.tagDefault. 496 func (v *vcsCmd) tagSync(dir, tag string) error { 497 if v.tagSyncCmd == nil { 498 return nil 499 } 500 if tag != "" { 501 for _, tc := range v.tagLookupCmd { 502 out, err := v.runOutput(dir, tc.cmd, "tag", tag) 503 if err != nil { 504 return err 505 } 506 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 507 m := re.FindStringSubmatch(string(out)) 508 if len(m) > 1 { 509 tag = m[1] 510 break 511 } 512 } 513 } 514 515 if tag == "" && v.tagSyncDefault != nil { 516 for _, cmd := range v.tagSyncDefault { 517 if err := v.run(dir, cmd); err != nil { 518 return err 519 } 520 } 521 return nil 522 } 523 524 for _, cmd := range v.tagSyncCmd { 525 if err := v.run(dir, cmd, "tag", tag); err != nil { 526 return err 527 } 528 } 529 return nil 530 } 531 532 // A vcsPath describes how to convert an import path into a 533 // version control system and repository name. 534 type vcsPath struct { 535 prefix string // prefix this description applies to 536 regexp *lazyregexp.Regexp // compiled pattern for import path 537 repo string // repository to use (expand with match of re) 538 vcs string // version control system to use (expand with match of re) 539 check func(match map[string]string) error // additional checks 540 schemelessRepo bool // if true, the repo pattern lacks a scheme 541 } 542 543 // vcsFromDir inspects dir and its parents to determine the 544 // version control system and code repository to use. 545 // On return, root is the import path 546 // corresponding to the root of the repository. 547 func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) { 548 // Clean and double-check that dir is in (a subdirectory of) srcRoot. 549 dir = filepath.Clean(dir) 550 srcRoot = filepath.Clean(srcRoot) 551 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 552 return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 553 } 554 555 var vcsRet *vcsCmd 556 var rootRet string 557 558 origDir := dir 559 for len(dir) > len(srcRoot) { 560 for _, vcs := range vcsList { 561 if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil { 562 root := filepath.ToSlash(dir[len(srcRoot)+1:]) 563 // Record first VCS we find, but keep looking, 564 // to detect mistakes like one kind of VCS inside another. 565 if vcsRet == nil { 566 vcsRet = vcs 567 rootRet = root 568 continue 569 } 570 // Allow .git inside .git, which can arise due to submodules. 571 if vcsRet == vcs && vcs.cmd == "git" { 572 continue 573 } 574 // Otherwise, we have one VCS inside a different VCS. 575 return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s", 576 filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd) 577 } 578 } 579 580 // Move to parent. 581 ndir := filepath.Dir(dir) 582 if len(ndir) >= len(dir) { 583 // Shouldn't happen, but just in case, stop. 584 break 585 } 586 dir = ndir 587 } 588 589 if vcsRet != nil { 590 return vcsRet, rootRet, nil 591 } 592 593 return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir) 594 } 595 596 // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS 597 // situation for dir, checking parents up until srcRoot. 598 func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error { 599 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 600 return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 601 } 602 603 otherDir := dir 604 for len(otherDir) > len(srcRoot) { 605 for _, otherVCS := range vcsList { 606 if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil { 607 // Allow expected vcs in original dir. 608 if otherDir == dir && otherVCS == vcs { 609 continue 610 } 611 // Allow .git inside .git, which can arise due to submodules. 612 if otherVCS == vcs && vcs.cmd == "git" { 613 continue 614 } 615 // Otherwise, we have one VCS inside a different VCS. 616 return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd) 617 } 618 } 619 // Move to parent. 620 newDir := filepath.Dir(otherDir) 621 if len(newDir) >= len(otherDir) { 622 // Shouldn't happen, but just in case, stop. 623 break 624 } 625 otherDir = newDir 626 } 627 628 return nil 629 } 630 631 // RepoRoot describes the repository root for a tree of source code. 632 type RepoRoot struct { 633 Repo string // repository URL, including scheme 634 Root string // import path corresponding to root of repo 635 IsCustom bool // defined by served <meta> tags (as opposed to hard-coded pattern) 636 VCS string // vcs type ("mod", "git", ...) 637 638 vcs *vcsCmd // internal: vcs command access 639 } 640 641 func httpPrefix(s string) string { 642 for _, prefix := range [...]string{"http:", "https:"} { 643 if strings.HasPrefix(s, prefix) { 644 return prefix 645 } 646 } 647 return "" 648 } 649 650 // ModuleMode specifies whether to prefer modules when looking up code sources. 651 type ModuleMode int 652 653 const ( 654 IgnoreMod ModuleMode = iota 655 PreferMod 656 ) 657 658 // RepoRootForImportPath analyzes importPath to determine the 659 // version control system, and code repository to use. 660 func RepoRootForImportPath(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) { 661 rr, err := repoRootFromVCSPaths(importPath, security, vcsPaths) 662 if err == errUnknownSite { 663 rr, err = repoRootForImportDynamic(importPath, mod, security) 664 if err != nil { 665 err = load.ImportErrorf(importPath, "unrecognized import path %q: %v", importPath, err) 666 } 667 } 668 if err != nil { 669 rr1, err1 := repoRootFromVCSPaths(importPath, security, vcsPathsAfterDynamic) 670 if err1 == nil { 671 rr = rr1 672 err = nil 673 } 674 } 675 676 // Should have been taken care of above, but make sure. 677 if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.Root, "...") { 678 // Do not allow wildcards in the repo root. 679 rr = nil 680 err = load.ImportErrorf(importPath, "cannot expand ... in %q", importPath) 681 } 682 return rr, err 683 } 684 685 var errUnknownSite = errors.New("dynamic lookup required to find mapping") 686 687 // repoRootFromVCSPaths attempts to map importPath to a repoRoot 688 // using the mappings defined in vcsPaths. 689 func repoRootFromVCSPaths(importPath string, security web.SecurityMode, vcsPaths []*vcsPath) (*RepoRoot, error) { 690 // A common error is to use https://packagepath because that's what 691 // hg and git require. Diagnose this helpfully. 692 if prefix := httpPrefix(importPath); prefix != "" { 693 // The importPath has been cleaned, so has only one slash. The pattern 694 // ignores the slashes; the error message puts them back on the RHS at least. 695 return nil, fmt.Errorf("%q not allowed in import path", prefix+"//") 696 } 697 for _, srv := range vcsPaths { 698 if !strings.HasPrefix(importPath, srv.prefix) { 699 continue 700 } 701 m := srv.regexp.FindStringSubmatch(importPath) 702 if m == nil { 703 if srv.prefix != "" { 704 return nil, load.ImportErrorf(importPath, "invalid %s import path %q", srv.prefix, importPath) 705 } 706 continue 707 } 708 709 // Build map of named subexpression matches for expand. 710 match := map[string]string{ 711 "prefix": srv.prefix, 712 "import": importPath, 713 } 714 for i, name := range srv.regexp.SubexpNames() { 715 if name != "" && match[name] == "" { 716 match[name] = m[i] 717 } 718 } 719 if srv.vcs != "" { 720 match["vcs"] = expand(match, srv.vcs) 721 } 722 if srv.repo != "" { 723 match["repo"] = expand(match, srv.repo) 724 } 725 if srv.check != nil { 726 if err := srv.check(match); err != nil { 727 return nil, err 728 } 729 } 730 vcs := vcsByCmd(match["vcs"]) 731 if vcs == nil { 732 return nil, fmt.Errorf("unknown version control system %q", match["vcs"]) 733 } 734 var repoURL string 735 if !srv.schemelessRepo { 736 repoURL = match["repo"] 737 } else { 738 scheme := vcs.scheme[0] // default to first scheme 739 repo := match["repo"] 740 if vcs.pingCmd != "" { 741 // If we know how to test schemes, scan to find one. 742 for _, s := range vcs.scheme { 743 if security == web.SecureOnly && !vcs.isSecureScheme(s) { 744 continue 745 } 746 if vcs.ping(s, repo) == nil { 747 scheme = s 748 break 749 } 750 } 751 } 752 repoURL = scheme + "://" + repo 753 } 754 rr := &RepoRoot{ 755 Repo: repoURL, 756 Root: match["root"], 757 VCS: vcs.cmd, 758 vcs: vcs, 759 } 760 return rr, nil 761 } 762 return nil, errUnknownSite 763 } 764 765 // urlForImportPath returns a partially-populated URL for the given Go import path. 766 // 767 // The URL leaves the Scheme field blank so that web.Get will try any scheme 768 // allowed by the selected security mode. 769 func urlForImportPath(importPath string) (*urlpkg.URL, error) { 770 slash := strings.Index(importPath, "/") 771 if slash < 0 { 772 slash = len(importPath) 773 } 774 host, path := importPath[:slash], importPath[slash:] 775 if !strings.Contains(host, ".") { 776 return nil, errors.New("import path does not begin with hostname") 777 } 778 if len(path) == 0 { 779 path = "/" 780 } 781 return &urlpkg.URL{Host: host, Path: path, RawQuery: "go-get=1"}, nil 782 } 783 784 // repoRootForImportDynamic finds a *RepoRoot for a custom domain that's not 785 // statically known by repoRootForImportPathStatic. 786 // 787 // This handles custom import paths like "name.tld/pkg/foo" or just "name.tld". 788 func repoRootForImportDynamic(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) { 789 url, err := urlForImportPath(importPath) 790 if err != nil { 791 return nil, err 792 } 793 resp, err := web.Get(security, url) 794 if err != nil { 795 msg := "https fetch: %v" 796 if security == web.Insecure { 797 msg = "http/" + msg 798 } 799 return nil, fmt.Errorf(msg, err) 800 } 801 body := resp.Body 802 defer body.Close() 803 imports, err := parseMetaGoImports(body, mod) 804 if len(imports) == 0 { 805 if respErr := resp.Err(); respErr != nil { 806 // If the server's status was not OK, prefer to report that instead of 807 // an XML parse error. 808 return nil, respErr 809 } 810 } 811 if err != nil { 812 return nil, fmt.Errorf("parsing %s: %v", importPath, err) 813 } 814 // Find the matched meta import. 815 mmi, err := matchGoImport(imports, importPath) 816 if err != nil { 817 if _, ok := err.(ImportMismatchError); !ok { 818 return nil, fmt.Errorf("parse %s: %v", url, err) 819 } 820 return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", resp.URL, err) 821 } 822 if cfg.BuildV { 823 log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, url) 824 } 825 // If the import was "uni.edu/bob/project", which said the 826 // prefix was "uni.edu" and the RepoRoot was "evilroot.com", 827 // make sure we don't trust Bob and check out evilroot.com to 828 // "uni.edu" yet (possibly overwriting/preempting another 829 // non-evil student). Instead, first verify the root and see 830 // if it matches Bob's claim. 831 if mmi.Prefix != importPath { 832 if cfg.BuildV { 833 log.Printf("get %q: verifying non-authoritative meta tag", importPath) 834 } 835 var imports []metaImport 836 url, imports, err = metaImportsForPrefix(mmi.Prefix, mod, security) 837 if err != nil { 838 return nil, err 839 } 840 metaImport2, err := matchGoImport(imports, importPath) 841 if err != nil || mmi != metaImport2 { 842 return nil, fmt.Errorf("%s and %s disagree about go-import for %s", resp.URL, url, mmi.Prefix) 843 } 844 } 845 846 if err := validateRepoRoot(mmi.RepoRoot); err != nil { 847 return nil, fmt.Errorf("%s: invalid repo root %q: %v", resp.URL, mmi.RepoRoot, err) 848 } 849 vcs := vcsByCmd(mmi.VCS) 850 if vcs == nil && mmi.VCS != "mod" { 851 return nil, fmt.Errorf("%s: unknown vcs %q", resp.URL, mmi.VCS) 852 } 853 854 rr := &RepoRoot{ 855 Repo: mmi.RepoRoot, 856 Root: mmi.Prefix, 857 IsCustom: true, 858 VCS: mmi.VCS, 859 vcs: vcs, 860 } 861 return rr, nil 862 } 863 864 // validateRepoRoot returns an error if repoRoot does not seem to be 865 // a valid URL with scheme. 866 func validateRepoRoot(repoRoot string) error { 867 url, err := urlpkg.Parse(repoRoot) 868 if err != nil { 869 return err 870 } 871 if url.Scheme == "" { 872 return errors.New("no scheme") 873 } 874 if url.Scheme == "file" { 875 return errors.New("file scheme disallowed") 876 } 877 return nil 878 } 879 880 var fetchGroup singleflight.Group 881 var ( 882 fetchCacheMu sync.Mutex 883 fetchCache = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix 884 ) 885 886 // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag 887 // and returns its HTML discovery URL and the parsed metaImport lines 888 // found on the page. 889 // 890 // The importPath is of the form "golang.org/x/tools". 891 // It is an error if no imports are found. 892 // url will still be valid if err != nil. 893 // The returned url will be of the form "https://golang.org/x/tools?go-get=1" 894 func metaImportsForPrefix(importPrefix string, mod ModuleMode, security web.SecurityMode) (*urlpkg.URL, []metaImport, error) { 895 setCache := func(res fetchResult) (fetchResult, error) { 896 fetchCacheMu.Lock() 897 defer fetchCacheMu.Unlock() 898 fetchCache[importPrefix] = res 899 return res, nil 900 } 901 902 resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) { 903 fetchCacheMu.Lock() 904 if res, ok := fetchCache[importPrefix]; ok { 905 fetchCacheMu.Unlock() 906 return res, nil 907 } 908 fetchCacheMu.Unlock() 909 910 url, err := urlForImportPath(importPrefix) 911 if err != nil { 912 return setCache(fetchResult{err: err}) 913 } 914 resp, err := web.Get(security, url) 915 if err != nil { 916 return setCache(fetchResult{url: url, err: fmt.Errorf("fetching %s: %v", importPrefix, err)}) 917 } 918 body := resp.Body 919 defer body.Close() 920 imports, err := parseMetaGoImports(body, mod) 921 if len(imports) == 0 { 922 if respErr := resp.Err(); respErr != nil { 923 // If the server's status was not OK, prefer to report that instead of 924 // an XML parse error. 925 return setCache(fetchResult{url: url, err: respErr}) 926 } 927 } 928 if err != nil { 929 return setCache(fetchResult{url: url, err: fmt.Errorf("parsing %s: %v", resp.URL, err)}) 930 } 931 if len(imports) == 0 { 932 err = fmt.Errorf("fetching %s: no go-import meta tag found in %s", importPrefix, resp.URL) 933 } 934 return setCache(fetchResult{url: url, imports: imports, err: err}) 935 }) 936 res := resi.(fetchResult) 937 return res.url, res.imports, res.err 938 } 939 940 type fetchResult struct { 941 url *urlpkg.URL 942 imports []metaImport 943 err error 944 } 945 946 // metaImport represents the parsed <meta name="go-import" 947 // content="prefix vcs reporoot" /> tags from HTML files. 948 type metaImport struct { 949 Prefix, VCS, RepoRoot string 950 } 951 952 // pathPrefix reports whether sub is a prefix of s, 953 // only considering entire path components. 954 func pathPrefix(s, sub string) bool { 955 // strings.HasPrefix is necessary but not sufficient. 956 if !strings.HasPrefix(s, sub) { 957 return false 958 } 959 // The remainder after the prefix must either be empty or start with a slash. 960 rem := s[len(sub):] 961 return rem == "" || rem[0] == '/' 962 } 963 964 // A ImportMismatchError is returned where metaImport/s are present 965 // but none match our import path. 966 type ImportMismatchError struct { 967 importPath string 968 mismatches []string // the meta imports that were discarded for not matching our importPath 969 } 970 971 func (m ImportMismatchError) Error() string { 972 formattedStrings := make([]string, len(m.mismatches)) 973 for i, pre := range m.mismatches { 974 formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath) 975 } 976 return strings.Join(formattedStrings, ", ") 977 } 978 979 // matchGoImport returns the metaImport from imports matching importPath. 980 // An error is returned if there are multiple matches. 981 // An ImportMismatchError is returned if none match. 982 func matchGoImport(imports []metaImport, importPath string) (metaImport, error) { 983 match := -1 984 985 errImportMismatch := ImportMismatchError{importPath: importPath} 986 for i, im := range imports { 987 if !pathPrefix(importPath, im.Prefix) { 988 errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix) 989 continue 990 } 991 992 if match >= 0 { 993 if imports[match].VCS == "mod" && im.VCS != "mod" { 994 // All the mod entries precede all the non-mod entries. 995 // We have a mod entry and don't care about the rest, 996 // matching or not. 997 break 998 } 999 return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath) 1000 } 1001 match = i 1002 } 1003 1004 if match == -1 { 1005 return metaImport{}, errImportMismatch 1006 } 1007 return imports[match], nil 1008 } 1009 1010 // expand rewrites s to replace {k} with match[k] for each key k in match. 1011 func expand(match map[string]string, s string) string { 1012 // We want to replace each match exactly once, and the result of expansion 1013 // must not depend on the iteration order through the map. 1014 // A strings.Replacer has exactly the properties we're looking for. 1015 oldNew := make([]string, 0, 2*len(match)) 1016 for k, v := range match { 1017 oldNew = append(oldNew, "{"+k+"}", v) 1018 } 1019 return strings.NewReplacer(oldNew...).Replace(s) 1020 } 1021 1022 // vcsPaths defines the meaning of import paths referring to 1023 // commonly-used VCS hosting sites (github.com/user/dir) 1024 // and import paths referring to a fully-qualified importPath 1025 // containing a VCS type (foo.com/repo.git/dir) 1026 var vcsPaths = []*vcsPath{ 1027 // Github 1028 { 1029 prefix: "github.com/", 1030 regexp: lazyregexp.New(`^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`), 1031 vcs: "git", 1032 repo: "https://{root}", 1033 check: noVCSSuffix, 1034 }, 1035 1036 // Bitbucket 1037 { 1038 prefix: "bitbucket.org/", 1039 regexp: lazyregexp.New(`^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`), 1040 repo: "https://{root}", 1041 check: bitbucketVCS, 1042 }, 1043 1044 // IBM DevOps Services (JazzHub) 1045 { 1046 prefix: "hub.jazz.net/git/", 1047 regexp: lazyregexp.New(`^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`), 1048 vcs: "git", 1049 repo: "https://{root}", 1050 check: noVCSSuffix, 1051 }, 1052 1053 // Git at Apache 1054 { 1055 prefix: "git.apache.org/", 1056 regexp: lazyregexp.New(`^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`), 1057 vcs: "git", 1058 repo: "https://{root}", 1059 }, 1060 1061 // Git at OpenStack 1062 { 1063 prefix: "git.openstack.org/", 1064 regexp: lazyregexp.New(`^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`), 1065 vcs: "git", 1066 repo: "https://{root}", 1067 }, 1068 1069 // chiselapp.com for fossil 1070 { 1071 prefix: "chiselapp.com/", 1072 regexp: lazyregexp.New(`^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`), 1073 vcs: "fossil", 1074 repo: "https://{root}", 1075 }, 1076 1077 // General syntax for any server. 1078 // Must be last. 1079 { 1080 regexp: lazyregexp.New(`(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`), 1081 schemelessRepo: true, 1082 }, 1083 } 1084 1085 // vcsPathsAfterDynamic gives additional vcsPaths entries 1086 // to try after the dynamic HTML check. 1087 // This gives those sites a chance to introduce <meta> tags 1088 // as part of a graceful transition away from the hard-coded logic. 1089 var vcsPathsAfterDynamic = []*vcsPath{ 1090 // Launchpad. See golang.org/issue/11436. 1091 { 1092 prefix: "launchpad.net/", 1093 regexp: lazyregexp.New(`^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`), 1094 vcs: "bzr", 1095 repo: "https://{root}", 1096 check: launchpadVCS, 1097 }, 1098 } 1099 1100 // noVCSSuffix checks that the repository name does not 1101 // end in .foo for any version control system foo. 1102 // The usual culprit is ".git". 1103 func noVCSSuffix(match map[string]string) error { 1104 repo := match["repo"] 1105 for _, vcs := range vcsList { 1106 if strings.HasSuffix(repo, "."+vcs.cmd) { 1107 return fmt.Errorf("invalid version control suffix in %s path", match["prefix"]) 1108 } 1109 } 1110 return nil 1111 } 1112 1113 // bitbucketVCS determines the version control system for a 1114 // Bitbucket repository, by using the Bitbucket API. 1115 func bitbucketVCS(match map[string]string) error { 1116 if err := noVCSSuffix(match); err != nil { 1117 return err 1118 } 1119 1120 var resp struct { 1121 SCM string `json:"scm"` 1122 } 1123 url := &urlpkg.URL{ 1124 Scheme: "https", 1125 Host: "api.bitbucket.org", 1126 Path: expand(match, "/2.0/repositories/{bitname}"), 1127 RawQuery: "fields=scm", 1128 } 1129 data, err := web.GetBytes(url) 1130 if err != nil { 1131 if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 { 1132 // this may be a private repository. If so, attempt to determine which 1133 // VCS it uses. See issue 5375. 1134 root := match["root"] 1135 for _, vcs := range []string{"git", "hg"} { 1136 if vcsByCmd(vcs).ping("https", root) == nil { 1137 resp.SCM = vcs 1138 break 1139 } 1140 } 1141 } 1142 1143 if resp.SCM == "" { 1144 return err 1145 } 1146 } else { 1147 if err := json.Unmarshal(data, &resp); err != nil { 1148 return fmt.Errorf("decoding %s: %v", url, err) 1149 } 1150 } 1151 1152 if vcsByCmd(resp.SCM) != nil { 1153 match["vcs"] = resp.SCM 1154 if resp.SCM == "git" { 1155 match["repo"] += ".git" 1156 } 1157 return nil 1158 } 1159 1160 return fmt.Errorf("unable to detect version control system for bitbucket.org/ path") 1161 } 1162 1163 // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case, 1164 // "foo" could be a series name registered in Launchpad with its own branch, 1165 // and it could also be the name of a directory within the main project 1166 // branch one level up. 1167 func launchpadVCS(match map[string]string) error { 1168 if match["project"] == "" || match["series"] == "" { 1169 return nil 1170 } 1171 url := &urlpkg.URL{ 1172 Scheme: "https", 1173 Host: "code.launchpad.net", 1174 Path: expand(match, "/{project}{series}/.bzr/branch-format"), 1175 } 1176 _, err := web.GetBytes(url) 1177 if err != nil { 1178 match["root"] = expand(match, "launchpad.net/{project}") 1179 match["repo"] = expand(match, "https://{root}") 1180 } 1181 return nil 1182 }