github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/cmd/go/internal/get/vcs.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package get 6 7 import ( 8 "encoding/json" 9 "errors" 10 "fmt" 11 "internal/singleflight" 12 "log" 13 "net/url" 14 "os" 15 "os/exec" 16 "path/filepath" 17 "regexp" 18 "strings" 19 "sync" 20 21 "cmd/go/internal/base" 22 "cmd/go/internal/cfg" 23 "cmd/go/internal/web" 24 ) 25 26 // A vcsCmd describes how to use a version control system 27 // like Mercurial, Git, or Subversion. 28 type vcsCmd struct { 29 name string 30 cmd string // name of binary to invoke command 31 32 createCmd []string // commands to download a fresh copy of a repository 33 downloadCmd []string // commands to download updates into an existing repository 34 35 tagCmd []tagCmd // commands to list tags 36 tagLookupCmd []tagCmd // commands to lookup tags before running tagSyncCmd 37 tagSyncCmd []string // commands to sync to specific tag 38 tagSyncDefault []string // commands to sync to default tag 39 40 scheme []string 41 pingCmd string 42 43 remoteRepo func(v *vcsCmd, rootDir string) (remoteRepo string, err error) 44 resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) 45 } 46 47 var defaultSecureScheme = map[string]bool{ 48 "https": true, 49 "git+ssh": true, 50 "bzr+ssh": true, 51 "svn+ssh": true, 52 "ssh": true, 53 } 54 55 func (v *vcsCmd) isSecure(repo string) bool { 56 u, err := url.Parse(repo) 57 if err != nil { 58 // If repo is not a URL, it's not secure. 59 return false 60 } 61 return v.isSecureScheme(u.Scheme) 62 } 63 64 func (v *vcsCmd) isSecureScheme(scheme string) bool { 65 switch v.cmd { 66 case "git": 67 // GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a 68 // colon-separated list of schemes that are allowed to be used with git 69 // fetch/clone. Any scheme not mentioned will be considered insecure. 70 if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" { 71 for _, s := range strings.Split(allow, ":") { 72 if s == scheme { 73 return true 74 } 75 } 76 return false 77 } 78 } 79 return defaultSecureScheme[scheme] 80 } 81 82 // A tagCmd describes a command to list available tags 83 // that can be passed to tagSyncCmd. 84 type tagCmd struct { 85 cmd string // command to list tags 86 pattern string // regexp to extract tags from list 87 } 88 89 // vcsList lists the known version control systems 90 var vcsList = []*vcsCmd{ 91 vcsHg, 92 vcsGit, 93 vcsSvn, 94 vcsBzr, 95 vcsFossil, 96 } 97 98 // vcsByCmd returns the version control system for the given 99 // command name (hg, git, svn, bzr). 100 func vcsByCmd(cmd string) *vcsCmd { 101 for _, vcs := range vcsList { 102 if vcs.cmd == cmd { 103 return vcs 104 } 105 } 106 return nil 107 } 108 109 // vcsHg describes how to use Mercurial. 110 var vcsHg = &vcsCmd{ 111 name: "Mercurial", 112 cmd: "hg", 113 114 createCmd: []string{"clone -U {repo} {dir}"}, 115 downloadCmd: []string{"pull"}, 116 117 // We allow both tag and branch names as 'tags' 118 // for selecting a version. This lets people have 119 // a go.release.r60 branch and a go1 branch 120 // and make changes in both, without constantly 121 // editing .hgtags. 122 tagCmd: []tagCmd{ 123 {"tags", `^(\S+)`}, 124 {"branches", `^(\S+)`}, 125 }, 126 tagSyncCmd: []string{"update -r {tag}"}, 127 tagSyncDefault: []string{"update default"}, 128 129 scheme: []string{"https", "http", "ssh"}, 130 pingCmd: "identify {scheme}://{repo}", 131 remoteRepo: hgRemoteRepo, 132 } 133 134 func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) { 135 out, err := vcsHg.runOutput(rootDir, "paths default") 136 if err != nil { 137 return "", err 138 } 139 return strings.TrimSpace(string(out)), nil 140 } 141 142 // vcsGit describes how to use Git. 143 var vcsGit = &vcsCmd{ 144 name: "Git", 145 cmd: "git", 146 147 createCmd: []string{"clone {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"}, 148 downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"}, 149 150 tagCmd: []tagCmd{ 151 // tags/xxx matches a git tag named xxx 152 // origin/xxx matches a git branch named xxx on the default remote repository 153 {"show-ref", `(?:tags|origin)/(\S+)$`}, 154 }, 155 tagLookupCmd: []tagCmd{ 156 {"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`}, 157 }, 158 tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"}, 159 // both createCmd and downloadCmd update the working dir. 160 // No need to do more here. We used to 'checkout master' 161 // but that doesn't work if the default branch is not named master. 162 // DO NOT add 'checkout master' here. 163 // See golang.org/issue/9032. 164 tagSyncDefault: []string{"submodule update --init --recursive"}, 165 166 scheme: []string{"git", "https", "http", "git+ssh", "ssh"}, 167 pingCmd: "ls-remote {scheme}://{repo}", 168 remoteRepo: gitRemoteRepo, 169 } 170 171 // scpSyntaxRe matches the SCP-like addresses used by Git to access 172 // repositories by SSH. 173 var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 174 175 func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) { 176 cmd := "config remote.origin.url" 177 errParse := errors.New("unable to parse output of git " + cmd) 178 errRemoteOriginNotFound := errors.New("remote origin not found") 179 outb, err := vcsGit.run1(rootDir, cmd, nil, false) 180 if err != nil { 181 // if it doesn't output any message, it means the config argument is correct, 182 // but the config value itself doesn't exist 183 if outb != nil && len(outb) == 0 { 184 return "", errRemoteOriginNotFound 185 } 186 return "", err 187 } 188 out := strings.TrimSpace(string(outb)) 189 190 var repoURL *url.URL 191 if m := scpSyntaxRe.FindStringSubmatch(out); m != nil { 192 // Match SCP-like syntax and convert it to a URL. 193 // Eg, "git@github.com:user/repo" becomes 194 // "ssh://git@github.com/user/repo". 195 repoURL = &url.URL{ 196 Scheme: "ssh", 197 User: url.User(m[1]), 198 Host: m[2], 199 Path: m[3], 200 } 201 } else { 202 repoURL, err = url.Parse(out) 203 if err != nil { 204 return "", err 205 } 206 } 207 208 // Iterate over insecure schemes too, because this function simply 209 // reports the state of the repo. If we can't see insecure schemes then 210 // we can't report the actual repo URL. 211 for _, s := range vcsGit.scheme { 212 if repoURL.Scheme == s { 213 return repoURL.String(), nil 214 } 215 } 216 return "", errParse 217 } 218 219 // vcsBzr describes how to use Bazaar. 220 var vcsBzr = &vcsCmd{ 221 name: "Bazaar", 222 cmd: "bzr", 223 224 createCmd: []string{"branch {repo} {dir}"}, 225 226 // Without --overwrite bzr will not pull tags that changed. 227 // Replace by --overwrite-tags after http://pad.lv/681792 goes in. 228 downloadCmd: []string{"pull --overwrite"}, 229 230 tagCmd: []tagCmd{{"tags", `^(\S+)`}}, 231 tagSyncCmd: []string{"update -r {tag}"}, 232 tagSyncDefault: []string{"update -r revno:-1"}, 233 234 scheme: []string{"https", "http", "bzr", "bzr+ssh"}, 235 pingCmd: "info {scheme}://{repo}", 236 remoteRepo: bzrRemoteRepo, 237 resolveRepo: bzrResolveRepo, 238 } 239 240 func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) { 241 outb, err := vcsBzr.runOutput(rootDir, "config parent_location") 242 if err != nil { 243 return "", err 244 } 245 return strings.TrimSpace(string(outb)), nil 246 } 247 248 func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) { 249 outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo) 250 if err != nil { 251 return "", err 252 } 253 out := string(outb) 254 255 // Expect: 256 // ... 257 // (branch root|repository branch): <URL> 258 // ... 259 260 found := false 261 for _, prefix := range []string{"\n branch root: ", "\n repository branch: "} { 262 i := strings.Index(out, prefix) 263 if i >= 0 { 264 out = out[i+len(prefix):] 265 found = true 266 break 267 } 268 } 269 if !found { 270 return "", fmt.Errorf("unable to parse output of bzr info") 271 } 272 273 i := strings.Index(out, "\n") 274 if i < 0 { 275 return "", fmt.Errorf("unable to parse output of bzr info") 276 } 277 out = out[:i] 278 return strings.TrimSpace(out), nil 279 } 280 281 // vcsSvn describes how to use Subversion. 282 var vcsSvn = &vcsCmd{ 283 name: "Subversion", 284 cmd: "svn", 285 286 createCmd: []string{"checkout {repo} {dir}"}, 287 downloadCmd: []string{"update"}, 288 289 // There is no tag command in subversion. 290 // The branch information is all in the path names. 291 292 scheme: []string{"https", "http", "svn", "svn+ssh"}, 293 pingCmd: "info {scheme}://{repo}", 294 remoteRepo: svnRemoteRepo, 295 } 296 297 func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) { 298 outb, err := vcsSvn.runOutput(rootDir, "info") 299 if err != nil { 300 return "", err 301 } 302 out := string(outb) 303 304 // Expect: 305 // 306 // ... 307 // URL: <URL> 308 // ... 309 // 310 // Note that we're not using the Repository Root line, 311 // because svn allows checking out subtrees. 312 // The URL will be the URL of the subtree (what we used with 'svn co') 313 // while the Repository Root may be a much higher parent. 314 i := strings.Index(out, "\nURL: ") 315 if i < 0 { 316 return "", fmt.Errorf("unable to parse output of svn info") 317 } 318 out = out[i+len("\nURL: "):] 319 i = strings.Index(out, "\n") 320 if i < 0 { 321 return "", fmt.Errorf("unable to parse output of svn info") 322 } 323 out = out[:i] 324 return strings.TrimSpace(out), nil 325 } 326 327 // fossilRepoName is the name go get associates with a fossil repository. In the 328 // real world the file can be named anything. 329 const fossilRepoName = ".fossil" 330 331 // vcsFossil describes how to use Fossil (fossil-scm.org) 332 var vcsFossil = &vcsCmd{ 333 name: "Fossil", 334 cmd: "fossil", 335 336 createCmd: []string{"-go-internal-mkdir {dir} clone {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"}, 337 downloadCmd: []string{"up"}, 338 339 tagCmd: []tagCmd{{"tag ls", `(.*)`}}, 340 tagSyncCmd: []string{"up tag:{tag}"}, 341 tagSyncDefault: []string{"up trunk"}, 342 343 scheme: []string{"https", "http"}, 344 remoteRepo: fossilRemoteRepo, 345 } 346 347 func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) { 348 out, err := vcsFossil.runOutput(rootDir, "remote-url") 349 if err != nil { 350 return "", err 351 } 352 return strings.TrimSpace(string(out)), nil 353 } 354 355 func (v *vcsCmd) String() string { 356 return v.name 357 } 358 359 // run runs the command line cmd in the given directory. 360 // keyval is a list of key, value pairs. run expands 361 // instances of {key} in cmd into value, but only after 362 // splitting cmd into individual arguments. 363 // If an error occurs, run prints the command line and the 364 // command's combined stdout+stderr to standard error. 365 // Otherwise run discards the command's output. 366 func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error { 367 _, err := v.run1(dir, cmd, keyval, true) 368 return err 369 } 370 371 // runVerboseOnly is like run but only generates error output to standard error in verbose mode. 372 func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error { 373 _, err := v.run1(dir, cmd, keyval, false) 374 return err 375 } 376 377 // runOutput is like run but returns the output of the command. 378 func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) { 379 return v.run1(dir, cmd, keyval, true) 380 } 381 382 // run1 is the generalized implementation of run and runOutput. 383 func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) { 384 m := make(map[string]string) 385 for i := 0; i < len(keyval); i += 2 { 386 m[keyval[i]] = keyval[i+1] 387 } 388 args := strings.Fields(cmdline) 389 for i, arg := range args { 390 args[i] = expand(m, arg) 391 } 392 393 if len(args) >= 2 && args[0] == "-go-internal-mkdir" { 394 var err error 395 if filepath.IsAbs(args[1]) { 396 err = os.Mkdir(args[1], os.ModePerm) 397 } else { 398 err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm) 399 } 400 if err != nil { 401 return nil, err 402 } 403 args = args[2:] 404 } 405 406 if len(args) >= 2 && args[0] == "-go-internal-cd" { 407 if filepath.IsAbs(args[1]) { 408 dir = args[1] 409 } else { 410 dir = filepath.Join(dir, args[1]) 411 } 412 args = args[2:] 413 } 414 415 _, err := exec.LookPath(v.cmd) 416 if err != nil { 417 fmt.Fprintf(os.Stderr, 418 "go: missing %s command. See https://golang.org/s/gogetcmd\n", 419 v.name) 420 return nil, err 421 } 422 423 cmd := exec.Command(v.cmd, args...) 424 cmd.Dir = dir 425 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 426 if cfg.BuildX { 427 fmt.Printf("cd %s\n", dir) 428 fmt.Printf("%s %s\n", v.cmd, strings.Join(args, " ")) 429 } 430 out, err := cmd.Output() 431 if err != nil { 432 if verbose || cfg.BuildV { 433 fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " ")) 434 if ee, ok := err.(*exec.ExitError); ok && len(ee.Stderr) > 0 { 435 os.Stderr.Write(ee.Stderr) 436 } else { 437 fmt.Fprintf(os.Stderr, err.Error()) 438 } 439 } 440 } 441 return out, err 442 } 443 444 // ping pings to determine scheme to use. 445 func (v *vcsCmd) ping(scheme, repo string) error { 446 return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo) 447 } 448 449 // create creates a new copy of repo in dir. 450 // The parent of dir must exist; dir must not. 451 func (v *vcsCmd) create(dir, repo string) error { 452 for _, cmd := range v.createCmd { 453 if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil { 454 return err 455 } 456 } 457 return nil 458 } 459 460 // download downloads any new changes for the repo in dir. 461 func (v *vcsCmd) download(dir string) error { 462 for _, cmd := range v.downloadCmd { 463 if err := v.run(dir, cmd); err != nil { 464 return err 465 } 466 } 467 return nil 468 } 469 470 // tags returns the list of available tags for the repo in dir. 471 func (v *vcsCmd) tags(dir string) ([]string, error) { 472 var tags []string 473 for _, tc := range v.tagCmd { 474 out, err := v.runOutput(dir, tc.cmd) 475 if err != nil { 476 return nil, err 477 } 478 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 479 for _, m := range re.FindAllStringSubmatch(string(out), -1) { 480 tags = append(tags, m[1]) 481 } 482 } 483 return tags, nil 484 } 485 486 // tagSync syncs the repo in dir to the named tag, 487 // which either is a tag returned by tags or is v.tagDefault. 488 func (v *vcsCmd) tagSync(dir, tag string) error { 489 if v.tagSyncCmd == nil { 490 return nil 491 } 492 if tag != "" { 493 for _, tc := range v.tagLookupCmd { 494 out, err := v.runOutput(dir, tc.cmd, "tag", tag) 495 if err != nil { 496 return err 497 } 498 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 499 m := re.FindStringSubmatch(string(out)) 500 if len(m) > 1 { 501 tag = m[1] 502 break 503 } 504 } 505 } 506 507 if tag == "" && v.tagSyncDefault != nil { 508 for _, cmd := range v.tagSyncDefault { 509 if err := v.run(dir, cmd); err != nil { 510 return err 511 } 512 } 513 return nil 514 } 515 516 for _, cmd := range v.tagSyncCmd { 517 if err := v.run(dir, cmd, "tag", tag); err != nil { 518 return err 519 } 520 } 521 return nil 522 } 523 524 // A vcsPath describes how to convert an import path into a 525 // version control system and repository name. 526 type vcsPath struct { 527 prefix string // prefix this description applies to 528 re string // pattern for import path 529 repo string // repository to use (expand with match of re) 530 vcs string // version control system to use (expand with match of re) 531 check func(match map[string]string) error // additional checks 532 ping bool // ping for scheme to use to download repo 533 534 regexp *regexp.Regexp // cached compiled form of re 535 } 536 537 // vcsFromDir inspects dir and its parents to determine the 538 // version control system and code repository to use. 539 // On return, root is the import path 540 // corresponding to the root of the repository. 541 func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) { 542 // Clean and double-check that dir is in (a subdirectory of) srcRoot. 543 dir = filepath.Clean(dir) 544 srcRoot = filepath.Clean(srcRoot) 545 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 546 return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 547 } 548 549 var vcsRet *vcsCmd 550 var rootRet string 551 552 origDir := dir 553 for len(dir) > len(srcRoot) { 554 for _, vcs := range vcsList { 555 if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil { 556 root := filepath.ToSlash(dir[len(srcRoot)+1:]) 557 // Record first VCS we find, but keep looking, 558 // to detect mistakes like one kind of VCS inside another. 559 if vcsRet == nil { 560 vcsRet = vcs 561 rootRet = root 562 continue 563 } 564 // Allow .git inside .git, which can arise due to submodules. 565 if vcsRet == vcs && vcs.cmd == "git" { 566 continue 567 } 568 // Otherwise, we have one VCS inside a different VCS. 569 return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s", 570 filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd) 571 } 572 } 573 574 // Move to parent. 575 ndir := filepath.Dir(dir) 576 if len(ndir) >= len(dir) { 577 // Shouldn't happen, but just in case, stop. 578 break 579 } 580 dir = ndir 581 } 582 583 if vcsRet != nil { 584 return vcsRet, rootRet, nil 585 } 586 587 return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir) 588 } 589 590 // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS 591 // situation for dir, checking parents up until srcRoot. 592 func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error { 593 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 594 return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 595 } 596 597 otherDir := dir 598 for len(otherDir) > len(srcRoot) { 599 for _, otherVCS := range vcsList { 600 if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil { 601 // Allow expected vcs in original dir. 602 if otherDir == dir && otherVCS == vcs { 603 continue 604 } 605 // Allow .git inside .git, which can arise due to submodules. 606 if otherVCS == vcs && vcs.cmd == "git" { 607 continue 608 } 609 // Otherwise, we have one VCS inside a different VCS. 610 return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd) 611 } 612 } 613 // Move to parent. 614 newDir := filepath.Dir(otherDir) 615 if len(newDir) >= len(otherDir) { 616 // Shouldn't happen, but just in case, stop. 617 break 618 } 619 otherDir = newDir 620 } 621 622 return nil 623 } 624 625 // RepoRoot describes the repository root for a tree of source code. 626 type RepoRoot struct { 627 Repo string // repository URL, including scheme 628 Root string // import path corresponding to root of repo 629 IsCustom bool // defined by served <meta> tags (as opposed to hard-coded pattern) 630 VCS string // vcs type ("mod", "git", ...) 631 632 vcs *vcsCmd // internal: vcs command access 633 } 634 635 var httpPrefixRE = regexp.MustCompile(`^https?:`) 636 637 // ModuleMode specifies whether to prefer modules when looking up code sources. 638 type ModuleMode int 639 640 const ( 641 IgnoreMod ModuleMode = iota 642 PreferMod 643 ) 644 645 // RepoRootForImportPath analyzes importPath to determine the 646 // version control system, and code repository to use. 647 func RepoRootForImportPath(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) { 648 rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths) 649 if err == errUnknownSite { 650 // If there are wildcards, look up the thing before the wildcard, 651 // hoping it applies to the wildcarded parts too. 652 // This makes 'go get rsc.io/pdf/...' work in a fresh GOPATH. 653 lookup := strings.TrimSuffix(importPath, "/...") 654 if i := strings.Index(lookup, "/.../"); i >= 0 { 655 lookup = lookup[:i] 656 } 657 rr, err = repoRootForImportDynamic(lookup, mod, security) 658 if err != nil { 659 err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err) 660 } 661 } 662 if err != nil { 663 rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic) 664 if err1 == nil { 665 rr = rr1 666 err = nil 667 } 668 } 669 670 if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.Root, "...") { 671 // Do not allow wildcards in the repo root. 672 rr = nil 673 err = fmt.Errorf("cannot expand ... in %q", importPath) 674 } 675 return rr, err 676 } 677 678 var errUnknownSite = errors.New("dynamic lookup required to find mapping") 679 680 // repoRootFromVCSPaths attempts to map importPath to a repoRoot 681 // using the mappings defined in vcsPaths. 682 // If scheme is non-empty, that scheme is forced. 683 func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*RepoRoot, error) { 684 // A common error is to use https://packagepath because that's what 685 // hg and git require. Diagnose this helpfully. 686 if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil { 687 // The importPath has been cleaned, so has only one slash. The pattern 688 // ignores the slashes; the error message puts them back on the RHS at least. 689 return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//") 690 } 691 for _, srv := range vcsPaths { 692 if !strings.HasPrefix(importPath, srv.prefix) { 693 continue 694 } 695 m := srv.regexp.FindStringSubmatch(importPath) 696 if m == nil { 697 if srv.prefix != "" { 698 return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath) 699 } 700 continue 701 } 702 703 // Build map of named subexpression matches for expand. 704 match := map[string]string{ 705 "prefix": srv.prefix, 706 "import": importPath, 707 } 708 for i, name := range srv.regexp.SubexpNames() { 709 if name != "" && match[name] == "" { 710 match[name] = m[i] 711 } 712 } 713 if srv.vcs != "" { 714 match["vcs"] = expand(match, srv.vcs) 715 } 716 if srv.repo != "" { 717 match["repo"] = expand(match, srv.repo) 718 } 719 if srv.check != nil { 720 if err := srv.check(match); err != nil { 721 return nil, err 722 } 723 } 724 vcs := vcsByCmd(match["vcs"]) 725 if vcs == nil { 726 return nil, fmt.Errorf("unknown version control system %q", match["vcs"]) 727 } 728 if srv.ping { 729 if scheme != "" { 730 match["repo"] = scheme + "://" + match["repo"] 731 } else { 732 for _, scheme := range vcs.scheme { 733 if security == web.Secure && !vcs.isSecureScheme(scheme) { 734 continue 735 } 736 if vcs.pingCmd != "" && vcs.ping(scheme, match["repo"]) == nil { 737 match["repo"] = scheme + "://" + match["repo"] 738 goto Found 739 } 740 } 741 // No scheme found. Fall back to the first one. 742 match["repo"] = vcs.scheme[0] + "://" + match["repo"] 743 Found: 744 } 745 } 746 rr := &RepoRoot{ 747 Repo: match["repo"], 748 Root: match["root"], 749 VCS: vcs.cmd, 750 vcs: vcs, 751 } 752 return rr, nil 753 } 754 return nil, errUnknownSite 755 } 756 757 // repoRootForImportDynamic finds a *RepoRoot for a custom domain that's not 758 // statically known by repoRootForImportPathStatic. 759 // 760 // This handles custom import paths like "name.tld/pkg/foo" or just "name.tld". 761 func repoRootForImportDynamic(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) { 762 slash := strings.Index(importPath, "/") 763 if slash < 0 { 764 slash = len(importPath) 765 } 766 host := importPath[:slash] 767 if !strings.Contains(host, ".") { 768 return nil, errors.New("import path does not begin with hostname") 769 } 770 urlStr, body, err := web.GetMaybeInsecure(importPath, security) 771 if err != nil { 772 msg := "https fetch: %v" 773 if security == web.Insecure { 774 msg = "http/" + msg 775 } 776 return nil, fmt.Errorf(msg, err) 777 } 778 defer body.Close() 779 imports, err := parseMetaGoImports(body, mod) 780 if err != nil { 781 return nil, fmt.Errorf("parsing %s: %v", importPath, err) 782 } 783 // Find the matched meta import. 784 mmi, err := matchGoImport(imports, importPath) 785 if err != nil { 786 if _, ok := err.(ImportMismatchError); !ok { 787 return nil, fmt.Errorf("parse %s: %v", urlStr, err) 788 } 789 return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", urlStr, err) 790 } 791 if cfg.BuildV { 792 log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr) 793 } 794 // If the import was "uni.edu/bob/project", which said the 795 // prefix was "uni.edu" and the RepoRoot was "evilroot.com", 796 // make sure we don't trust Bob and check out evilroot.com to 797 // "uni.edu" yet (possibly overwriting/preempting another 798 // non-evil student). Instead, first verify the root and see 799 // if it matches Bob's claim. 800 if mmi.Prefix != importPath { 801 if cfg.BuildV { 802 log.Printf("get %q: verifying non-authoritative meta tag", importPath) 803 } 804 urlStr0 := urlStr 805 var imports []metaImport 806 urlStr, imports, err = metaImportsForPrefix(mmi.Prefix, mod, security) 807 if err != nil { 808 return nil, err 809 } 810 metaImport2, err := matchGoImport(imports, importPath) 811 if err != nil || mmi != metaImport2 { 812 return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix) 813 } 814 } 815 816 if err := validateRepoRoot(mmi.RepoRoot); err != nil { 817 return nil, fmt.Errorf("%s: invalid repo root %q: %v", urlStr, mmi.RepoRoot, err) 818 } 819 vcs := vcsByCmd(mmi.VCS) 820 if vcs == nil && mmi.VCS != "mod" { 821 return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS) 822 } 823 824 rr := &RepoRoot{ 825 Repo: mmi.RepoRoot, 826 Root: mmi.Prefix, 827 IsCustom: true, 828 VCS: mmi.VCS, 829 vcs: vcs, 830 } 831 return rr, nil 832 } 833 834 // validateRepoRoot returns an error if repoRoot does not seem to be 835 // a valid URL with scheme. 836 func validateRepoRoot(repoRoot string) error { 837 url, err := url.Parse(repoRoot) 838 if err != nil { 839 return err 840 } 841 if url.Scheme == "" { 842 return errors.New("no scheme") 843 } 844 return nil 845 } 846 847 var fetchGroup singleflight.Group 848 var ( 849 fetchCacheMu sync.Mutex 850 fetchCache = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix 851 ) 852 853 // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag 854 // and returns its HTML discovery URL and the parsed metaImport lines 855 // found on the page. 856 // 857 // The importPath is of the form "golang.org/x/tools". 858 // It is an error if no imports are found. 859 // urlStr will still be valid if err != nil. 860 // The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1" 861 func metaImportsForPrefix(importPrefix string, mod ModuleMode, security web.SecurityMode) (urlStr string, imports []metaImport, err error) { 862 setCache := func(res fetchResult) (fetchResult, error) { 863 fetchCacheMu.Lock() 864 defer fetchCacheMu.Unlock() 865 fetchCache[importPrefix] = res 866 return res, nil 867 } 868 869 resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) { 870 fetchCacheMu.Lock() 871 if res, ok := fetchCache[importPrefix]; ok { 872 fetchCacheMu.Unlock() 873 return res, nil 874 } 875 fetchCacheMu.Unlock() 876 877 urlStr, body, err := web.GetMaybeInsecure(importPrefix, security) 878 if err != nil { 879 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)}) 880 } 881 imports, err := parseMetaGoImports(body, mod) 882 if err != nil { 883 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)}) 884 } 885 if len(imports) == 0 { 886 err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr) 887 } 888 return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err}) 889 }) 890 res := resi.(fetchResult) 891 return res.urlStr, res.imports, res.err 892 } 893 894 type fetchResult struct { 895 urlStr string // e.g. "https://foo.com/x/bar?go-get=1" 896 imports []metaImport 897 err error 898 } 899 900 // metaImport represents the parsed <meta name="go-import" 901 // content="prefix vcs reporoot" /> tags from HTML files. 902 type metaImport struct { 903 Prefix, VCS, RepoRoot string 904 } 905 906 // pathPrefix reports whether sub is a prefix of s, 907 // only considering entire path components. 908 func pathPrefix(s, sub string) bool { 909 // strings.HasPrefix is necessary but not sufficient. 910 if !strings.HasPrefix(s, sub) { 911 return false 912 } 913 // The remainder after the prefix must either be empty or start with a slash. 914 rem := s[len(sub):] 915 return rem == "" || rem[0] == '/' 916 } 917 918 // A ImportMismatchError is returned where metaImport/s are present 919 // but none match our import path. 920 type ImportMismatchError struct { 921 importPath string 922 mismatches []string // the meta imports that were discarded for not matching our importPath 923 } 924 925 func (m ImportMismatchError) Error() string { 926 formattedStrings := make([]string, len(m.mismatches)) 927 for i, pre := range m.mismatches { 928 formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath) 929 } 930 return strings.Join(formattedStrings, ", ") 931 } 932 933 // matchGoImport returns the metaImport from imports matching importPath. 934 // An error is returned if there are multiple matches. 935 // errNoMatch is returned if none match. 936 func matchGoImport(imports []metaImport, importPath string) (metaImport, error) { 937 match := -1 938 939 errImportMismatch := ImportMismatchError{importPath: importPath} 940 for i, im := range imports { 941 if !pathPrefix(importPath, im.Prefix) { 942 errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix) 943 continue 944 } 945 946 if match >= 0 { 947 if imports[match].VCS == "mod" && im.VCS != "mod" { 948 // All the mod entries precede all the non-mod entries. 949 // We have a mod entry and don't care about the rest, 950 // matching or not. 951 break 952 } 953 return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath) 954 } 955 match = i 956 } 957 958 if match == -1 { 959 return metaImport{}, errImportMismatch 960 } 961 return imports[match], nil 962 } 963 964 // expand rewrites s to replace {k} with match[k] for each key k in match. 965 func expand(match map[string]string, s string) string { 966 for k, v := range match { 967 s = strings.ReplaceAll(s, "{"+k+"}", v) 968 } 969 return s 970 } 971 972 // vcsPaths defines the meaning of import paths referring to 973 // commonly-used VCS hosting sites (github.com/user/dir) 974 // and import paths referring to a fully-qualified importPath 975 // containing a VCS type (foo.com/repo.git/dir) 976 var vcsPaths = []*vcsPath{ 977 // Github 978 { 979 prefix: "github.com/", 980 re: `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`, 981 vcs: "git", 982 repo: "https://{root}", 983 check: noVCSSuffix, 984 }, 985 986 // Bitbucket 987 { 988 prefix: "bitbucket.org/", 989 re: `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 990 repo: "https://{root}", 991 check: bitbucketVCS, 992 }, 993 994 // IBM DevOps Services (JazzHub) 995 { 996 prefix: "hub.jazz.net/git/", 997 re: `^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 998 vcs: "git", 999 repo: "https://{root}", 1000 check: noVCSSuffix, 1001 }, 1002 1003 // Git at Apache 1004 { 1005 prefix: "git.apache.org/", 1006 re: `^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`, 1007 vcs: "git", 1008 repo: "https://{root}", 1009 }, 1010 1011 // Git at OpenStack 1012 { 1013 prefix: "git.openstack.org/", 1014 re: `^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`, 1015 vcs: "git", 1016 repo: "https://{root}", 1017 }, 1018 1019 // chiselapp.com for fossil 1020 { 1021 prefix: "chiselapp.com/", 1022 re: `^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`, 1023 vcs: "fossil", 1024 repo: "https://{root}", 1025 }, 1026 1027 // General syntax for any server. 1028 // Must be last. 1029 { 1030 re: `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`, 1031 ping: true, 1032 }, 1033 } 1034 1035 // vcsPathsAfterDynamic gives additional vcsPaths entries 1036 // to try after the dynamic HTML check. 1037 // This gives those sites a chance to introduce <meta> tags 1038 // as part of a graceful transition away from the hard-coded logic. 1039 var vcsPathsAfterDynamic = []*vcsPath{ 1040 // Launchpad. See golang.org/issue/11436. 1041 { 1042 prefix: "launchpad.net/", 1043 re: `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 1044 vcs: "bzr", 1045 repo: "https://{root}", 1046 check: launchpadVCS, 1047 }, 1048 } 1049 1050 func init() { 1051 // fill in cached regexps. 1052 // Doing this eagerly discovers invalid regexp syntax 1053 // without having to run a command that needs that regexp. 1054 for _, srv := range vcsPaths { 1055 srv.regexp = regexp.MustCompile(srv.re) 1056 } 1057 for _, srv := range vcsPathsAfterDynamic { 1058 srv.regexp = regexp.MustCompile(srv.re) 1059 } 1060 } 1061 1062 // noVCSSuffix checks that the repository name does not 1063 // end in .foo for any version control system foo. 1064 // The usual culprit is ".git". 1065 func noVCSSuffix(match map[string]string) error { 1066 repo := match["repo"] 1067 for _, vcs := range vcsList { 1068 if strings.HasSuffix(repo, "."+vcs.cmd) { 1069 return fmt.Errorf("invalid version control suffix in %s path", match["prefix"]) 1070 } 1071 } 1072 return nil 1073 } 1074 1075 // bitbucketVCS determines the version control system for a 1076 // Bitbucket repository, by using the Bitbucket API. 1077 func bitbucketVCS(match map[string]string) error { 1078 if err := noVCSSuffix(match); err != nil { 1079 return err 1080 } 1081 1082 var resp struct { 1083 SCM string `json:"scm"` 1084 } 1085 url := expand(match, "https://api.bitbucket.org/2.0/repositories/{bitname}?fields=scm") 1086 data, err := web.Get(url) 1087 if err != nil { 1088 if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 { 1089 // this may be a private repository. If so, attempt to determine which 1090 // VCS it uses. See issue 5375. 1091 root := match["root"] 1092 for _, vcs := range []string{"git", "hg"} { 1093 if vcsByCmd(vcs).ping("https", root) == nil { 1094 resp.SCM = vcs 1095 break 1096 } 1097 } 1098 } 1099 1100 if resp.SCM == "" { 1101 return err 1102 } 1103 } else { 1104 if err := json.Unmarshal(data, &resp); err != nil { 1105 return fmt.Errorf("decoding %s: %v", url, err) 1106 } 1107 } 1108 1109 if vcsByCmd(resp.SCM) != nil { 1110 match["vcs"] = resp.SCM 1111 if resp.SCM == "git" { 1112 match["repo"] += ".git" 1113 } 1114 return nil 1115 } 1116 1117 return fmt.Errorf("unable to detect version control system for bitbucket.org/ path") 1118 } 1119 1120 // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case, 1121 // "foo" could be a series name registered in Launchpad with its own branch, 1122 // and it could also be the name of a directory within the main project 1123 // branch one level up. 1124 func launchpadVCS(match map[string]string) error { 1125 if match["project"] == "" || match["series"] == "" { 1126 return nil 1127 } 1128 _, err := web.Get(expand(match, "https://code.launchpad.net/{project}{series}/.bzr/branch-format")) 1129 if err != nil { 1130 match["root"] = expand(match, "launchpad.net/{project}") 1131 match["repo"] = expand(match, "https://{root}") 1132 } 1133 return nil 1134 }