github.com/MangoDowner/go-gm@v0.0.0-20180818020936-8baa2bd4408c/src/cmd/go/internal/get/vcs.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package get 6 7 import ( 8 "bytes" 9 "encoding/json" 10 "errors" 11 "fmt" 12 "internal/singleflight" 13 "log" 14 "net/url" 15 "os" 16 "os/exec" 17 "path/filepath" 18 "regexp" 19 "strings" 20 "sync" 21 22 "cmd/go/internal/base" 23 "cmd/go/internal/cfg" 24 "cmd/go/internal/web" 25 ) 26 27 // A vcsCmd describes how to use a version control system 28 // like Mercurial, Git, or Subversion. 29 type vcsCmd struct { 30 name string 31 cmd string // name of binary to invoke command 32 33 createCmd []string // commands to download a fresh copy of a repository 34 downloadCmd []string // commands to download updates into an existing repository 35 36 tagCmd []tagCmd // commands to list tags 37 tagLookupCmd []tagCmd // commands to lookup tags before running tagSyncCmd 38 tagSyncCmd []string // commands to sync to specific tag 39 tagSyncDefault []string // commands to sync to default tag 40 41 scheme []string 42 pingCmd string 43 44 remoteRepo func(v *vcsCmd, rootDir string) (remoteRepo string, err error) 45 resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) 46 } 47 48 var defaultSecureScheme = map[string]bool{ 49 "https": true, 50 "git+ssh": true, 51 "bzr+ssh": true, 52 "svn+ssh": true, 53 "ssh": true, 54 } 55 56 func (v *vcsCmd) isSecure(repo string) bool { 57 u, err := url.Parse(repo) 58 if err != nil { 59 // If repo is not a URL, it's not secure. 60 return false 61 } 62 return v.isSecureScheme(u.Scheme) 63 } 64 65 func (v *vcsCmd) isSecureScheme(scheme string) bool { 66 switch v.cmd { 67 case "git": 68 // GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a 69 // colon-separated list of schemes that are allowed to be used with git 70 // fetch/clone. Any scheme not mentioned will be considered insecure. 71 if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" { 72 for _, s := range strings.Split(allow, ":") { 73 if s == scheme { 74 return true 75 } 76 } 77 return false 78 } 79 } 80 return defaultSecureScheme[scheme] 81 } 82 83 // A tagCmd describes a command to list available tags 84 // that can be passed to tagSyncCmd. 85 type tagCmd struct { 86 cmd string // command to list tags 87 pattern string // regexp to extract tags from list 88 } 89 90 // vcsList lists the known version control systems 91 var vcsList = []*vcsCmd{ 92 vcsHg, 93 vcsGit, 94 vcsSvn, 95 vcsBzr, 96 } 97 98 // vcsByCmd returns the version control system for the given 99 // command name (hg, git, svn, bzr). 100 func vcsByCmd(cmd string) *vcsCmd { 101 for _, vcs := range vcsList { 102 if vcs.cmd == cmd { 103 return vcs 104 } 105 } 106 return nil 107 } 108 109 // vcsHg describes how to use Mercurial. 110 var vcsHg = &vcsCmd{ 111 name: "Mercurial", 112 cmd: "hg", 113 114 createCmd: []string{"clone -U {repo} {dir}"}, 115 downloadCmd: []string{"pull"}, 116 117 // We allow both tag and branch names as 'tags' 118 // for selecting a version. This lets people have 119 // a go.release.r60 branch and a go1 branch 120 // and make changes in both, without constantly 121 // editing .hgtags. 122 tagCmd: []tagCmd{ 123 {"tags", `^(\S+)`}, 124 {"branches", `^(\S+)`}, 125 }, 126 tagSyncCmd: []string{"update -r {tag}"}, 127 tagSyncDefault: []string{"update default"}, 128 129 scheme: []string{"https", "http", "ssh"}, 130 pingCmd: "identify {scheme}://{repo}", 131 remoteRepo: hgRemoteRepo, 132 } 133 134 func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) { 135 out, err := vcsHg.runOutput(rootDir, "paths default") 136 if err != nil { 137 return "", err 138 } 139 return strings.TrimSpace(string(out)), nil 140 } 141 142 // vcsGit describes how to use Git. 143 var vcsGit = &vcsCmd{ 144 name: "Git", 145 cmd: "git", 146 147 createCmd: []string{"clone {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"}, 148 downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"}, 149 150 tagCmd: []tagCmd{ 151 // tags/xxx matches a git tag named xxx 152 // origin/xxx matches a git branch named xxx on the default remote repository 153 {"show-ref", `(?:tags|origin)/(\S+)$`}, 154 }, 155 tagLookupCmd: []tagCmd{ 156 {"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`}, 157 }, 158 tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"}, 159 // both createCmd and downloadCmd update the working dir. 160 // No need to do more here. We used to 'checkout master' 161 // but that doesn't work if the default branch is not named master. 162 // DO NOT add 'checkout master' here. 163 // See golang.org/issue/9032. 164 tagSyncDefault: []string{"submodule update --init --recursive"}, 165 166 scheme: []string{"git", "https", "http", "git+ssh", "ssh"}, 167 pingCmd: "ls-remote {scheme}://{repo}", 168 remoteRepo: gitRemoteRepo, 169 } 170 171 // scpSyntaxRe matches the SCP-like addresses used by Git to access 172 // repositories by SSH. 173 var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 174 175 func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) { 176 cmd := "config remote.origin.url" 177 errParse := errors.New("unable to parse output of git " + cmd) 178 errRemoteOriginNotFound := errors.New("remote origin not found") 179 outb, err := vcsGit.run1(rootDir, cmd, nil, false) 180 if err != nil { 181 // if it doesn't output any message, it means the config argument is correct, 182 // but the config value itself doesn't exist 183 if outb != nil && len(outb) == 0 { 184 return "", errRemoteOriginNotFound 185 } 186 return "", err 187 } 188 out := strings.TrimSpace(string(outb)) 189 190 var repoURL *url.URL 191 if m := scpSyntaxRe.FindStringSubmatch(out); m != nil { 192 // Match SCP-like syntax and convert it to a URL. 193 // Eg, "git@github.com:user/repo" becomes 194 // "ssh://git@github.com/user/repo". 195 repoURL = &url.URL{ 196 Scheme: "ssh", 197 User: url.User(m[1]), 198 Host: m[2], 199 Path: m[3], 200 } 201 } else { 202 repoURL, err = url.Parse(out) 203 if err != nil { 204 return "", err 205 } 206 } 207 208 // Iterate over insecure schemes too, because this function simply 209 // reports the state of the repo. If we can't see insecure schemes then 210 // we can't report the actual repo URL. 211 for _, s := range vcsGit.scheme { 212 if repoURL.Scheme == s { 213 return repoURL.String(), nil 214 } 215 } 216 return "", errParse 217 } 218 219 // vcsBzr describes how to use Bazaar. 220 var vcsBzr = &vcsCmd{ 221 name: "Bazaar", 222 cmd: "bzr", 223 224 createCmd: []string{"branch {repo} {dir}"}, 225 226 // Without --overwrite bzr will not pull tags that changed. 227 // Replace by --overwrite-tags after http://pad.lv/681792 goes in. 228 downloadCmd: []string{"pull --overwrite"}, 229 230 tagCmd: []tagCmd{{"tags", `^(\S+)`}}, 231 tagSyncCmd: []string{"update -r {tag}"}, 232 tagSyncDefault: []string{"update -r revno:-1"}, 233 234 scheme: []string{"https", "http", "bzr", "bzr+ssh"}, 235 pingCmd: "info {scheme}://{repo}", 236 remoteRepo: bzrRemoteRepo, 237 resolveRepo: bzrResolveRepo, 238 } 239 240 func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) { 241 outb, err := vcsBzr.runOutput(rootDir, "config parent_location") 242 if err != nil { 243 return "", err 244 } 245 return strings.TrimSpace(string(outb)), nil 246 } 247 248 func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) { 249 outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo) 250 if err != nil { 251 return "", err 252 } 253 out := string(outb) 254 255 // Expect: 256 // ... 257 // (branch root|repository branch): <URL> 258 // ... 259 260 found := false 261 for _, prefix := range []string{"\n branch root: ", "\n repository branch: "} { 262 i := strings.Index(out, prefix) 263 if i >= 0 { 264 out = out[i+len(prefix):] 265 found = true 266 break 267 } 268 } 269 if !found { 270 return "", fmt.Errorf("unable to parse output of bzr info") 271 } 272 273 i := strings.Index(out, "\n") 274 if i < 0 { 275 return "", fmt.Errorf("unable to parse output of bzr info") 276 } 277 out = out[:i] 278 return strings.TrimSpace(out), nil 279 } 280 281 // vcsSvn describes how to use Subversion. 282 var vcsSvn = &vcsCmd{ 283 name: "Subversion", 284 cmd: "svn", 285 286 createCmd: []string{"checkout {repo} {dir}"}, 287 downloadCmd: []string{"update"}, 288 289 // There is no tag command in subversion. 290 // The branch information is all in the path names. 291 292 scheme: []string{"https", "http", "svn", "svn+ssh"}, 293 pingCmd: "info {scheme}://{repo}", 294 remoteRepo: svnRemoteRepo, 295 } 296 297 func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) { 298 outb, err := vcsSvn.runOutput(rootDir, "info") 299 if err != nil { 300 return "", err 301 } 302 out := string(outb) 303 304 // Expect: 305 // 306 // ... 307 // URL: <URL> 308 // ... 309 // 310 // Note that we're not using the Repository Root line, 311 // because svn allows checking out subtrees. 312 // The URL will be the URL of the subtree (what we used with 'svn co') 313 // while the Repository Root may be a much higher parent. 314 i := strings.Index(out, "\nURL: ") 315 if i < 0 { 316 return "", fmt.Errorf("unable to parse output of svn info") 317 } 318 out = out[i+len("\nURL: "):] 319 i = strings.Index(out, "\n") 320 if i < 0 { 321 return "", fmt.Errorf("unable to parse output of svn info") 322 } 323 out = out[:i] 324 return strings.TrimSpace(out), nil 325 } 326 327 func (v *vcsCmd) String() string { 328 return v.name 329 } 330 331 // run runs the command line cmd in the given directory. 332 // keyval is a list of key, value pairs. run expands 333 // instances of {key} in cmd into value, but only after 334 // splitting cmd into individual arguments. 335 // If an error occurs, run prints the command line and the 336 // command's combined stdout+stderr to standard error. 337 // Otherwise run discards the command's output. 338 func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error { 339 _, err := v.run1(dir, cmd, keyval, true) 340 return err 341 } 342 343 // runVerboseOnly is like run but only generates error output to standard error in verbose mode. 344 func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error { 345 _, err := v.run1(dir, cmd, keyval, false) 346 return err 347 } 348 349 // runOutput is like run but returns the output of the command. 350 func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) { 351 return v.run1(dir, cmd, keyval, true) 352 } 353 354 // run1 is the generalized implementation of run and runOutput. 355 func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) { 356 m := make(map[string]string) 357 for i := 0; i < len(keyval); i += 2 { 358 m[keyval[i]] = keyval[i+1] 359 } 360 args := strings.Fields(cmdline) 361 for i, arg := range args { 362 args[i] = expand(m, arg) 363 } 364 365 if len(args) >= 2 && args[0] == "-go-internal-cd" { 366 if filepath.IsAbs(args[1]) { 367 dir = args[1] 368 } else { 369 dir = filepath.Join(dir, args[1]) 370 } 371 args = args[2:] 372 } 373 374 _, err := exec.LookPath(v.cmd) 375 if err != nil { 376 fmt.Fprintf(os.Stderr, 377 "go: missing %s command. See https://golang.org/s/gogetcmd\n", 378 v.name) 379 return nil, err 380 } 381 382 cmd := exec.Command(v.cmd, args...) 383 cmd.Dir = dir 384 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 385 if cfg.BuildX { 386 fmt.Printf("cd %s\n", dir) 387 fmt.Printf("%s %s\n", v.cmd, strings.Join(args, " ")) 388 } 389 var buf bytes.Buffer 390 cmd.Stdout = &buf 391 cmd.Stderr = &buf 392 err = cmd.Run() 393 out := buf.Bytes() 394 if err != nil { 395 if verbose || cfg.BuildV { 396 fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " ")) 397 os.Stderr.Write(out) 398 } 399 return out, err 400 } 401 return out, nil 402 } 403 404 // ping pings to determine scheme to use. 405 func (v *vcsCmd) ping(scheme, repo string) error { 406 return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo) 407 } 408 409 // create creates a new copy of repo in dir. 410 // The parent of dir must exist; dir must not. 411 func (v *vcsCmd) create(dir, repo string) error { 412 for _, cmd := range v.createCmd { 413 if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil { 414 return err 415 } 416 } 417 return nil 418 } 419 420 // download downloads any new changes for the repo in dir. 421 func (v *vcsCmd) download(dir string) error { 422 for _, cmd := range v.downloadCmd { 423 if err := v.run(dir, cmd); err != nil { 424 return err 425 } 426 } 427 return nil 428 } 429 430 // tags returns the list of available tags for the repo in dir. 431 func (v *vcsCmd) tags(dir string) ([]string, error) { 432 var tags []string 433 for _, tc := range v.tagCmd { 434 out, err := v.runOutput(dir, tc.cmd) 435 if err != nil { 436 return nil, err 437 } 438 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 439 for _, m := range re.FindAllStringSubmatch(string(out), -1) { 440 tags = append(tags, m[1]) 441 } 442 } 443 return tags, nil 444 } 445 446 // tagSync syncs the repo in dir to the named tag, 447 // which either is a tag returned by tags or is v.tagDefault. 448 func (v *vcsCmd) tagSync(dir, tag string) error { 449 if v.tagSyncCmd == nil { 450 return nil 451 } 452 if tag != "" { 453 for _, tc := range v.tagLookupCmd { 454 out, err := v.runOutput(dir, tc.cmd, "tag", tag) 455 if err != nil { 456 return err 457 } 458 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 459 m := re.FindStringSubmatch(string(out)) 460 if len(m) > 1 { 461 tag = m[1] 462 break 463 } 464 } 465 } 466 467 if tag == "" && v.tagSyncDefault != nil { 468 for _, cmd := range v.tagSyncDefault { 469 if err := v.run(dir, cmd); err != nil { 470 return err 471 } 472 } 473 return nil 474 } 475 476 for _, cmd := range v.tagSyncCmd { 477 if err := v.run(dir, cmd, "tag", tag); err != nil { 478 return err 479 } 480 } 481 return nil 482 } 483 484 // A vcsPath describes how to convert an import path into a 485 // version control system and repository name. 486 type vcsPath struct { 487 prefix string // prefix this description applies to 488 re string // pattern for import path 489 repo string // repository to use (expand with match of re) 490 vcs string // version control system to use (expand with match of re) 491 check func(match map[string]string) error // additional checks 492 ping bool // ping for scheme to use to download repo 493 494 regexp *regexp.Regexp // cached compiled form of re 495 } 496 497 // vcsFromDir inspects dir and its parents to determine the 498 // version control system and code repository to use. 499 // On return, root is the import path 500 // corresponding to the root of the repository. 501 func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) { 502 // Clean and double-check that dir is in (a subdirectory of) srcRoot. 503 dir = filepath.Clean(dir) 504 srcRoot = filepath.Clean(srcRoot) 505 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 506 return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 507 } 508 509 var vcsRet *vcsCmd 510 var rootRet string 511 512 origDir := dir 513 for len(dir) > len(srcRoot) { 514 for _, vcs := range vcsList { 515 if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil { 516 root := filepath.ToSlash(dir[len(srcRoot)+1:]) 517 // Record first VCS we find, but keep looking, 518 // to detect mistakes like one kind of VCS inside another. 519 if vcsRet == nil { 520 vcsRet = vcs 521 rootRet = root 522 continue 523 } 524 // Allow .git inside .git, which can arise due to submodules. 525 if vcsRet == vcs && vcs.cmd == "git" { 526 continue 527 } 528 // Otherwise, we have one VCS inside a different VCS. 529 return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s", 530 filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd) 531 } 532 } 533 534 // Move to parent. 535 ndir := filepath.Dir(dir) 536 if len(ndir) >= len(dir) { 537 // Shouldn't happen, but just in case, stop. 538 break 539 } 540 dir = ndir 541 } 542 543 if vcsRet != nil { 544 return vcsRet, rootRet, nil 545 } 546 547 return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir) 548 } 549 550 // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS 551 // situation for dir, checking parents up until srcRoot. 552 func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error { 553 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 554 return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 555 } 556 557 otherDir := dir 558 for len(otherDir) > len(srcRoot) { 559 for _, otherVCS := range vcsList { 560 if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil { 561 // Allow expected vcs in original dir. 562 if otherDir == dir && otherVCS == vcs { 563 continue 564 } 565 // Allow .git inside .git, which can arise due to submodules. 566 if otherVCS == vcs && vcs.cmd == "git" { 567 continue 568 } 569 // Otherwise, we have one VCS inside a different VCS. 570 return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd) 571 } 572 } 573 // Move to parent. 574 newDir := filepath.Dir(otherDir) 575 if len(newDir) >= len(otherDir) { 576 // Shouldn't happen, but just in case, stop. 577 break 578 } 579 otherDir = newDir 580 } 581 582 return nil 583 } 584 585 // repoRoot represents a version control system, a repo, and a root of 586 // where to put it on disk. 587 type repoRoot struct { 588 vcs *vcsCmd 589 590 // repo is the repository URL, including scheme 591 repo string 592 593 // root is the import path corresponding to the root of the 594 // repository 595 root string 596 597 // isCustom is true for custom import paths (those defined by HTML meta tags) 598 isCustom bool 599 } 600 601 var httpPrefixRE = regexp.MustCompile(`^https?:`) 602 603 // repoRootForImportPath analyzes importPath to determine the 604 // version control system, and code repository to use. 605 func repoRootForImportPath(importPath string, security web.SecurityMode) (*repoRoot, error) { 606 rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths) 607 if err == errUnknownSite { 608 // If there are wildcards, look up the thing before the wildcard, 609 // hoping it applies to the wildcarded parts too. 610 // This makes 'go get rsc.io/pdf/...' work in a fresh GOPATH. 611 lookup := strings.TrimSuffix(importPath, "/...") 612 if i := strings.Index(lookup, "/.../"); i >= 0 { 613 lookup = lookup[:i] 614 } 615 rr, err = repoRootForImportDynamic(lookup, security) 616 if err != nil { 617 err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err) 618 } 619 } 620 if err != nil { 621 rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic) 622 if err1 == nil { 623 rr = rr1 624 err = nil 625 } 626 } 627 628 if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.root, "...") { 629 // Do not allow wildcards in the repo root. 630 rr = nil 631 err = fmt.Errorf("cannot expand ... in %q", importPath) 632 } 633 return rr, err 634 } 635 636 var errUnknownSite = errors.New("dynamic lookup required to find mapping") 637 638 // repoRootFromVCSPaths attempts to map importPath to a repoRoot 639 // using the mappings defined in vcsPaths. 640 // If scheme is non-empty, that scheme is forced. 641 func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*repoRoot, error) { 642 // A common error is to use https://packagepath because that's what 643 // hg and git require. Diagnose this helpfully. 644 if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil { 645 // The importPath has been cleaned, so has only one slash. The pattern 646 // ignores the slashes; the error message puts them back on the RHS at least. 647 return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//") 648 } 649 for _, srv := range vcsPaths { 650 if !strings.HasPrefix(importPath, srv.prefix) { 651 continue 652 } 653 m := srv.regexp.FindStringSubmatch(importPath) 654 if m == nil { 655 if srv.prefix != "" { 656 return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath) 657 } 658 continue 659 } 660 661 // Build map of named subexpression matches for expand. 662 match := map[string]string{ 663 "prefix": srv.prefix, 664 "import": importPath, 665 } 666 for i, name := range srv.regexp.SubexpNames() { 667 if name != "" && match[name] == "" { 668 match[name] = m[i] 669 } 670 } 671 if srv.vcs != "" { 672 match["vcs"] = expand(match, srv.vcs) 673 } 674 if srv.repo != "" { 675 match["repo"] = expand(match, srv.repo) 676 } 677 if srv.check != nil { 678 if err := srv.check(match); err != nil { 679 return nil, err 680 } 681 } 682 vcs := vcsByCmd(match["vcs"]) 683 if vcs == nil { 684 return nil, fmt.Errorf("unknown version control system %q", match["vcs"]) 685 } 686 if srv.ping { 687 if scheme != "" { 688 match["repo"] = scheme + "://" + match["repo"] 689 } else { 690 for _, scheme := range vcs.scheme { 691 if security == web.Secure && !vcs.isSecureScheme(scheme) { 692 continue 693 } 694 if vcs.ping(scheme, match["repo"]) == nil { 695 match["repo"] = scheme + "://" + match["repo"] 696 break 697 } 698 } 699 } 700 } 701 rr := &repoRoot{ 702 vcs: vcs, 703 repo: match["repo"], 704 root: match["root"], 705 } 706 return rr, nil 707 } 708 return nil, errUnknownSite 709 } 710 711 // repoRootForImportDynamic finds a *repoRoot for a custom domain that's not 712 // statically known by repoRootForImportPathStatic. 713 // 714 // This handles custom import paths like "name.tld/pkg/foo" or just "name.tld". 715 func repoRootForImportDynamic(importPath string, security web.SecurityMode) (*repoRoot, error) { 716 slash := strings.Index(importPath, "/") 717 if slash < 0 { 718 slash = len(importPath) 719 } 720 host := importPath[:slash] 721 if !strings.Contains(host, ".") { 722 return nil, errors.New("import path does not begin with hostname") 723 } 724 urlStr, body, err := web.GetMaybeInsecure(importPath, security) 725 if err != nil { 726 msg := "https fetch: %v" 727 if security == web.Insecure { 728 msg = "http/" + msg 729 } 730 return nil, fmt.Errorf(msg, err) 731 } 732 defer body.Close() 733 imports, err := parseMetaGoImports(body) 734 if err != nil { 735 return nil, fmt.Errorf("parsing %s: %v", importPath, err) 736 } 737 // Find the matched meta import. 738 mmi, err := matchGoImport(imports, importPath) 739 if err != nil { 740 if _, ok := err.(ImportMismatchError); !ok { 741 return nil, fmt.Errorf("parse %s: %v", urlStr, err) 742 } 743 return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", urlStr, err) 744 } 745 if cfg.BuildV { 746 log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr) 747 } 748 // If the import was "uni.edu/bob/project", which said the 749 // prefix was "uni.edu" and the RepoRoot was "evilroot.com", 750 // make sure we don't trust Bob and check out evilroot.com to 751 // "uni.edu" yet (possibly overwriting/preempting another 752 // non-evil student). Instead, first verify the root and see 753 // if it matches Bob's claim. 754 if mmi.Prefix != importPath { 755 if cfg.BuildV { 756 log.Printf("get %q: verifying non-authoritative meta tag", importPath) 757 } 758 urlStr0 := urlStr 759 var imports []metaImport 760 urlStr, imports, err = metaImportsForPrefix(mmi.Prefix, security) 761 if err != nil { 762 return nil, err 763 } 764 metaImport2, err := matchGoImport(imports, importPath) 765 if err != nil || mmi != metaImport2 { 766 return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix) 767 } 768 } 769 770 if err := validateRepoRootScheme(mmi.RepoRoot); err != nil { 771 return nil, fmt.Errorf("%s: invalid repo root %q: %v", urlStr, mmi.RepoRoot, err) 772 } 773 rr := &repoRoot{ 774 vcs: vcsByCmd(mmi.VCS), 775 repo: mmi.RepoRoot, 776 root: mmi.Prefix, 777 isCustom: true, 778 } 779 if rr.vcs == nil { 780 return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS) 781 } 782 return rr, nil 783 } 784 785 // validateRepoRootScheme returns an error if repoRoot does not seem 786 // to have a valid URL scheme. At this point we permit things that 787 // aren't valid URLs, although later, if not using -insecure, we will 788 // restrict repoRoots to be valid URLs. This is only because we've 789 // historically permitted them, and people may depend on that. 790 func validateRepoRootScheme(repoRoot string) error { 791 end := strings.Index(repoRoot, "://") 792 if end <= 0 { 793 return errors.New("no scheme") 794 } 795 796 // RFC 3986 section 3.1. 797 for i := 0; i < end; i++ { 798 c := repoRoot[i] 799 switch { 800 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 801 // OK. 802 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 803 // OK except at start. 804 if i == 0 { 805 return errors.New("invalid scheme") 806 } 807 default: 808 return errors.New("invalid scheme") 809 } 810 } 811 812 return nil 813 } 814 815 var fetchGroup singleflight.Group 816 var ( 817 fetchCacheMu sync.Mutex 818 fetchCache = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix 819 ) 820 821 // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag 822 // and returns its HTML discovery URL and the parsed metaImport lines 823 // found on the page. 824 // 825 // The importPath is of the form "golang.org/x/tools". 826 // It is an error if no imports are found. 827 // urlStr will still be valid if err != nil. 828 // The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1" 829 func metaImportsForPrefix(importPrefix string, security web.SecurityMode) (urlStr string, imports []metaImport, err error) { 830 setCache := func(res fetchResult) (fetchResult, error) { 831 fetchCacheMu.Lock() 832 defer fetchCacheMu.Unlock() 833 fetchCache[importPrefix] = res 834 return res, nil 835 } 836 837 resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) { 838 fetchCacheMu.Lock() 839 if res, ok := fetchCache[importPrefix]; ok { 840 fetchCacheMu.Unlock() 841 return res, nil 842 } 843 fetchCacheMu.Unlock() 844 845 urlStr, body, err := web.GetMaybeInsecure(importPrefix, security) 846 if err != nil { 847 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)}) 848 } 849 imports, err := parseMetaGoImports(body) 850 if err != nil { 851 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)}) 852 } 853 if len(imports) == 0 { 854 err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr) 855 } 856 return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err}) 857 }) 858 res := resi.(fetchResult) 859 return res.urlStr, res.imports, res.err 860 } 861 862 type fetchResult struct { 863 urlStr string // e.g. "https://foo.com/x/bar?go-get=1" 864 imports []metaImport 865 err error 866 } 867 868 // metaImport represents the parsed <meta name="go-import" 869 // content="prefix vcs reporoot" /> tags from HTML files. 870 type metaImport struct { 871 Prefix, VCS, RepoRoot string 872 } 873 874 func splitPathHasPrefix(path, prefix []string) bool { 875 if len(path) < len(prefix) { 876 return false 877 } 878 for i, p := range prefix { 879 if path[i] != p { 880 return false 881 } 882 } 883 return true 884 } 885 886 // A ImportMismatchError is returned where metaImport/s are present 887 // but none match our import path. 888 type ImportMismatchError struct { 889 importPath string 890 mismatches []string // the meta imports that were discarded for not matching our importPath 891 } 892 893 func (m ImportMismatchError) Error() string { 894 formattedStrings := make([]string, len(m.mismatches)) 895 for i, pre := range m.mismatches { 896 formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath) 897 } 898 return strings.Join(formattedStrings, ", ") 899 } 900 901 // matchGoImport returns the metaImport from imports matching importPath. 902 // An error is returned if there are multiple matches. 903 // errNoMatch is returned if none match. 904 func matchGoImport(imports []metaImport, importPath string) (metaImport, error) { 905 match := -1 906 imp := strings.Split(importPath, "/") 907 908 errImportMismatch := ImportMismatchError{importPath: importPath} 909 for i, im := range imports { 910 pre := strings.Split(im.Prefix, "/") 911 912 if !splitPathHasPrefix(imp, pre) { 913 errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix) 914 continue 915 } 916 917 if match != -1 { 918 return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath) 919 } 920 match = i 921 } 922 923 if match == -1 { 924 return metaImport{}, errImportMismatch 925 } 926 return imports[match], nil 927 } 928 929 // expand rewrites s to replace {k} with match[k] for each key k in match. 930 func expand(match map[string]string, s string) string { 931 for k, v := range match { 932 s = strings.Replace(s, "{"+k+"}", v, -1) 933 } 934 return s 935 } 936 937 // vcsPaths defines the meaning of import paths referring to 938 // commonly-used VCS hosting sites (github.com/user/dir) 939 // and import paths referring to a fully-qualified importPath 940 // containing a VCS type (foo.com/repo.git/dir) 941 var vcsPaths = []*vcsPath{ 942 // Github 943 { 944 prefix: "github.com/", 945 re: `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`, 946 vcs: "git", 947 repo: "https://{root}", 948 check: noVCSSuffix, 949 }, 950 951 // Bitbucket 952 { 953 prefix: "bitbucket.org/", 954 re: `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 955 repo: "https://{root}", 956 check: bitbucketVCS, 957 }, 958 959 // IBM DevOps Services (JazzHub) 960 { 961 prefix: "hub.jazz.net/git", 962 re: `^(?P<root>hub.jazz.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 963 vcs: "git", 964 repo: "https://{root}", 965 check: noVCSSuffix, 966 }, 967 968 // Git at Apache 969 { 970 prefix: "git.apache.org", 971 re: `^(?P<root>git.apache.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`, 972 vcs: "git", 973 repo: "https://{root}", 974 }, 975 976 // Git at OpenStack 977 { 978 prefix: "git.openstack.org", 979 re: `^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`, 980 vcs: "git", 981 repo: "https://{root}", 982 }, 983 984 // General syntax for any server. 985 // Must be last. 986 { 987 re: `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`, 988 ping: true, 989 }, 990 } 991 992 // vcsPathsAfterDynamic gives additional vcsPaths entries 993 // to try after the dynamic HTML check. 994 // This gives those sites a chance to introduce <meta> tags 995 // as part of a graceful transition away from the hard-coded logic. 996 var vcsPathsAfterDynamic = []*vcsPath{ 997 // Launchpad. See golang.org/issue/11436. 998 { 999 prefix: "launchpad.net/", 1000 re: `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 1001 vcs: "bzr", 1002 repo: "https://{root}", 1003 check: launchpadVCS, 1004 }, 1005 } 1006 1007 func init() { 1008 // fill in cached regexps. 1009 // Doing this eagerly discovers invalid regexp syntax 1010 // without having to run a command that needs that regexp. 1011 for _, srv := range vcsPaths { 1012 srv.regexp = regexp.MustCompile(srv.re) 1013 } 1014 for _, srv := range vcsPathsAfterDynamic { 1015 srv.regexp = regexp.MustCompile(srv.re) 1016 } 1017 } 1018 1019 // noVCSSuffix checks that the repository name does not 1020 // end in .foo for any version control system foo. 1021 // The usual culprit is ".git". 1022 func noVCSSuffix(match map[string]string) error { 1023 repo := match["repo"] 1024 for _, vcs := range vcsList { 1025 if strings.HasSuffix(repo, "."+vcs.cmd) { 1026 return fmt.Errorf("invalid version control suffix in %s path", match["prefix"]) 1027 } 1028 } 1029 return nil 1030 } 1031 1032 // bitbucketVCS determines the version control system for a 1033 // Bitbucket repository, by using the Bitbucket API. 1034 func bitbucketVCS(match map[string]string) error { 1035 if err := noVCSSuffix(match); err != nil { 1036 return err 1037 } 1038 1039 var resp struct { 1040 SCM string `json:"scm"` 1041 } 1042 url := expand(match, "https://api.bitbucket.org/2.0/repositories/{bitname}?fields=scm") 1043 data, err := web.Get(url) 1044 if err != nil { 1045 if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 { 1046 // this may be a private repository. If so, attempt to determine which 1047 // VCS it uses. See issue 5375. 1048 root := match["root"] 1049 for _, vcs := range []string{"git", "hg"} { 1050 if vcsByCmd(vcs).ping("https", root) == nil { 1051 resp.SCM = vcs 1052 break 1053 } 1054 } 1055 } 1056 1057 if resp.SCM == "" { 1058 return err 1059 } 1060 } else { 1061 if err := json.Unmarshal(data, &resp); err != nil { 1062 return fmt.Errorf("decoding %s: %v", url, err) 1063 } 1064 } 1065 1066 if vcsByCmd(resp.SCM) != nil { 1067 match["vcs"] = resp.SCM 1068 if resp.SCM == "git" { 1069 match["repo"] += ".git" 1070 } 1071 return nil 1072 } 1073 1074 return fmt.Errorf("unable to detect version control system for bitbucket.org/ path") 1075 } 1076 1077 // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case, 1078 // "foo" could be a series name registered in Launchpad with its own branch, 1079 // and it could also be the name of a directory within the main project 1080 // branch one level up. 1081 func launchpadVCS(match map[string]string) error { 1082 if match["project"] == "" || match["series"] == "" { 1083 return nil 1084 } 1085 _, err := web.Get(expand(match, "https://code.launchpad.net/{project}{series}/.bzr/branch-format")) 1086 if err != nil { 1087 match["root"] = expand(match, "launchpad.net/{project}") 1088 match["repo"] = expand(match, "https://{root}") 1089 } 1090 return nil 1091 }