github.com/yukk001/go1.10.8@v0.0.0-20190813125351-6df2d3982e20/src/cmd/go/internal/get/vcs.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package get 6 7 import ( 8 "bytes" 9 "encoding/json" 10 "errors" 11 "fmt" 12 "internal/singleflight" 13 "log" 14 "net/url" 15 "os" 16 "os/exec" 17 "path/filepath" 18 "regexp" 19 "strings" 20 "sync" 21 22 "cmd/go/internal/base" 23 "cmd/go/internal/cfg" 24 "cmd/go/internal/web" 25 ) 26 27 // A vcsCmd describes how to use a version control system 28 // like Mercurial, Git, or Subversion. 29 type vcsCmd struct { 30 name string 31 cmd string // name of binary to invoke command 32 33 createCmd []string // commands to download a fresh copy of a repository 34 downloadCmd []string // commands to download updates into an existing repository 35 36 tagCmd []tagCmd // commands to list tags 37 tagLookupCmd []tagCmd // commands to lookup tags before running tagSyncCmd 38 tagSyncCmd []string // commands to sync to specific tag 39 tagSyncDefault []string // commands to sync to default tag 40 41 scheme []string 42 pingCmd string 43 44 remoteRepo func(v *vcsCmd, rootDir string) (remoteRepo string, err error) 45 resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) 46 } 47 48 var defaultSecureScheme = map[string]bool{ 49 "https": true, 50 "git+ssh": true, 51 "bzr+ssh": true, 52 "svn+ssh": true, 53 "ssh": true, 54 } 55 56 func (v *vcsCmd) isSecure(repo string) bool { 57 u, err := url.Parse(repo) 58 if err != nil { 59 // If repo is not a URL, it's not secure. 60 return false 61 } 62 return v.isSecureScheme(u.Scheme) 63 } 64 65 func (v *vcsCmd) isSecureScheme(scheme string) bool { 66 switch v.cmd { 67 case "git": 68 // GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a 69 // colon-separated list of schemes that are allowed to be used with git 70 // fetch/clone. 
Any scheme not mentioned will be considered insecure. 71 if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" { 72 for _, s := range strings.Split(allow, ":") { 73 if s == scheme { 74 return true 75 } 76 } 77 return false 78 } 79 } 80 return defaultSecureScheme[scheme] 81 } 82 83 // A tagCmd describes a command to list available tags 84 // that can be passed to tagSyncCmd. 85 type tagCmd struct { 86 cmd string // command to list tags 87 pattern string // regexp to extract tags from list 88 } 89 90 // vcsList lists the known version control systems 91 var vcsList = []*vcsCmd{ 92 vcsHg, 93 vcsGit, 94 vcsSvn, 95 vcsBzr, 96 vcsFossil, 97 } 98 99 // vcsByCmd returns the version control system for the given 100 // command name (hg, git, svn, bzr). 101 func vcsByCmd(cmd string) *vcsCmd { 102 for _, vcs := range vcsList { 103 if vcs.cmd == cmd { 104 return vcs 105 } 106 } 107 return nil 108 } 109 110 // vcsHg describes how to use Mercurial. 111 var vcsHg = &vcsCmd{ 112 name: "Mercurial", 113 cmd: "hg", 114 115 createCmd: []string{"clone -U {repo} {dir}"}, 116 downloadCmd: []string{"pull"}, 117 118 // We allow both tag and branch names as 'tags' 119 // for selecting a version. This lets people have 120 // a go.release.r60 branch and a go1 branch 121 // and make changes in both, without constantly 122 // editing .hgtags. 123 tagCmd: []tagCmd{ 124 {"tags", `^(\S+)`}, 125 {"branches", `^(\S+)`}, 126 }, 127 tagSyncCmd: []string{"update -r {tag}"}, 128 tagSyncDefault: []string{"update default"}, 129 130 scheme: []string{"https", "http", "ssh"}, 131 pingCmd: "identify {scheme}://{repo}", 132 remoteRepo: hgRemoteRepo, 133 } 134 135 func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) { 136 out, err := vcsHg.runOutput(rootDir, "paths default") 137 if err != nil { 138 return "", err 139 } 140 return strings.TrimSpace(string(out)), nil 141 } 142 143 // vcsGit describes how to use Git. 
var vcsGit = &vcsCmd{
	name: "Git",
	cmd:  "git",

	// The second create command starts with -go-internal-cd, which run1
	// interprets as "change into {dir} first" before invoking git.
	createCmd:   []string{"clone {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"},
	downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"},

	tagCmd: []tagCmd{
		// tags/xxx matches a git tag named xxx
		// origin/xxx matches a git branch named xxx on the default remote repository
		{"show-ref", `(?:tags|origin)/(\S+)$`},
	},
	// tagSync runs this lookup first and, on a match, replaces the
	// requested tag with the captured "tags/xxx" or "origin/xxx" ref.
	tagLookupCmd: []tagCmd{
		{"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`},
	},
	tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"},
	// Both createCmd and downloadCmd update the working dir.
	// No need to do more here. We used to 'checkout master'
	// but that doesn't work if the default branch is not named master.
	// DO NOT add 'checkout master' here.
	// See golang.org/issue/9032.
	tagSyncDefault: []string{"submodule update --init --recursive"},

	// Schemes are listed in the order they are tried when pinging.
	scheme:     []string{"git", "https", "http", "git+ssh", "ssh"},
	pingCmd:    "ls-remote {scheme}://{repo}",
	remoteRepo: gitRemoteRepo,
}

// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH.
174 var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 175 176 func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) { 177 cmd := "config remote.origin.url" 178 errParse := errors.New("unable to parse output of git " + cmd) 179 errRemoteOriginNotFound := errors.New("remote origin not found") 180 outb, err := vcsGit.run1(rootDir, cmd, nil, false) 181 if err != nil { 182 // if it doesn't output any message, it means the config argument is correct, 183 // but the config value itself doesn't exist 184 if outb != nil && len(outb) == 0 { 185 return "", errRemoteOriginNotFound 186 } 187 return "", err 188 } 189 out := strings.TrimSpace(string(outb)) 190 191 var repoURL *url.URL 192 if m := scpSyntaxRe.FindStringSubmatch(out); m != nil { 193 // Match SCP-like syntax and convert it to a URL. 194 // Eg, "git@github.com:user/repo" becomes 195 // "ssh://git@github.com/user/repo". 196 repoURL = &url.URL{ 197 Scheme: "ssh", 198 User: url.User(m[1]), 199 Host: m[2], 200 Path: m[3], 201 } 202 } else { 203 repoURL, err = url.Parse(out) 204 if err != nil { 205 return "", err 206 } 207 } 208 209 // Iterate over insecure schemes too, because this function simply 210 // reports the state of the repo. If we can't see insecure schemes then 211 // we can't report the actual repo URL. 212 for _, s := range vcsGit.scheme { 213 if repoURL.Scheme == s { 214 return repoURL.String(), nil 215 } 216 } 217 return "", errParse 218 } 219 220 // vcsBzr describes how to use Bazaar. 221 var vcsBzr = &vcsCmd{ 222 name: "Bazaar", 223 cmd: "bzr", 224 225 createCmd: []string{"branch {repo} {dir}"}, 226 227 // Without --overwrite bzr will not pull tags that changed. 228 // Replace by --overwrite-tags after http://pad.lv/681792 goes in. 
229 downloadCmd: []string{"pull --overwrite"}, 230 231 tagCmd: []tagCmd{{"tags", `^(\S+)`}}, 232 tagSyncCmd: []string{"update -r {tag}"}, 233 tagSyncDefault: []string{"update -r revno:-1"}, 234 235 scheme: []string{"https", "http", "bzr", "bzr+ssh"}, 236 pingCmd: "info {scheme}://{repo}", 237 remoteRepo: bzrRemoteRepo, 238 resolveRepo: bzrResolveRepo, 239 } 240 241 func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) { 242 outb, err := vcsBzr.runOutput(rootDir, "config parent_location") 243 if err != nil { 244 return "", err 245 } 246 return strings.TrimSpace(string(outb)), nil 247 } 248 249 func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) { 250 outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo) 251 if err != nil { 252 return "", err 253 } 254 out := string(outb) 255 256 // Expect: 257 // ... 258 // (branch root|repository branch): <URL> 259 // ... 260 261 found := false 262 for _, prefix := range []string{"\n branch root: ", "\n repository branch: "} { 263 i := strings.Index(out, prefix) 264 if i >= 0 { 265 out = out[i+len(prefix):] 266 found = true 267 break 268 } 269 } 270 if !found { 271 return "", fmt.Errorf("unable to parse output of bzr info") 272 } 273 274 i := strings.Index(out, "\n") 275 if i < 0 { 276 return "", fmt.Errorf("unable to parse output of bzr info") 277 } 278 out = out[:i] 279 return strings.TrimSpace(out), nil 280 } 281 282 // vcsSvn describes how to use Subversion. 283 var vcsSvn = &vcsCmd{ 284 name: "Subversion", 285 cmd: "svn", 286 287 createCmd: []string{"checkout {repo} {dir}"}, 288 downloadCmd: []string{"update"}, 289 290 // There is no tag command in subversion. 291 // The branch information is all in the path names. 
292 293 scheme: []string{"https", "http", "svn", "svn+ssh"}, 294 pingCmd: "info {scheme}://{repo}", 295 remoteRepo: svnRemoteRepo, 296 } 297 298 func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) { 299 outb, err := vcsSvn.runOutput(rootDir, "info") 300 if err != nil { 301 return "", err 302 } 303 out := string(outb) 304 305 // Expect: 306 // 307 // ... 308 // URL: <URL> 309 // ... 310 // 311 // Note that we're not using the Repository Root line, 312 // because svn allows checking out subtrees. 313 // The URL will be the URL of the subtree (what we used with 'svn co') 314 // while the Repository Root may be a much higher parent. 315 i := strings.Index(out, "\nURL: ") 316 if i < 0 { 317 return "", fmt.Errorf("unable to parse output of svn info") 318 } 319 out = out[i+len("\nURL: "):] 320 i = strings.Index(out, "\n") 321 if i < 0 { 322 return "", fmt.Errorf("unable to parse output of svn info") 323 } 324 out = out[:i] 325 return strings.TrimSpace(out), nil 326 } 327 328 // fossilRepoName is the name go get associates with a fossil repository. In the 329 // real world the file can be named anything. 
const fossilRepoName = ".fossil"

// vcsFossil describes how to use Fossil (fossil-scm.org)
var vcsFossil = &vcsCmd{
	name: "Fossil",
	cmd:  "fossil",

	// The -go-internal-mkdir and -go-internal-cd prefixes are directives
	// handled by run1, not fossil arguments: the first command creates
	// {dir} and clones the repository file into it; the second changes
	// into {dir} and opens that repository file.
	createCmd:   []string{"-go-internal-mkdir {dir} clone {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"},
	downloadCmd: []string{"up"},

	tagCmd:         []tagCmd{{"tag ls", `(.*)`}},
	tagSyncCmd:     []string{"up tag:{tag}"},
	tagSyncDefault: []string{"up trunk"},

	// Note: no pingCmd is defined for fossil.
	scheme:     []string{"https", "http"},
	remoteRepo: fossilRemoteRepo,
}

// fossilRemoteRepo returns the trimmed output of "fossil remote-url"
// run in rootDir.
func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) {
	out, err := vcsFossil.runOutput(rootDir, "remote-url")
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(out)), nil
}

// String returns the human-readable name of the version control system.
func (v *vcsCmd) String() string {
	return v.name
}

// run runs the command line cmd in the given directory.
// keyval is a list of key, value pairs. run expands
// instances of {key} in cmd into value, but only after
// splitting cmd into individual arguments.
// If an error occurs, run prints the command line and the
// command's combined stdout+stderr to standard error.
// Otherwise run discards the command's output.
func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error {
	_, err := v.run1(dir, cmd, keyval, true)
	return err
}

// runVerboseOnly is like run but only generates error output to standard error in verbose mode.
func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error {
	_, err := v.run1(dir, cmd, keyval, false)
	return err
}

// runOutput is like run but returns the output of the command
// (its combined stdout+stderr), even when the command fails.
func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) {
	return v.run1(dir, cmd, keyval, true)
}

// run1 is the generalized implementation of run and runOutput.
384 func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) { 385 m := make(map[string]string) 386 for i := 0; i < len(keyval); i += 2 { 387 m[keyval[i]] = keyval[i+1] 388 } 389 args := strings.Fields(cmdline) 390 for i, arg := range args { 391 args[i] = expand(m, arg) 392 } 393 394 if len(args) >= 2 && args[0] == "-go-internal-mkdir" { 395 var err error 396 if filepath.IsAbs(args[1]) { 397 err = os.Mkdir(args[1], os.ModePerm) 398 } else { 399 err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm) 400 } 401 if err != nil { 402 return nil, err 403 } 404 args = args[2:] 405 } 406 407 if len(args) >= 2 && args[0] == "-go-internal-cd" { 408 if filepath.IsAbs(args[1]) { 409 dir = args[1] 410 } else { 411 dir = filepath.Join(dir, args[1]) 412 } 413 args = args[2:] 414 } 415 416 _, err := exec.LookPath(v.cmd) 417 if err != nil { 418 fmt.Fprintf(os.Stderr, 419 "go: missing %s command. See https://golang.org/s/gogetcmd\n", 420 v.name) 421 return nil, err 422 } 423 424 cmd := exec.Command(v.cmd, args...) 425 cmd.Dir = dir 426 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 427 if cfg.BuildX { 428 fmt.Printf("cd %s\n", dir) 429 fmt.Printf("%s %s\n", v.cmd, strings.Join(args, " ")) 430 } 431 var buf bytes.Buffer 432 cmd.Stdout = &buf 433 cmd.Stderr = &buf 434 err = cmd.Run() 435 out := buf.Bytes() 436 if err != nil { 437 if verbose || cfg.BuildV { 438 fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " ")) 439 os.Stderr.Write(out) 440 } 441 return out, err 442 } 443 return out, nil 444 } 445 446 // ping pings to determine scheme to use. 447 func (v *vcsCmd) ping(scheme, repo string) error { 448 return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo) 449 } 450 451 // create creates a new copy of repo in dir. 452 // The parent of dir must exist; dir must not. 
453 func (v *vcsCmd) create(dir, repo string) error { 454 for _, cmd := range v.createCmd { 455 if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil { 456 return err 457 } 458 } 459 return nil 460 } 461 462 // download downloads any new changes for the repo in dir. 463 func (v *vcsCmd) download(dir string) error { 464 for _, cmd := range v.downloadCmd { 465 if err := v.run(dir, cmd); err != nil { 466 return err 467 } 468 } 469 return nil 470 } 471 472 // tags returns the list of available tags for the repo in dir. 473 func (v *vcsCmd) tags(dir string) ([]string, error) { 474 var tags []string 475 for _, tc := range v.tagCmd { 476 out, err := v.runOutput(dir, tc.cmd) 477 if err != nil { 478 return nil, err 479 } 480 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 481 for _, m := range re.FindAllStringSubmatch(string(out), -1) { 482 tags = append(tags, m[1]) 483 } 484 } 485 return tags, nil 486 } 487 488 // tagSync syncs the repo in dir to the named tag, 489 // which either is a tag returned by tags or is v.tagDefault. 490 func (v *vcsCmd) tagSync(dir, tag string) error { 491 if v.tagSyncCmd == nil { 492 return nil 493 } 494 if tag != "" { 495 for _, tc := range v.tagLookupCmd { 496 out, err := v.runOutput(dir, tc.cmd, "tag", tag) 497 if err != nil { 498 return err 499 } 500 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 501 m := re.FindStringSubmatch(string(out)) 502 if len(m) > 1 { 503 tag = m[1] 504 break 505 } 506 } 507 } 508 509 if tag == "" && v.tagSyncDefault != nil { 510 for _, cmd := range v.tagSyncDefault { 511 if err := v.run(dir, cmd); err != nil { 512 return err 513 } 514 } 515 return nil 516 } 517 518 for _, cmd := range v.tagSyncCmd { 519 if err := v.run(dir, cmd, "tag", tag); err != nil { 520 return err 521 } 522 } 523 return nil 524 } 525 526 // A vcsPath describes how to convert an import path into a 527 // version control system and repository name. 
528 type vcsPath struct { 529 prefix string // prefix this description applies to 530 re string // pattern for import path 531 repo string // repository to use (expand with match of re) 532 vcs string // version control system to use (expand with match of re) 533 check func(match map[string]string) error // additional checks 534 ping bool // ping for scheme to use to download repo 535 536 regexp *regexp.Regexp // cached compiled form of re 537 } 538 539 // vcsFromDir inspects dir and its parents to determine the 540 // version control system and code repository to use. 541 // On return, root is the import path 542 // corresponding to the root of the repository. 543 func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) { 544 // Clean and double-check that dir is in (a subdirectory of) srcRoot. 545 dir = filepath.Clean(dir) 546 srcRoot = filepath.Clean(srcRoot) 547 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 548 return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 549 } 550 551 var vcsRet *vcsCmd 552 var rootRet string 553 554 origDir := dir 555 for len(dir) > len(srcRoot) { 556 for _, vcs := range vcsList { 557 if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil { 558 root := filepath.ToSlash(dir[len(srcRoot)+1:]) 559 // Record first VCS we find, but keep looking, 560 // to detect mistakes like one kind of VCS inside another. 561 if vcsRet == nil { 562 vcsRet = vcs 563 rootRet = root 564 continue 565 } 566 // Allow .git inside .git, which can arise due to submodules. 567 if vcsRet == vcs && vcs.cmd == "git" { 568 continue 569 } 570 // Otherwise, we have one VCS inside a different VCS. 571 return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s", 572 filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd) 573 } 574 } 575 576 // Move to parent. 
577 ndir := filepath.Dir(dir) 578 if len(ndir) >= len(dir) { 579 // Shouldn't happen, but just in case, stop. 580 break 581 } 582 dir = ndir 583 } 584 585 if vcsRet != nil { 586 return vcsRet, rootRet, nil 587 } 588 589 return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir) 590 } 591 592 // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS 593 // situation for dir, checking parents up until srcRoot. 594 func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error { 595 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 596 return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 597 } 598 599 otherDir := dir 600 for len(otherDir) > len(srcRoot) { 601 for _, otherVCS := range vcsList { 602 if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil { 603 // Allow expected vcs in original dir. 604 if otherDir == dir && otherVCS == vcs { 605 continue 606 } 607 // Allow .git inside .git, which can arise due to submodules. 608 if otherVCS == vcs && vcs.cmd == "git" { 609 continue 610 } 611 // Otherwise, we have one VCS inside a different VCS. 612 return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd) 613 } 614 } 615 // Move to parent. 616 newDir := filepath.Dir(otherDir) 617 if len(newDir) >= len(otherDir) { 618 // Shouldn't happen, but just in case, stop. 619 break 620 } 621 otherDir = newDir 622 } 623 624 return nil 625 } 626 627 // repoRoot represents a version control system, a repo, and a root of 628 // where to put it on disk. 
629 type repoRoot struct { 630 vcs *vcsCmd 631 632 // repo is the repository URL, including scheme 633 repo string 634 635 // root is the import path corresponding to the root of the 636 // repository 637 root string 638 639 // isCustom is true for custom import paths (those defined by HTML meta tags) 640 isCustom bool 641 } 642 643 var httpPrefixRE = regexp.MustCompile(`^https?:`) 644 645 // repoRootForImportPath analyzes importPath to determine the 646 // version control system, and code repository to use. 647 func repoRootForImportPath(importPath string, security web.SecurityMode) (*repoRoot, error) { 648 rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths) 649 if err == errUnknownSite { 650 rr, err = repoRootForImportDynamic(importPath, security) 651 if err != nil { 652 err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err) 653 } 654 } 655 if err != nil { 656 rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic) 657 if err1 == nil { 658 rr = rr1 659 err = nil 660 } 661 } 662 663 // Should have been taken care of above, but make sure. 664 if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.root, "...") { 665 // Do not allow wildcards in the repo root. 666 rr = nil 667 err = fmt.Errorf("cannot expand ... in %q", importPath) 668 } 669 return rr, err 670 } 671 672 var errUnknownSite = errors.New("dynamic lookup required to find mapping") 673 674 // repoRootFromVCSPaths attempts to map importPath to a repoRoot 675 // using the mappings defined in vcsPaths. 676 // If scheme is non-empty, that scheme is forced. 677 func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*repoRoot, error) { 678 // A common error is to use https://packagepath because that's what 679 // hg and git require. Diagnose this helpfully. 680 if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil { 681 // The importPath has been cleaned, so has only one slash. 
The pattern 682 // ignores the slashes; the error message puts them back on the RHS at least. 683 return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//") 684 } 685 for _, srv := range vcsPaths { 686 if !strings.HasPrefix(importPath, srv.prefix) { 687 continue 688 } 689 m := srv.regexp.FindStringSubmatch(importPath) 690 if m == nil { 691 if srv.prefix != "" { 692 return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath) 693 } 694 continue 695 } 696 697 // Build map of named subexpression matches for expand. 698 match := map[string]string{ 699 "prefix": srv.prefix, 700 "import": importPath, 701 } 702 for i, name := range srv.regexp.SubexpNames() { 703 if name != "" && match[name] == "" { 704 match[name] = m[i] 705 } 706 } 707 if srv.vcs != "" { 708 match["vcs"] = expand(match, srv.vcs) 709 } 710 if srv.repo != "" { 711 match["repo"] = expand(match, srv.repo) 712 } 713 if srv.check != nil { 714 if err := srv.check(match); err != nil { 715 return nil, err 716 } 717 } 718 vcs := vcsByCmd(match["vcs"]) 719 if vcs == nil { 720 return nil, fmt.Errorf("unknown version control system %q", match["vcs"]) 721 } 722 if srv.ping { 723 if scheme != "" { 724 match["repo"] = scheme + "://" + match["repo"] 725 } else { 726 for _, scheme := range vcs.scheme { 727 if security == web.Secure && !vcs.isSecureScheme(scheme) { 728 continue 729 } 730 if vcs.ping(scheme, match["repo"]) == nil { 731 match["repo"] = scheme + "://" + match["repo"] 732 break 733 } 734 } 735 } 736 } 737 rr := &repoRoot{ 738 vcs: vcs, 739 repo: match["repo"], 740 root: match["root"], 741 } 742 return rr, nil 743 } 744 return nil, errUnknownSite 745 } 746 747 // repoRootForImportDynamic finds a *repoRoot for a custom domain that's not 748 // statically known by repoRootForImportPathStatic. 749 // 750 // This handles custom import paths like "name.tld/pkg/foo" or just "name.tld". 
751 func repoRootForImportDynamic(importPath string, security web.SecurityMode) (*repoRoot, error) { 752 slash := strings.Index(importPath, "/") 753 if slash < 0 { 754 slash = len(importPath) 755 } 756 host := importPath[:slash] 757 if !strings.Contains(host, ".") { 758 return nil, errors.New("import path does not begin with hostname") 759 } 760 urlStr, body, err := web.GetMaybeInsecure(importPath, security) 761 if err != nil { 762 msg := "https fetch: %v" 763 if security == web.Insecure { 764 msg = "http/" + msg 765 } 766 return nil, fmt.Errorf(msg, err) 767 } 768 defer body.Close() 769 imports, err := parseMetaGoImports(body) 770 if err != nil { 771 return nil, fmt.Errorf("parsing %s: %v", importPath, err) 772 } 773 // Find the matched meta import. 774 mmi, err := matchGoImport(imports, importPath) 775 if err != nil { 776 if _, ok := err.(ImportMismatchError); !ok { 777 return nil, fmt.Errorf("parse %s: %v", urlStr, err) 778 } 779 return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", urlStr, err) 780 } 781 if cfg.BuildV { 782 log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr) 783 } 784 // If the import was "uni.edu/bob/project", which said the 785 // prefix was "uni.edu" and the RepoRoot was "evilroot.com", 786 // make sure we don't trust Bob and check out evilroot.com to 787 // "uni.edu" yet (possibly overwriting/preempting another 788 // non-evil student). Instead, first verify the root and see 789 // if it matches Bob's claim. 
790 if mmi.Prefix != importPath { 791 if cfg.BuildV { 792 log.Printf("get %q: verifying non-authoritative meta tag", importPath) 793 } 794 urlStr0 := urlStr 795 var imports []metaImport 796 urlStr, imports, err = metaImportsForPrefix(mmi.Prefix, security) 797 if err != nil { 798 return nil, err 799 } 800 metaImport2, err := matchGoImport(imports, importPath) 801 if err != nil || mmi != metaImport2 { 802 return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix) 803 } 804 } 805 806 if err := validateRepoRootScheme(mmi.RepoRoot); err != nil { 807 return nil, fmt.Errorf("%s: invalid repo root %q: %v", urlStr, mmi.RepoRoot, err) 808 } 809 rr := &repoRoot{ 810 vcs: vcsByCmd(mmi.VCS), 811 repo: mmi.RepoRoot, 812 root: mmi.Prefix, 813 isCustom: true, 814 } 815 if rr.vcs == nil { 816 return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS) 817 } 818 return rr, nil 819 } 820 821 // validateRepoRootScheme returns an error if repoRoot does not seem 822 // to have a valid URL scheme. At this point we permit things that 823 // aren't valid URLs, although later, if not using -insecure, we will 824 // restrict repoRoots to be valid URLs. This is only because we've 825 // historically permitted them, and people may depend on that. 826 func validateRepoRootScheme(repoRoot string) error { 827 end := strings.Index(repoRoot, "://") 828 if end <= 0 { 829 return errors.New("no scheme") 830 } 831 832 // RFC 3986 section 3.1. 833 for i := 0; i < end; i++ { 834 c := repoRoot[i] 835 switch { 836 case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': 837 // OK. 838 case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': 839 // OK except at start. 
840 if i == 0 { 841 return errors.New("invalid scheme") 842 } 843 default: 844 return errors.New("invalid scheme") 845 } 846 } 847 848 return nil 849 } 850 851 var fetchGroup singleflight.Group 852 var ( 853 fetchCacheMu sync.Mutex 854 fetchCache = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix 855 ) 856 857 // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag 858 // and returns its HTML discovery URL and the parsed metaImport lines 859 // found on the page. 860 // 861 // The importPath is of the form "golang.org/x/tools". 862 // It is an error if no imports are found. 863 // urlStr will still be valid if err != nil. 864 // The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1" 865 func metaImportsForPrefix(importPrefix string, security web.SecurityMode) (urlStr string, imports []metaImport, err error) { 866 setCache := func(res fetchResult) (fetchResult, error) { 867 fetchCacheMu.Lock() 868 defer fetchCacheMu.Unlock() 869 fetchCache[importPrefix] = res 870 return res, nil 871 } 872 873 resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) { 874 fetchCacheMu.Lock() 875 if res, ok := fetchCache[importPrefix]; ok { 876 fetchCacheMu.Unlock() 877 return res, nil 878 } 879 fetchCacheMu.Unlock() 880 881 urlStr, body, err := web.GetMaybeInsecure(importPrefix, security) 882 if err != nil { 883 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)}) 884 } 885 imports, err := parseMetaGoImports(body) 886 if err != nil { 887 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)}) 888 } 889 if len(imports) == 0 { 890 err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr) 891 } 892 return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err}) 893 }) 894 res := resi.(fetchResult) 895 return res.urlStr, res.imports, res.err 896 } 897 898 type fetchResult struct { 899 urlStr string // e.g. 
"https://foo.com/x/bar?go-get=1" 900 imports []metaImport 901 err error 902 } 903 904 // metaImport represents the parsed <meta name="go-import" 905 // content="prefix vcs reporoot" /> tags from HTML files. 906 type metaImport struct { 907 Prefix, VCS, RepoRoot string 908 } 909 910 func splitPathHasPrefix(path, prefix []string) bool { 911 if len(path) < len(prefix) { 912 return false 913 } 914 for i, p := range prefix { 915 if path[i] != p { 916 return false 917 } 918 } 919 return true 920 } 921 922 // A ImportMismatchError is returned where metaImport/s are present 923 // but none match our import path. 924 type ImportMismatchError struct { 925 importPath string 926 mismatches []string // the meta imports that were discarded for not matching our importPath 927 } 928 929 func (m ImportMismatchError) Error() string { 930 formattedStrings := make([]string, len(m.mismatches)) 931 for i, pre := range m.mismatches { 932 formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath) 933 } 934 return strings.Join(formattedStrings, ", ") 935 } 936 937 // matchGoImport returns the metaImport from imports matching importPath. 938 // An error is returned if there are multiple matches. 939 // errNoMatch is returned if none match. 
940 func matchGoImport(imports []metaImport, importPath string) (metaImport, error) { 941 match := -1 942 imp := strings.Split(importPath, "/") 943 944 errImportMismatch := ImportMismatchError{importPath: importPath} 945 for i, im := range imports { 946 pre := strings.Split(im.Prefix, "/") 947 948 if !splitPathHasPrefix(imp, pre) { 949 errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix) 950 continue 951 } 952 953 if match != -1 { 954 return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath) 955 } 956 match = i 957 } 958 959 if match == -1 { 960 return metaImport{}, errImportMismatch 961 } 962 return imports[match], nil 963 } 964 965 // expand rewrites s to replace {k} with match[k] for each key k in match. 966 func expand(match map[string]string, s string) string { 967 // We want to replace each match exactly once, and the result of expansion 968 // must not depend on the iteration order through the map. 969 // A strings.Replacer has exactly the properties we're looking for. 
970 oldNew := make([]string, 0, 2*len(match)) 971 for k, v := range match { 972 oldNew = append(oldNew, "{"+k+"}", v) 973 } 974 return strings.NewReplacer(oldNew...).Replace(s) 975 } 976 977 // vcsPaths defines the meaning of import paths referring to 978 // commonly-used VCS hosting sites (github.com/user/dir) 979 // and import paths referring to a fully-qualified importPath 980 // containing a VCS type (foo.com/repo.git/dir) 981 var vcsPaths = []*vcsPath{ 982 // Github 983 { 984 prefix: "github.com/", 985 re: `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`, 986 vcs: "git", 987 repo: "https://{root}", 988 check: noVCSSuffix, 989 }, 990 991 // Bitbucket 992 { 993 prefix: "bitbucket.org/", 994 re: `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 995 repo: "https://{root}", 996 check: bitbucketVCS, 997 }, 998 999 // IBM DevOps Services (JazzHub) 1000 { 1001 prefix: "hub.jazz.net/git/", 1002 re: `^(?P<root>hub.jazz.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 1003 vcs: "git", 1004 repo: "https://{root}", 1005 check: noVCSSuffix, 1006 }, 1007 1008 // Git at Apache 1009 { 1010 prefix: "git.apache.org/", 1011 re: `^(?P<root>git.apache.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`, 1012 vcs: "git", 1013 repo: "https://{root}", 1014 }, 1015 1016 // Git at OpenStack 1017 { 1018 prefix: "git.openstack.org/", 1019 re: `^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`, 1020 vcs: "git", 1021 repo: "https://{root}", 1022 }, 1023 1024 // chiselapp.com for fossil 1025 { 1026 prefix: "chiselapp.com/", 1027 re: `^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`, 1028 vcs: "fossil", 1029 repo: "https://{root}", 1030 }, 1031 1032 // General syntax for any server. 1033 // Must be last. 
1034 { 1035 re: `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`, 1036 ping: true, 1037 }, 1038 } 1039 1040 // vcsPathsAfterDynamic gives additional vcsPaths entries 1041 // to try after the dynamic HTML check. 1042 // This gives those sites a chance to introduce <meta> tags 1043 // as part of a graceful transition away from the hard-coded logic. 1044 var vcsPathsAfterDynamic = []*vcsPath{ 1045 // Launchpad. See golang.org/issue/11436. 1046 { 1047 prefix: "launchpad.net/", 1048 re: `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 1049 vcs: "bzr", 1050 repo: "https://{root}", 1051 check: launchpadVCS, 1052 }, 1053 } 1054 1055 func init() { 1056 // fill in cached regexps. 1057 // Doing this eagerly discovers invalid regexp syntax 1058 // without having to run a command that needs that regexp. 1059 for _, srv := range vcsPaths { 1060 srv.regexp = regexp.MustCompile(srv.re) 1061 } 1062 for _, srv := range vcsPathsAfterDynamic { 1063 srv.regexp = regexp.MustCompile(srv.re) 1064 } 1065 } 1066 1067 // noVCSSuffix checks that the repository name does not 1068 // end in .foo for any version control system foo. 1069 // The usual culprit is ".git". 1070 func noVCSSuffix(match map[string]string) error { 1071 repo := match["repo"] 1072 for _, vcs := range vcsList { 1073 if strings.HasSuffix(repo, "."+vcs.cmd) { 1074 return fmt.Errorf("invalid version control suffix in %s path", match["prefix"]) 1075 } 1076 } 1077 return nil 1078 } 1079 1080 // bitbucketVCS determines the version control system for a 1081 // Bitbucket repository, by using the Bitbucket API. 
1082 func bitbucketVCS(match map[string]string) error { 1083 if err := noVCSSuffix(match); err != nil { 1084 return err 1085 } 1086 1087 var resp struct { 1088 SCM string `json:"scm"` 1089 } 1090 url := expand(match, "https://api.bitbucket.org/2.0/repositories/{bitname}?fields=scm") 1091 data, err := web.Get(url) 1092 if err != nil { 1093 if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 { 1094 // this may be a private repository. If so, attempt to determine which 1095 // VCS it uses. See issue 5375. 1096 root := match["root"] 1097 for _, vcs := range []string{"git", "hg"} { 1098 if vcsByCmd(vcs).ping("https", root) == nil { 1099 resp.SCM = vcs 1100 break 1101 } 1102 } 1103 } 1104 1105 if resp.SCM == "" { 1106 return err 1107 } 1108 } else { 1109 if err := json.Unmarshal(data, &resp); err != nil { 1110 return fmt.Errorf("decoding %s: %v", url, err) 1111 } 1112 } 1113 1114 if vcsByCmd(resp.SCM) != nil { 1115 match["vcs"] = resp.SCM 1116 if resp.SCM == "git" { 1117 match["repo"] += ".git" 1118 } 1119 return nil 1120 } 1121 1122 return fmt.Errorf("unable to detect version control system for bitbucket.org/ path") 1123 } 1124 1125 // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case, 1126 // "foo" could be a series name registered in Launchpad with its own branch, 1127 // and it could also be the name of a directory within the main project 1128 // branch one level up. 1129 func launchpadVCS(match map[string]string) error { 1130 if match["project"] == "" || match["series"] == "" { 1131 return nil 1132 } 1133 _, err := web.Get(expand(match, "https://code.launchpad.net/{project}{series}/.bzr/branch-format")) 1134 if err != nil { 1135 match["root"] = expand(match, "launchpad.net/{project}") 1136 match["repo"] = expand(match, "https://{root}") 1137 } 1138 return nil 1139 }