github.com/gernest/nezuko@v0.1.2/internal/get/vcs.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package get 6 7 import ( 8 "encoding/json" 9 "errors" 10 "fmt" 11 "log" 12 "net/url" 13 "os" 14 "os/exec" 15 "path/filepath" 16 "regexp" 17 "strings" 18 "sync" 19 20 "github.com/gernest/nezuko/internal/singleflight" 21 22 "github.com/gernest/nezuko/internal/base" 23 "github.com/gernest/nezuko/internal/cfg" 24 "github.com/gernest/nezuko/internal/web" 25 ) 26 27 var ( 28 Insecure bool 29 ) 30 31 // A vcsCmd describes how to use a version control system 32 // like Mercurial, Git, or Subversion. 33 type vcsCmd struct { 34 name string 35 cmd string // name of binary to invoke command 36 37 createCmd []string // commands to download a fresh copy of a repository 38 downloadCmd []string // commands to download updates into an existing repository 39 40 tagCmd []tagCmd // commands to list tags 41 tagLookupCmd []tagCmd // commands to lookup tags before running tagSyncCmd 42 tagSyncCmd []string // commands to sync to specific tag 43 tagSyncDefault []string // commands to sync to default tag 44 45 scheme []string 46 pingCmd string 47 48 remoteRepo func(v *vcsCmd, rootDir string) (remoteRepo string, err error) 49 resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) 50 } 51 52 var defaultSecureScheme = map[string]bool{ 53 "https": true, 54 "git+ssh": true, 55 "bzr+ssh": true, 56 "svn+ssh": true, 57 "ssh": true, 58 } 59 60 func (v *vcsCmd) isSecure(repo string) bool { 61 u, err := url.Parse(repo) 62 if err != nil { 63 // If repo is not a URL, it's not secure. 64 return false 65 } 66 return v.isSecureScheme(u.Scheme) 67 } 68 69 func (v *vcsCmd) isSecureScheme(scheme string) bool { 70 switch v.cmd { 71 case "git": 72 // GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a 73 // colon-separated list of schemes that are allowed to be used with git 74 // fetch/clone. Any scheme not mentioned will be considered insecure. 75 if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" { 76 for _, s := range strings.Split(allow, ":") { 77 if s == scheme { 78 return true 79 } 80 } 81 return false 82 } 83 } 84 return defaultSecureScheme[scheme] 85 } 86 87 // A tagCmd describes a command to list available tags 88 // that can be passed to tagSyncCmd. 89 type tagCmd struct { 90 cmd string // command to list tags 91 pattern string // regexp to extract tags from list 92 } 93 94 // vcsList lists the known version control systems 95 var vcsList = []*vcsCmd{ 96 vcsHg, 97 vcsGit, 98 vcsSvn, 99 vcsBzr, 100 vcsFossil, 101 } 102 103 // vcsByCmd returns the version control system for the given 104 // command name (hg, git, svn, bzr). 105 func vcsByCmd(cmd string) *vcsCmd { 106 for _, vcs := range vcsList { 107 if vcs.cmd == cmd { 108 return vcs 109 } 110 } 111 return nil 112 } 113 114 // vcsHg describes how to use Mercurial. 115 var vcsHg = &vcsCmd{ 116 name: "Mercurial", 117 cmd: "hg", 118 119 createCmd: []string{"clone -U {repo} {dir}"}, 120 downloadCmd: []string{"pull"}, 121 122 // We allow both tag and branch names as 'tags' 123 // for selecting a version. This lets people have 124 // a go.release.r60 branch and a go1 branch 125 // and make changes in both, without constantly 126 // editing .hgtags. 127 tagCmd: []tagCmd{ 128 {"tags", `^(\S+)`}, 129 {"branches", `^(\S+)`}, 130 }, 131 tagSyncCmd: []string{"update -r {tag}"}, 132 tagSyncDefault: []string{"update default"}, 133 134 scheme: []string{"https", "http", "ssh"}, 135 pingCmd: "identify {scheme}://{repo}", 136 remoteRepo: hgRemoteRepo, 137 } 138 139 func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) { 140 out, err := vcsHg.runOutput(rootDir, "paths default") 141 if err != nil { 142 return "", err 143 } 144 return strings.TrimSpace(string(out)), nil 145 } 146 147 // vcsGit describes how to use Git. 148 var vcsGit = &vcsCmd{ 149 name: "Git", 150 cmd: "git", 151 152 createCmd: []string{"clone {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"}, 153 downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"}, 154 155 tagCmd: []tagCmd{ 156 // tags/xxx matches a git tag named xxx 157 // origin/xxx matches a git branch named xxx on the default remote repository 158 {"show-ref", `(?:tags|origin)/(\S+)$`}, 159 }, 160 tagLookupCmd: []tagCmd{ 161 {"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`}, 162 }, 163 tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"}, 164 // both createCmd and downloadCmd update the working dir. 165 // No need to do more here. We used to 'checkout master' 166 // but that doesn't work if the default branch is not named master. 167 // DO NOT add 'checkout master' here. 168 // See golang.org/issue/9032. 169 tagSyncDefault: []string{"submodule update --init --recursive"}, 170 171 scheme: []string{"git", "https", "http", "git+ssh", "ssh"}, 172 pingCmd: "ls-remote {scheme}://{repo}", 173 remoteRepo: gitRemoteRepo, 174 } 175 176 // scpSyntaxRe matches the SCP-like addresses used by Git to access 177 // repositories by SSH. 178 var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 179 180 func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) { 181 cmd := "config remote.origin.url" 182 errParse := errors.New("unable to parse output of git " + cmd) 183 errRemoteOriginNotFound := errors.New("remote origin not found") 184 outb, err := vcsGit.run1(rootDir, cmd, nil, false) 185 if err != nil { 186 // if it doesn't output any message, it means the config argument is correct, 187 // but the config value itself doesn't exist 188 if outb != nil && len(outb) == 0 { 189 return "", errRemoteOriginNotFound 190 } 191 return "", err 192 } 193 out := strings.TrimSpace(string(outb)) 194 195 var repoURL *url.URL 196 if m := scpSyntaxRe.FindStringSubmatch(out); m != nil { 197 // Match SCP-like syntax and convert it to a URL. 198 // Eg, "git@github.com:user/repo" becomes 199 // "ssh://git@github.com/user/repo". 200 repoURL = &url.URL{ 201 Scheme: "ssh", 202 User: url.User(m[1]), 203 Host: m[2], 204 Path: m[3], 205 } 206 } else { 207 repoURL, err = url.Parse(out) 208 if err != nil { 209 return "", err 210 } 211 } 212 213 // Iterate over insecure schemes too, because this function simply 214 // reports the state of the repo. If we can't see insecure schemes then 215 // we can't report the actual repo URL. 216 for _, s := range vcsGit.scheme { 217 if repoURL.Scheme == s { 218 return repoURL.String(), nil 219 } 220 } 221 return "", errParse 222 } 223 224 // vcsBzr describes how to use Bazaar. 225 var vcsBzr = &vcsCmd{ 226 name: "Bazaar", 227 cmd: "bzr", 228 229 createCmd: []string{"branch {repo} {dir}"}, 230 231 // Without --overwrite bzr will not pull tags that changed. 232 // Replace by --overwrite-tags after http://pad.lv/681792 goes in. 233 downloadCmd: []string{"pull --overwrite"}, 234 235 tagCmd: []tagCmd{{"tags", `^(\S+)`}}, 236 tagSyncCmd: []string{"update -r {tag}"}, 237 tagSyncDefault: []string{"update -r revno:-1"}, 238 239 scheme: []string{"https", "http", "bzr", "bzr+ssh"}, 240 pingCmd: "info {scheme}://{repo}", 241 remoteRepo: bzrRemoteRepo, 242 resolveRepo: bzrResolveRepo, 243 } 244 245 func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) { 246 outb, err := vcsBzr.runOutput(rootDir, "config parent_location") 247 if err != nil { 248 return "", err 249 } 250 return strings.TrimSpace(string(outb)), nil 251 } 252 253 func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) { 254 outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo) 255 if err != nil { 256 return "", err 257 } 258 out := string(outb) 259 260 // Expect: 261 // ... 262 // (branch root|repository branch): <URL> 263 // ... 264 265 found := false 266 for _, prefix := range []string{"\n branch root: ", "\n repository branch: "} { 267 i := strings.Index(out, prefix) 268 if i >= 0 { 269 out = out[i+len(prefix):] 270 found = true 271 break 272 } 273 } 274 if !found { 275 return "", fmt.Errorf("unable to parse output of bzr info") 276 } 277 278 i := strings.Index(out, "\n") 279 if i < 0 { 280 return "", fmt.Errorf("unable to parse output of bzr info") 281 } 282 out = out[:i] 283 return strings.TrimSpace(out), nil 284 } 285 286 // vcsSvn describes how to use Subversion. 287 var vcsSvn = &vcsCmd{ 288 name: "Subversion", 289 cmd: "svn", 290 291 createCmd: []string{"checkout {repo} {dir}"}, 292 downloadCmd: []string{"update"}, 293 294 // There is no tag command in subversion. 295 // The branch information is all in the path names. 296 297 scheme: []string{"https", "http", "svn", "svn+ssh"}, 298 pingCmd: "info {scheme}://{repo}", 299 remoteRepo: svnRemoteRepo, 300 } 301 302 func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) { 303 outb, err := vcsSvn.runOutput(rootDir, "info") 304 if err != nil { 305 return "", err 306 } 307 out := string(outb) 308 309 // Expect: 310 // 311 // ... 312 // URL: <URL> 313 // ... 314 // 315 // Note that we're not using the Repository Root line, 316 // because svn allows checking out subtrees. 317 // The URL will be the URL of the subtree (what we used with 'svn co') 318 // while the Repository Root may be a much higher parent. 319 i := strings.Index(out, "\nURL: ") 320 if i < 0 { 321 return "", fmt.Errorf("unable to parse output of svn info") 322 } 323 out = out[i+len("\nURL: "):] 324 i = strings.Index(out, "\n") 325 if i < 0 { 326 return "", fmt.Errorf("unable to parse output of svn info") 327 } 328 out = out[:i] 329 return strings.TrimSpace(out), nil 330 } 331 332 // fossilRepoName is the name go get associates with a fossil repository. In the 333 // real world the file can be named anything. 334 const fossilRepoName = ".fossil" 335 336 // vcsFossil describes how to use Fossil (fossil-scm.org) 337 var vcsFossil = &vcsCmd{ 338 name: "Fossil", 339 cmd: "fossil", 340 341 createCmd: []string{"-go-internal-mkdir {dir} clone {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"}, 342 downloadCmd: []string{"up"}, 343 344 tagCmd: []tagCmd{{"tag ls", `(.*)`}}, 345 tagSyncCmd: []string{"up tag:{tag}"}, 346 tagSyncDefault: []string{"up trunk"}, 347 348 scheme: []string{"https", "http"}, 349 remoteRepo: fossilRemoteRepo, 350 } 351 352 func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) { 353 out, err := vcsFossil.runOutput(rootDir, "remote-url") 354 if err != nil { 355 return "", err 356 } 357 return strings.TrimSpace(string(out)), nil 358 } 359 360 func (v *vcsCmd) String() string { 361 return v.name 362 } 363 364 // run runs the command line cmd in the given directory. 365 // keyval is a list of key, value pairs. run expands 366 // instances of {key} in cmd into value, but only after 367 // splitting cmd into individual arguments. 368 // If an error occurs, run prints the command line and the 369 // command's combined stdout+stderr to standard error. 370 // Otherwise run discards the command's output. 371 func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error { 372 _, err := v.run1(dir, cmd, keyval, true) 373 return err 374 } 375 376 // runVerboseOnly is like run but only generates error output to standard error in verbose mode. 377 func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error { 378 _, err := v.run1(dir, cmd, keyval, false) 379 return err 380 } 381 382 // runOutput is like run but returns the output of the command. 383 func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) { 384 return v.run1(dir, cmd, keyval, true) 385 } 386 387 // run1 is the generalized implementation of run and runOutput. 388 func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) { 389 m := make(map[string]string) 390 for i := 0; i < len(keyval); i += 2 { 391 m[keyval[i]] = keyval[i+1] 392 } 393 args := strings.Fields(cmdline) 394 for i, arg := range args { 395 args[i] = expand(m, arg) 396 } 397 398 if len(args) >= 2 && args[0] == "-go-internal-mkdir" { 399 var err error 400 if filepath.IsAbs(args[1]) { 401 err = os.Mkdir(args[1], os.ModePerm) 402 } else { 403 err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm) 404 } 405 if err != nil { 406 return nil, err 407 } 408 args = args[2:] 409 } 410 411 if len(args) >= 2 && args[0] == "-go-internal-cd" { 412 if filepath.IsAbs(args[1]) { 413 dir = args[1] 414 } else { 415 dir = filepath.Join(dir, args[1]) 416 } 417 args = args[2:] 418 } 419 420 _, err := exec.LookPath(v.cmd) 421 if err != nil { 422 fmt.Fprintf(os.Stderr, 423 "z: missing %s command. See https://golang.org/s/gogetcmd\n", 424 v.name) 425 return nil, err 426 } 427 428 cmd := exec.Command(v.cmd, args...) 429 cmd.Dir = dir 430 cmd.Env = base.EnvForDir(cmd.Dir, os.Environ()) 431 if cfg.BuildX { 432 fmt.Fprintf(os.Stderr, "cd %s\n", dir) 433 fmt.Fprintf(os.Stderr, "%s %s\n", v.cmd, strings.Join(args, " ")) 434 } 435 out, err := cmd.Output() 436 if err != nil { 437 if verbose || cfg.BuildV { 438 fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " ")) 439 if ee, ok := err.(*exec.ExitError); ok && len(ee.Stderr) > 0 { 440 os.Stderr.Write(ee.Stderr) 441 } else { 442 fmt.Fprintf(os.Stderr, err.Error()) 443 } 444 } 445 } 446 return out, err 447 } 448 449 // ping pings to determine scheme to use. 450 func (v *vcsCmd) ping(scheme, repo string) error { 451 return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo) 452 } 453 454 // create creates a new copy of repo in dir. 455 // The parent of dir must exist; dir must not. 456 func (v *vcsCmd) create(dir, repo string) error { 457 for _, cmd := range v.createCmd { 458 if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil { 459 return err 460 } 461 } 462 return nil 463 } 464 465 // download downloads any new changes for the repo in dir. 466 func (v *vcsCmd) download(dir string) error { 467 for _, cmd := range v.downloadCmd { 468 if err := v.run(dir, cmd); err != nil { 469 return err 470 } 471 } 472 return nil 473 } 474 475 // tags returns the list of available tags for the repo in dir. 476 func (v *vcsCmd) tags(dir string) ([]string, error) { 477 var tags []string 478 for _, tc := range v.tagCmd { 479 out, err := v.runOutput(dir, tc.cmd) 480 if err != nil { 481 return nil, err 482 } 483 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 484 for _, m := range re.FindAllStringSubmatch(string(out), -1) { 485 tags = append(tags, m[1]) 486 } 487 } 488 return tags, nil 489 } 490 491 // tagSync syncs the repo in dir to the named tag, 492 // which either is a tag returned by tags or is v.tagDefault. 493 func (v *vcsCmd) tagSync(dir, tag string) error { 494 if v.tagSyncCmd == nil { 495 return nil 496 } 497 if tag != "" { 498 for _, tc := range v.tagLookupCmd { 499 out, err := v.runOutput(dir, tc.cmd, "tag", tag) 500 if err != nil { 501 return err 502 } 503 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 504 m := re.FindStringSubmatch(string(out)) 505 if len(m) > 1 { 506 tag = m[1] 507 break 508 } 509 } 510 } 511 512 if tag == "" && v.tagSyncDefault != nil { 513 for _, cmd := range v.tagSyncDefault { 514 if err := v.run(dir, cmd); err != nil { 515 return err 516 } 517 } 518 return nil 519 } 520 521 for _, cmd := range v.tagSyncCmd { 522 if err := v.run(dir, cmd, "tag", tag); err != nil { 523 return err 524 } 525 } 526 return nil 527 } 528 529 // A vcsPath describes how to convert an import path into a 530 // version control system and repository name. 531 type vcsPath struct { 532 prefix string // prefix this description applies to 533 re string // pattern for import path 534 repo string // repository to use (expand with match of re) 535 vcs string // version control system to use (expand with match of re) 536 check func(match map[string]string) error // additional checks 537 ping bool // ping for scheme to use to download repo 538 539 regexp *regexp.Regexp // cached compiled form of re 540 } 541 542 // vcsFromDir inspects dir and its parents to determine the 543 // version control system and code repository to use. 544 // On return, root is the import path 545 // corresponding to the root of the repository. 546 func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) { 547 // Clean and double-check that dir is in (a subdirectory of) srcRoot. 548 dir = filepath.Clean(dir) 549 srcRoot = filepath.Clean(srcRoot) 550 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 551 return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 552 } 553 554 var vcsRet *vcsCmd 555 var rootRet string 556 557 origDir := dir 558 for len(dir) > len(srcRoot) { 559 for _, vcs := range vcsList { 560 if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil { 561 root := filepath.ToSlash(dir[len(srcRoot)+1:]) 562 // Record first VCS we find, but keep looking, 563 // to detect mistakes like one kind of VCS inside another. 564 if vcsRet == nil { 565 vcsRet = vcs 566 rootRet = root 567 continue 568 } 569 // Allow .git inside .git, which can arise due to submodules. 570 if vcsRet == vcs && vcs.cmd == "git" { 571 continue 572 } 573 // Otherwise, we have one VCS inside a different VCS. 574 return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s", 575 filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd) 576 } 577 } 578 579 // Move to parent. 580 ndir := filepath.Dir(dir) 581 if len(ndir) >= len(dir) { 582 // Shouldn't happen, but just in case, stop. 583 break 584 } 585 dir = ndir 586 } 587 588 if vcsRet != nil { 589 return vcsRet, rootRet, nil 590 } 591 592 return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir) 593 } 594 595 // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS 596 // situation for dir, checking parents up until srcRoot. 597 func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error { 598 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 599 return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 600 } 601 602 otherDir := dir 603 for len(otherDir) > len(srcRoot) { 604 for _, otherVCS := range vcsList { 605 if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil { 606 // Allow expected vcs in original dir. 607 if otherDir == dir && otherVCS == vcs { 608 continue 609 } 610 // Allow .git inside .git, which can arise due to submodules. 611 if otherVCS == vcs && vcs.cmd == "git" { 612 continue 613 } 614 // Otherwise, we have one VCS inside a different VCS. 615 return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd) 616 } 617 } 618 // Move to parent. 619 newDir := filepath.Dir(otherDir) 620 if len(newDir) >= len(otherDir) { 621 // Shouldn't happen, but just in case, stop. 622 break 623 } 624 otherDir = newDir 625 } 626 627 return nil 628 } 629 630 // RepoRoot describes the repository root for a tree of source code. 631 type RepoRoot struct { 632 Repo string // repository URL, including scheme 633 Root string // import path corresponding to root of repo 634 IsCustom bool // defined by served <meta> tags (as opposed to hard-coded pattern) 635 VCS string // vcs type ("mod", "git", ...) 636 637 vcs *vcsCmd // internal: vcs command access 638 } 639 640 var httpPrefixRE = regexp.MustCompile(`^https?:`) 641 642 // ModuleMode specifies whether to prefer modules when looking up code sources. 643 type ModuleMode int 644 645 const ( 646 IgnoreMod ModuleMode = iota 647 PreferMod 648 ) 649 650 // RepoRootForImportPath analyzes importPath to determine the 651 // version control system, and code repository to use. 652 func RepoRootForImportPath(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) { 653 rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths) 654 if err == errUnknownSite { 655 rr, err = repoRootForImportDynamic(importPath, mod, security) 656 if err != nil { 657 err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err) 658 } 659 } 660 if err != nil { 661 rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic) 662 if err1 == nil { 663 rr = rr1 664 err = nil 665 } 666 } 667 668 // Should have been taken care of above, but make sure. 669 if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.Root, "...") { 670 // Do not allow wildcards in the repo root. 671 rr = nil 672 err = fmt.Errorf("cannot expand ... in %q", importPath) 673 } 674 return rr, err 675 } 676 677 var errUnknownSite = errors.New("dynamic lookup required to find mapping") 678 679 // repoRootFromVCSPaths attempts to map importPath to a repoRoot 680 // using the mappings defined in vcsPaths. 681 // If scheme is non-empty, that scheme is forced. 682 func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*RepoRoot, error) { 683 // A common error is to use https://packagepath because that's what 684 // hg and git require. Diagnose this helpfully. 685 if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil { 686 // The importPath has been cleaned, so has only one slash. The pattern 687 // ignores the slashes; the error message puts them back on the RHS at least. 688 return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//") 689 } 690 for _, srv := range vcsPaths { 691 if !strings.HasPrefix(importPath, srv.prefix) { 692 continue 693 } 694 m := srv.regexp.FindStringSubmatch(importPath) 695 if m == nil { 696 if srv.prefix != "" { 697 return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath) 698 } 699 continue 700 } 701 702 // Build map of named subexpression matches for expand. 703 match := map[string]string{ 704 "prefix": srv.prefix, 705 "import": importPath, 706 } 707 for i, name := range srv.regexp.SubexpNames() { 708 if name != "" && match[name] == "" { 709 match[name] = m[i] 710 } 711 } 712 if srv.vcs != "" { 713 match["vcs"] = expand(match, srv.vcs) 714 } 715 if srv.repo != "" { 716 match["repo"] = expand(match, srv.repo) 717 } 718 if srv.check != nil { 719 if err := srv.check(match); err != nil { 720 return nil, err 721 } 722 } 723 vcs := vcsByCmd(match["vcs"]) 724 if vcs == nil { 725 return nil, fmt.Errorf("unknown version control system %q", match["vcs"]) 726 } 727 if srv.ping { 728 if scheme != "" { 729 match["repo"] = scheme + "://" + match["repo"] 730 } else { 731 for _, scheme := range vcs.scheme { 732 if security == web.Secure && !vcs.isSecureScheme(scheme) { 733 continue 734 } 735 if vcs.pingCmd != "" && vcs.ping(scheme, match["repo"]) == nil { 736 match["repo"] = scheme + "://" + match["repo"] 737 goto Found 738 } 739 } 740 // No scheme found. Fall back to the first one. 741 match["repo"] = vcs.scheme[0] + "://" + match["repo"] 742 Found: 743 } 744 } 745 rr := &RepoRoot{ 746 Repo: match["repo"], 747 Root: match["root"], 748 VCS: vcs.cmd, 749 vcs: vcs, 750 } 751 return rr, nil 752 } 753 return nil, errUnknownSite 754 } 755 756 // repoRootForImportDynamic finds a *RepoRoot for a custom domain that's not 757 // statically known by repoRootForImportPathStatic. 758 // 759 // This handles custom import paths like "name.tld/pkg/foo" or just "name.tld". 760 func repoRootForImportDynamic(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) { 761 slash := strings.Index(importPath, "/") 762 if slash < 0 { 763 slash = len(importPath) 764 } 765 host := importPath[:slash] 766 if !strings.Contains(host, ".") { 767 return nil, errors.New("import path does not begin with hostname") 768 } 769 urlStr, body, err := web.GetMaybeInsecure(importPath, security) 770 if err != nil { 771 msg := "https fetch: %v" 772 if security == web.Insecure { 773 msg = "http/" + msg 774 } 775 return nil, fmt.Errorf(msg, err) 776 } 777 defer body.Close() 778 imports, err := parseMetaGoImports(body, mod) 779 if err != nil { 780 return nil, fmt.Errorf("parsing %s: %v", importPath, err) 781 } 782 // Find the matched meta import. 783 mmi, err := matchGoImport(imports, importPath) 784 if err != nil { 785 if _, ok := err.(ImportMismatchError); !ok { 786 return nil, fmt.Errorf("parse %s: %v", urlStr, err) 787 } 788 return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", urlStr, err) 789 } 790 if cfg.BuildV { 791 log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr) 792 } 793 // If the import was "uni.edu/bob/project", which said the 794 // prefix was "uni.edu" and the RepoRoot was "evilroot.com", 795 // make sure we don't trust Bob and check out evilroot.com to 796 // "uni.edu" yet (possibly overwriting/preempting another 797 // non-evil student). Instead, first verify the root and see 798 // if it matches Bob's claim. 799 if mmi.Prefix != importPath { 800 if cfg.BuildV { 801 log.Printf("get %q: verifying non-authoritative meta tag", importPath) 802 } 803 urlStr0 := urlStr 804 var imports []metaImport 805 urlStr, imports, err = metaImportsForPrefix(mmi.Prefix, mod, security) 806 if err != nil { 807 return nil, err 808 } 809 metaImport2, err := matchGoImport(imports, importPath) 810 if err != nil || mmi != metaImport2 { 811 return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix) 812 } 813 } 814 815 if err := validateRepoRoot(mmi.RepoRoot); err != nil { 816 return nil, fmt.Errorf("%s: invalid repo root %q: %v", urlStr, mmi.RepoRoot, err) 817 } 818 vcs := vcsByCmd(mmi.VCS) 819 if vcs == nil && mmi.VCS != "mod" { 820 return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS) 821 } 822 823 rr := &RepoRoot{ 824 Repo: mmi.RepoRoot, 825 Root: mmi.Prefix, 826 IsCustom: true, 827 VCS: mmi.VCS, 828 vcs: vcs, 829 } 830 return rr, nil 831 } 832 833 // validateRepoRoot returns an error if repoRoot does not seem to be 834 // a valid URL with scheme. 835 func validateRepoRoot(repoRoot string) error { 836 url, err := url.Parse(repoRoot) 837 if err != nil { 838 return err 839 } 840 if url.Scheme == "" { 841 return errors.New("no scheme") 842 } 843 return nil 844 } 845 846 var fetchGroup singleflight.Group 847 var ( 848 fetchCacheMu sync.Mutex 849 fetchCache = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix 850 ) 851 852 // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag 853 // and returns its HTML discovery URL and the parsed metaImport lines 854 // found on the page. 855 // 856 // The importPath is of the form "golang.org/x/tools". 857 // It is an error if no imports are found. 858 // urlStr will still be valid if err != nil. 859 // The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1" 860 func metaImportsForPrefix(importPrefix string, mod ModuleMode, security web.SecurityMode) (urlStr string, imports []metaImport, err error) { 861 setCache := func(res fetchResult) (fetchResult, error) { 862 fetchCacheMu.Lock() 863 defer fetchCacheMu.Unlock() 864 fetchCache[importPrefix] = res 865 return res, nil 866 } 867 868 resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) { 869 fetchCacheMu.Lock() 870 if res, ok := fetchCache[importPrefix]; ok { 871 fetchCacheMu.Unlock() 872 return res, nil 873 } 874 fetchCacheMu.Unlock() 875 876 urlStr, body, err := web.GetMaybeInsecure(importPrefix, security) 877 if err != nil { 878 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)}) 879 } 880 imports, err := parseMetaGoImports(body, mod) 881 if err != nil { 882 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)}) 883 } 884 if len(imports) == 0 { 885 err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr) 886 } 887 return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err}) 888 }) 889 res := resi.(fetchResult) 890 return res.urlStr, res.imports, res.err 891 } 892 893 type fetchResult struct { 894 urlStr string // e.g. "https://foo.com/x/bar?go-get=1" 895 imports []metaImport 896 err error 897 } 898 899 // metaImport represents the parsed <meta name="go-import" 900 // content="prefix vcs reporoot" /> tags from HTML files. 901 type metaImport struct { 902 Prefix, VCS, RepoRoot string 903 } 904 905 // pathPrefix reports whether sub is a prefix of s, 906 // only considering entire path components. 907 func pathPrefix(s, sub string) bool { 908 // strings.HasPrefix is necessary but not sufficient. 909 if !strings.HasPrefix(s, sub) { 910 return false 911 } 912 // The remainder after the prefix must either be empty or start with a slash. 913 rem := s[len(sub):] 914 return rem == "" || rem[0] == '/' 915 } 916 917 // A ImportMismatchError is returned where metaImport/s are present 918 // but none match our import path. 919 type ImportMismatchError struct { 920 importPath string 921 mismatches []string // the meta imports that were discarded for not matching our importPath 922 } 923 924 func (m ImportMismatchError) Error() string { 925 formattedStrings := make([]string, len(m.mismatches)) 926 for i, pre := range m.mismatches { 927 formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath) 928 } 929 return strings.Join(formattedStrings, ", ") 930 } 931 932 // matchGoImport returns the metaImport from imports matching importPath. 933 // An error is returned if there are multiple matches. 934 // errNoMatch is returned if none match. 935 func matchGoImport(imports []metaImport, importPath string) (metaImport, error) { 936 match := -1 937 938 errImportMismatch := ImportMismatchError{importPath: importPath} 939 for i, im := range imports { 940 if !pathPrefix(importPath, im.Prefix) { 941 errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix) 942 continue 943 } 944 945 if match >= 0 { 946 if imports[match].VCS == "mod" && im.VCS != "mod" { 947 // All the mod entries precede all the non-mod entries. 948 // We have a mod entry and don't care about the rest, 949 // matching or not. 950 break 951 } 952 return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath) 953 } 954 match = i 955 } 956 957 if match == -1 { 958 return metaImport{}, errImportMismatch 959 } 960 return imports[match], nil 961 } 962 963 // expand rewrites s to replace {k} with match[k] for each key k in match. 964 func expand(match map[string]string, s string) string { 965 // We want to replace each match exactly once, and the result of expansion 966 // must not depend on the iteration order through the map. 967 // A strings.Replacer has exactly the properties we're looking for. 968 oldNew := make([]string, 0, 2*len(match)) 969 for k, v := range match { 970 oldNew = append(oldNew, "{"+k+"}", v) 971 } 972 return strings.NewReplacer(oldNew...).Replace(s) 973 } 974 975 // vcsPaths defines the meaning of import paths referring to 976 // commonly-used VCS hosting sites (github.com/user/dir) 977 // and import paths referring to a fully-qualified importPath 978 // containing a VCS type (foo.com/repo.git/dir) 979 var vcsPaths = []*vcsPath{ 980 // Github 981 { 982 prefix: "github.com/", 983 re: `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`, 984 vcs: "git", 985 repo: "https://{root}", 986 check: noVCSSuffix, 987 }, 988 989 // Bitbucket 990 { 991 prefix: "bitbucket.org/", 992 re: `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 993 repo: "https://{root}", 994 check: bitbucketVCS, 995 }, 996 997 // IBM DevOps Services (JazzHub) 998 { 999 prefix: "hub.jazz.net/git/", 1000 re: `^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 1001 vcs: "git", 1002 repo: "https://{root}", 1003 check: noVCSSuffix, 1004 }, 1005 1006 // Git at Apache 1007 { 1008 prefix: "git.apache.org/", 1009 re: `^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`, 1010 vcs: "git", 1011 repo: "https://{root}", 1012 }, 1013 1014 // Git at OpenStack 1015 { 1016 prefix: "git.openstack.org/", 1017 re: `^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`, 1018 vcs: "git", 1019 repo: "https://{root}", 1020 }, 1021 1022 // chiselapp.com for fossil 1023 { 1024 prefix: "chiselapp.com/", 1025 re: `^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`, 1026 vcs: "fossil", 1027 repo: "https://{root}", 1028 }, 1029 1030 // General syntax for any server. 1031 // Must be last. 1032 { 1033 re: `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`, 1034 ping: true, 1035 }, 1036 } 1037 1038 // vcsPathsAfterDynamic gives additional vcsPaths entries 1039 // to try after the dynamic HTML check. 1040 // This gives those sites a chance to introduce <meta> tags 1041 // as part of a graceful transition away from the hard-coded logic. 1042 var vcsPathsAfterDynamic = []*vcsPath{ 1043 // Launchpad. See golang.org/issue/11436. 1044 { 1045 prefix: "launchpad.net/", 1046 re: `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 1047 vcs: "bzr", 1048 repo: "https://{root}", 1049 check: launchpadVCS, 1050 }, 1051 } 1052 1053 func init() { 1054 // fill in cached regexps. 1055 // Doing this eagerly discovers invalid regexp syntax 1056 // without having to run a command that needs that regexp. 1057 for _, srv := range vcsPaths { 1058 srv.regexp = regexp.MustCompile(srv.re) 1059 } 1060 for _, srv := range vcsPathsAfterDynamic { 1061 srv.regexp = regexp.MustCompile(srv.re) 1062 } 1063 } 1064 1065 // noVCSSuffix checks that the repository name does not 1066 // end in .foo for any version control system foo. 1067 // The usual culprit is ".git". 1068 func noVCSSuffix(match map[string]string) error { 1069 repo := match["repo"] 1070 for _, vcs := range vcsList { 1071 if strings.HasSuffix(repo, "."+vcs.cmd) { 1072 return fmt.Errorf("invalid version control suffix in %s path", match["prefix"]) 1073 } 1074 } 1075 return nil 1076 } 1077 1078 // bitbucketVCS determines the version control system for a 1079 // Bitbucket repository, by using the Bitbucket API. 1080 func bitbucketVCS(match map[string]string) error { 1081 if err := noVCSSuffix(match); err != nil { 1082 return err 1083 } 1084 1085 var resp struct { 1086 SCM string `json:"scm"` 1087 } 1088 url := expand(match, "https://api.bitbucket.org/2.0/repositories/{bitname}?fields=scm") 1089 data, err := web.Get(url) 1090 if err != nil { 1091 if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 { 1092 // this may be a private repository. If so, attempt to determine which 1093 // VCS it uses. See issue 5375. 1094 root := match["root"] 1095 for _, vcs := range []string{"git", "hg"} { 1096 if vcsByCmd(vcs).ping("https", root) == nil { 1097 resp.SCM = vcs 1098 break 1099 } 1100 } 1101 } 1102 1103 if resp.SCM == "" { 1104 return err 1105 } 1106 } else { 1107 if err := json.Unmarshal(data, &resp); err != nil { 1108 return fmt.Errorf("decoding %s: %v", url, err) 1109 } 1110 } 1111 1112 if vcsByCmd(resp.SCM) != nil { 1113 match["vcs"] = resp.SCM 1114 if resp.SCM == "git" { 1115 match["repo"] += ".git" 1116 } 1117 return nil 1118 } 1119 1120 return fmt.Errorf("unable to detect version control system for bitbucket.org/ path") 1121 } 1122 1123 // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case, 1124 // "foo" could be a series name registered in Launchpad with its own branch, 1125 // and it could also be the name of a directory within the main project 1126 // branch one level up. 1127 func launchpadVCS(match map[string]string) error { 1128 if match["project"] == "" || match["series"] == "" { 1129 return nil 1130 } 1131 _, err := web.Get(expand(match, "https://code.launchpad.net/{project}{series}/.bzr/branch-format")) 1132 if err != nil { 1133 match["root"] = expand(match, "launchpad.net/{project}") 1134 match["repo"] = expand(match, "https://{root}") 1135 } 1136 return nil 1137 }