github.com/mdempsky/go@v0.0.0-20151201204031-5dd372bd1e70/src/cmd/go/vcs.go (about) 1 // Copyright 2012 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "bytes" 9 "encoding/json" 10 "errors" 11 "fmt" 12 "internal/singleflight" 13 "log" 14 "net/url" 15 "os" 16 "os/exec" 17 "path/filepath" 18 "regexp" 19 "strings" 20 "sync" 21 ) 22 23 // A vcsCmd describes how to use a version control system 24 // like Mercurial, Git, or Subversion. 25 type vcsCmd struct { 26 name string 27 cmd string // name of binary to invoke command 28 29 createCmd []string // commands to download a fresh copy of a repository 30 downloadCmd []string // commands to download updates into an existing repository 31 32 tagCmd []tagCmd // commands to list tags 33 tagLookupCmd []tagCmd // commands to lookup tags before running tagSyncCmd 34 tagSyncCmd []string // commands to sync to specific tag 35 tagSyncDefault []string // commands to sync to default tag 36 37 scheme []string 38 pingCmd string 39 40 remoteRepo func(v *vcsCmd, rootDir string) (remoteRepo string, err error) 41 resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) 42 } 43 44 var isSecureScheme = map[string]bool{ 45 "https": true, 46 "git+ssh": true, 47 "bzr+ssh": true, 48 "svn+ssh": true, 49 "ssh": true, 50 } 51 52 func (v *vcsCmd) isSecure(repo string) bool { 53 u, err := url.Parse(repo) 54 if err != nil { 55 // If repo is not a URL, it's not secure. 56 return false 57 } 58 return isSecureScheme[u.Scheme] 59 } 60 61 // A tagCmd describes a command to list available tags 62 // that can be passed to tagSyncCmd. 63 type tagCmd struct { 64 cmd string // command to list tags 65 pattern string // regexp to extract tags from list 66 } 67 68 // vcsList lists the known version control systems 69 var vcsList = []*vcsCmd{ 70 vcsHg, 71 vcsGit, 72 vcsSvn, 73 vcsBzr, 74 } 75 76 // vcsByCmd returns the version control system for the given 77 // command name (hg, git, svn, bzr). 78 func vcsByCmd(cmd string) *vcsCmd { 79 for _, vcs := range vcsList { 80 if vcs.cmd == cmd { 81 return vcs 82 } 83 } 84 return nil 85 } 86 87 // vcsHg describes how to use Mercurial. 88 var vcsHg = &vcsCmd{ 89 name: "Mercurial", 90 cmd: "hg", 91 92 createCmd: []string{"clone -U {repo} {dir}"}, 93 downloadCmd: []string{"pull"}, 94 95 // We allow both tag and branch names as 'tags' 96 // for selecting a version. This lets people have 97 // a go.release.r60 branch and a go1 branch 98 // and make changes in both, without constantly 99 // editing .hgtags. 100 tagCmd: []tagCmd{ 101 {"tags", `^(\S+)`}, 102 {"branches", `^(\S+)`}, 103 }, 104 tagSyncCmd: []string{"update -r {tag}"}, 105 tagSyncDefault: []string{"update default"}, 106 107 scheme: []string{"https", "http", "ssh"}, 108 pingCmd: "identify {scheme}://{repo}", 109 remoteRepo: hgRemoteRepo, 110 } 111 112 func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) { 113 out, err := vcsHg.runOutput(rootDir, "paths default") 114 if err != nil { 115 return "", err 116 } 117 return strings.TrimSpace(string(out)), nil 118 } 119 120 // vcsGit describes how to use Git. 121 var vcsGit = &vcsCmd{ 122 name: "Git", 123 cmd: "git", 124 125 createCmd: []string{"clone {repo} {dir}", "--git-dir={dir}/.git submodule update --init --recursive"}, 126 downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"}, 127 128 tagCmd: []tagCmd{ 129 // tags/xxx matches a git tag named xxx 130 // origin/xxx matches a git branch named xxx on the default remote repository 131 {"show-ref", `(?:tags|origin)/(\S+)$`}, 132 }, 133 tagLookupCmd: []tagCmd{ 134 {"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`}, 135 }, 136 tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"}, 137 // both createCmd and downloadCmd update the working dir. 138 // No need to do more here. We used to 'checkout master' 139 // but that doesn't work if the default branch is not named master. 140 // See golang.org/issue/9032. 141 tagSyncDefault: []string{"checkout master", "submodule update --init --recursive"}, 142 143 scheme: []string{"git", "https", "http", "git+ssh", "ssh"}, 144 pingCmd: "ls-remote {scheme}://{repo}", 145 remoteRepo: gitRemoteRepo, 146 } 147 148 // scpSyntaxRe matches the SCP-like addresses used by Git to access 149 // repositories by SSH. 150 var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 151 152 func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) { 153 cmd := "config remote.origin.url" 154 errParse := errors.New("unable to parse output of git " + cmd) 155 errRemoteOriginNotFound := errors.New("remote origin not found") 156 outb, err := vcsGit.run1(rootDir, cmd, nil, false) 157 if err != nil { 158 // if it doesn't output any message, it means the config argument is correct, 159 // but the config value itself doesn't exist 160 if outb != nil && len(outb) == 0 { 161 return "", errRemoteOriginNotFound 162 } 163 return "", err 164 } 165 out := strings.TrimSpace(string(outb)) 166 167 var repoURL *url.URL 168 if m := scpSyntaxRe.FindStringSubmatch(out); m != nil { 169 // Match SCP-like syntax and convert it to a URL. 170 // Eg, "git@github.com:user/repo" becomes 171 // "ssh://git@github.com/user/repo". 172 repoURL = &url.URL{ 173 Scheme: "ssh", 174 User: url.User(m[1]), 175 Host: m[2], 176 RawPath: m[3], 177 } 178 } else { 179 repoURL, err = url.Parse(out) 180 if err != nil { 181 return "", err 182 } 183 } 184 185 // Iterate over insecure schemes too, because this function simply 186 // reports the state of the repo. If we can't see insecure schemes then 187 // we can't report the actual repo URL. 188 for _, s := range vcsGit.scheme { 189 if repoURL.Scheme == s { 190 return repoURL.String(), nil 191 } 192 } 193 return "", errParse 194 } 195 196 // vcsBzr describes how to use Bazaar. 197 var vcsBzr = &vcsCmd{ 198 name: "Bazaar", 199 cmd: "bzr", 200 201 createCmd: []string{"branch {repo} {dir}"}, 202 203 // Without --overwrite bzr will not pull tags that changed. 204 // Replace by --overwrite-tags after http://pad.lv/681792 goes in. 205 downloadCmd: []string{"pull --overwrite"}, 206 207 tagCmd: []tagCmd{{"tags", `^(\S+)`}}, 208 tagSyncCmd: []string{"update -r {tag}"}, 209 tagSyncDefault: []string{"update -r revno:-1"}, 210 211 scheme: []string{"https", "http", "bzr", "bzr+ssh"}, 212 pingCmd: "info {scheme}://{repo}", 213 remoteRepo: bzrRemoteRepo, 214 resolveRepo: bzrResolveRepo, 215 } 216 217 func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) { 218 outb, err := vcsBzr.runOutput(rootDir, "config parent_location") 219 if err != nil { 220 return "", err 221 } 222 return strings.TrimSpace(string(outb)), nil 223 } 224 225 func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) { 226 outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo) 227 if err != nil { 228 return "", err 229 } 230 out := string(outb) 231 232 // Expect: 233 // ... 234 // (branch root|repository branch): <URL> 235 // ... 236 237 found := false 238 for _, prefix := range []string{"\n branch root: ", "\n repository branch: "} { 239 i := strings.Index(out, prefix) 240 if i >= 0 { 241 out = out[i+len(prefix):] 242 found = true 243 break 244 } 245 } 246 if !found { 247 return "", fmt.Errorf("unable to parse output of bzr info") 248 } 249 250 i := strings.Index(out, "\n") 251 if i < 0 { 252 return "", fmt.Errorf("unable to parse output of bzr info") 253 } 254 out = out[:i] 255 return strings.TrimSpace(string(out)), nil 256 } 257 258 // vcsSvn describes how to use Subversion. 259 var vcsSvn = &vcsCmd{ 260 name: "Subversion", 261 cmd: "svn", 262 263 createCmd: []string{"checkout {repo} {dir}"}, 264 downloadCmd: []string{"update"}, 265 266 // There is no tag command in subversion. 267 // The branch information is all in the path names. 268 269 scheme: []string{"https", "http", "svn", "svn+ssh"}, 270 pingCmd: "info {scheme}://{repo}", 271 remoteRepo: svnRemoteRepo, 272 } 273 274 func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) { 275 outb, err := vcsSvn.runOutput(rootDir, "info") 276 if err != nil { 277 return "", err 278 } 279 out := string(outb) 280 281 // Expect: 282 // ... 283 // Repository Root: <URL> 284 // ... 285 286 i := strings.Index(out, "\nRepository Root: ") 287 if i < 0 { 288 return "", fmt.Errorf("unable to parse output of svn info") 289 } 290 out = out[i+len("\nRepository Root: "):] 291 i = strings.Index(out, "\n") 292 if i < 0 { 293 return "", fmt.Errorf("unable to parse output of svn info") 294 } 295 out = out[:i] 296 return strings.TrimSpace(string(out)), nil 297 } 298 299 func (v *vcsCmd) String() string { 300 return v.name 301 } 302 303 // run runs the command line cmd in the given directory. 304 // keyval is a list of key, value pairs. run expands 305 // instances of {key} in cmd into value, but only after 306 // splitting cmd into individual arguments. 307 // If an error occurs, run prints the command line and the 308 // command's combined stdout+stderr to standard error. 309 // Otherwise run discards the command's output. 310 func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error { 311 _, err := v.run1(dir, cmd, keyval, true) 312 return err 313 } 314 315 // runVerboseOnly is like run but only generates error output to standard error in verbose mode. 316 func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error { 317 _, err := v.run1(dir, cmd, keyval, false) 318 return err 319 } 320 321 // runOutput is like run but returns the output of the command. 322 func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) { 323 return v.run1(dir, cmd, keyval, true) 324 } 325 326 // run1 is the generalized implementation of run and runOutput. 327 func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) { 328 m := make(map[string]string) 329 for i := 0; i < len(keyval); i += 2 { 330 m[keyval[i]] = keyval[i+1] 331 } 332 args := strings.Fields(cmdline) 333 for i, arg := range args { 334 args[i] = expand(m, arg) 335 } 336 337 _, err := exec.LookPath(v.cmd) 338 if err != nil { 339 fmt.Fprintf(os.Stderr, 340 "go: missing %s command. See https://golang.org/s/gogetcmd\n", 341 v.name) 342 return nil, err 343 } 344 345 cmd := exec.Command(v.cmd, args...) 346 cmd.Dir = dir 347 cmd.Env = envForDir(cmd.Dir, os.Environ()) 348 if buildX { 349 fmt.Printf("cd %s\n", dir) 350 fmt.Printf("%s %s\n", v.cmd, strings.Join(args, " ")) 351 } 352 var buf bytes.Buffer 353 cmd.Stdout = &buf 354 cmd.Stderr = &buf 355 err = cmd.Run() 356 out := buf.Bytes() 357 if err != nil { 358 if verbose || buildV { 359 fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " ")) 360 os.Stderr.Write(out) 361 } 362 return out, err 363 } 364 return out, nil 365 } 366 367 // ping pings to determine scheme to use. 368 func (v *vcsCmd) ping(scheme, repo string) error { 369 return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo) 370 } 371 372 // create creates a new copy of repo in dir. 373 // The parent of dir must exist; dir must not. 374 func (v *vcsCmd) create(dir, repo string) error { 375 for _, cmd := range v.createCmd { 376 if !go15VendorExperiment && strings.Contains(cmd, "submodule") { 377 continue 378 } 379 if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil { 380 return err 381 } 382 } 383 return nil 384 } 385 386 // download downloads any new changes for the repo in dir. 387 func (v *vcsCmd) download(dir string) error { 388 if err := v.fixDetachedHead(dir); err != nil { 389 return err 390 } 391 for _, cmd := range v.downloadCmd { 392 if !go15VendorExperiment && strings.Contains(cmd, "submodule") { 393 continue 394 } 395 if err := v.run(dir, cmd); err != nil { 396 return err 397 } 398 } 399 return nil 400 } 401 402 // fixDetachedHead switches a Git repository in dir from a detached head to the master branch. 403 // Go versions before 1.2 downloaded Git repositories in an unfortunate way 404 // that resulted in the working tree state being on a detached head. 405 // That meant the repository was not usable for normal Git operations. 406 // Go 1.2 fixed that, but we can't pull into a detached head, so if this is 407 // a Git repository we check for being on a detached head and switch to the 408 // real branch, almost always called "master". 409 // TODO(dsymonds): Consider removing this for Go 1.3. 410 func (v *vcsCmd) fixDetachedHead(dir string) error { 411 if v != vcsGit { 412 return nil 413 } 414 415 // "git symbolic-ref HEAD" succeeds iff we are not on a detached head. 416 if err := v.runVerboseOnly(dir, "symbolic-ref HEAD"); err == nil { 417 // not on a detached head 418 return nil 419 } 420 if buildV { 421 log.Printf("%s on detached head; repairing", dir) 422 } 423 return v.run(dir, "checkout master") 424 } 425 426 // tags returns the list of available tags for the repo in dir. 427 func (v *vcsCmd) tags(dir string) ([]string, error) { 428 var tags []string 429 for _, tc := range v.tagCmd { 430 out, err := v.runOutput(dir, tc.cmd) 431 if err != nil { 432 return nil, err 433 } 434 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 435 for _, m := range re.FindAllStringSubmatch(string(out), -1) { 436 tags = append(tags, m[1]) 437 } 438 } 439 return tags, nil 440 } 441 442 // tagSync syncs the repo in dir to the named tag, 443 // which either is a tag returned by tags or is v.tagDefault. 444 func (v *vcsCmd) tagSync(dir, tag string) error { 445 if v.tagSyncCmd == nil { 446 return nil 447 } 448 if tag != "" { 449 for _, tc := range v.tagLookupCmd { 450 out, err := v.runOutput(dir, tc.cmd, "tag", tag) 451 if err != nil { 452 return err 453 } 454 re := regexp.MustCompile(`(?m-s)` + tc.pattern) 455 m := re.FindStringSubmatch(string(out)) 456 if len(m) > 1 { 457 tag = m[1] 458 break 459 } 460 } 461 } 462 463 if tag == "" && v.tagSyncDefault != nil { 464 for _, cmd := range v.tagSyncDefault { 465 if !go15VendorExperiment && strings.Contains(cmd, "submodule") { 466 continue 467 } 468 if err := v.run(dir, cmd); err != nil { 469 return err 470 } 471 } 472 return nil 473 } 474 475 for _, cmd := range v.tagSyncCmd { 476 if !go15VendorExperiment && strings.Contains(cmd, "submodule") { 477 continue 478 } 479 if err := v.run(dir, cmd, "tag", tag); err != nil { 480 return err 481 } 482 } 483 return nil 484 } 485 486 // A vcsPath describes how to convert an import path into a 487 // version control system and repository name. 488 type vcsPath struct { 489 prefix string // prefix this description applies to 490 re string // pattern for import path 491 repo string // repository to use (expand with match of re) 492 vcs string // version control system to use (expand with match of re) 493 check func(match map[string]string) error // additional checks 494 ping bool // ping for scheme to use to download repo 495 496 regexp *regexp.Regexp // cached compiled form of re 497 } 498 499 // vcsForDir inspects dir and its parents to determine the 500 // version control system and code repository to use. 501 // On return, root is the import path 502 // corresponding to the root of the repository 503 // (thus root is a prefix of importPath). 504 func vcsForDir(p *Package) (vcs *vcsCmd, root string, err error) { 505 // Clean and double-check that dir is in (a subdirectory of) srcRoot. 506 dir := filepath.Clean(p.Dir) 507 srcRoot := filepath.Clean(p.build.SrcRoot) 508 if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator { 509 return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot) 510 } 511 512 origDir := dir 513 for len(dir) > len(srcRoot) { 514 for _, vcs := range vcsList { 515 if fi, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil && fi.IsDir() { 516 return vcs, dir[len(srcRoot)+1:], nil 517 } 518 } 519 520 // Move to parent. 521 ndir := filepath.Dir(dir) 522 if len(ndir) >= len(dir) { 523 // Shouldn't happen, but just in case, stop. 524 break 525 } 526 dir = ndir 527 } 528 529 return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir) 530 } 531 532 // repoRoot represents a version control system, a repo, and a root of 533 // where to put it on disk. 534 type repoRoot struct { 535 vcs *vcsCmd 536 537 // repo is the repository URL, including scheme 538 repo string 539 540 // root is the import path corresponding to the root of the 541 // repository 542 root string 543 } 544 545 var httpPrefixRE = regexp.MustCompile(`^https?:`) 546 547 // securityMode specifies whether a function should make network 548 // calls using insecure transports (eg, plain text HTTP). 549 // The zero value is "secure". 550 type securityMode int 551 552 const ( 553 secure securityMode = iota 554 insecure 555 ) 556 557 // repoRootForImportPath analyzes importPath to determine the 558 // version control system, and code repository to use. 559 func repoRootForImportPath(importPath string, security securityMode) (*repoRoot, error) { 560 rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths) 561 if err == errUnknownSite { 562 // If there are wildcards, look up the thing before the wildcard, 563 // hoping it applies to the wildcarded parts too. 564 // This makes 'go get rsc.io/pdf/...' work in a fresh GOPATH. 565 lookup := strings.TrimSuffix(importPath, "/...") 566 if i := strings.Index(lookup, "/.../"); i >= 0 { 567 lookup = lookup[:i] 568 } 569 rr, err = repoRootForImportDynamic(lookup, security) 570 if err != nil { 571 err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err) 572 } 573 } 574 if err != nil { 575 rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic) 576 if err1 == nil { 577 rr = rr1 578 err = nil 579 } 580 } 581 582 if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.root, "...") { 583 // Do not allow wildcards in the repo root. 584 rr = nil 585 err = fmt.Errorf("cannot expand ... in %q", importPath) 586 } 587 return rr, err 588 } 589 590 var errUnknownSite = errors.New("dynamic lookup required to find mapping") 591 592 // repoRootFromVCSPaths attempts to map importPath to a repoRoot 593 // using the mappings defined in vcsPaths. 594 // If scheme is non-empty, that scheme is forced. 595 func repoRootFromVCSPaths(importPath, scheme string, security securityMode, vcsPaths []*vcsPath) (*repoRoot, error) { 596 // A common error is to use https://packagepath because that's what 597 // hg and git require. Diagnose this helpfully. 598 if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil { 599 // The importPath has been cleaned, so has only one slash. The pattern 600 // ignores the slashes; the error message puts them back on the RHS at least. 601 return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//") 602 } 603 for _, srv := range vcsPaths { 604 if !strings.HasPrefix(importPath, srv.prefix) { 605 continue 606 } 607 m := srv.regexp.FindStringSubmatch(importPath) 608 if m == nil { 609 if srv.prefix != "" { 610 return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath) 611 } 612 continue 613 } 614 615 // Build map of named subexpression matches for expand. 616 match := map[string]string{ 617 "prefix": srv.prefix, 618 "import": importPath, 619 } 620 for i, name := range srv.regexp.SubexpNames() { 621 if name != "" && match[name] == "" { 622 match[name] = m[i] 623 } 624 } 625 if srv.vcs != "" { 626 match["vcs"] = expand(match, srv.vcs) 627 } 628 if srv.repo != "" { 629 match["repo"] = expand(match, srv.repo) 630 } 631 if srv.check != nil { 632 if err := srv.check(match); err != nil { 633 return nil, err 634 } 635 } 636 vcs := vcsByCmd(match["vcs"]) 637 if vcs == nil { 638 return nil, fmt.Errorf("unknown version control system %q", match["vcs"]) 639 } 640 if srv.ping { 641 if scheme != "" { 642 match["repo"] = scheme + "://" + match["repo"] 643 } else { 644 for _, scheme := range vcs.scheme { 645 if security == secure && !isSecureScheme[scheme] { 646 continue 647 } 648 if vcs.ping(scheme, match["repo"]) == nil { 649 match["repo"] = scheme + "://" + match["repo"] 650 break 651 } 652 } 653 } 654 } 655 rr := &repoRoot{ 656 vcs: vcs, 657 repo: match["repo"], 658 root: match["root"], 659 } 660 return rr, nil 661 } 662 return nil, errUnknownSite 663 } 664 665 // repoRootForImportDynamic finds a *repoRoot for a custom domain that's not 666 // statically known by repoRootForImportPathStatic. 667 // 668 // This handles custom import paths like "name.tld/pkg/foo" or just "name.tld". 669 func repoRootForImportDynamic(importPath string, security securityMode) (*repoRoot, error) { 670 slash := strings.Index(importPath, "/") 671 if slash < 0 { 672 slash = len(importPath) 673 } 674 host := importPath[:slash] 675 if !strings.Contains(host, ".") { 676 return nil, errors.New("import path does not begin with hostname") 677 } 678 urlStr, body, err := httpsOrHTTP(importPath, security) 679 if err != nil { 680 msg := "https fetch: %v" 681 if security == insecure { 682 msg = "http/" + msg 683 } 684 return nil, fmt.Errorf(msg, err) 685 } 686 defer body.Close() 687 imports, err := parseMetaGoImports(body) 688 if err != nil { 689 return nil, fmt.Errorf("parsing %s: %v", importPath, err) 690 } 691 // Find the matched meta import. 692 mmi, err := matchGoImport(imports, importPath) 693 if err != nil { 694 if err != errNoMatch { 695 return nil, fmt.Errorf("parse %s: %v", urlStr, err) 696 } 697 return nil, fmt.Errorf("parse %s: no go-import meta tags", urlStr) 698 } 699 if buildV { 700 log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr) 701 } 702 // If the import was "uni.edu/bob/project", which said the 703 // prefix was "uni.edu" and the RepoRoot was "evilroot.com", 704 // make sure we don't trust Bob and check out evilroot.com to 705 // "uni.edu" yet (possibly overwriting/preempting another 706 // non-evil student). Instead, first verify the root and see 707 // if it matches Bob's claim. 708 if mmi.Prefix != importPath { 709 if buildV { 710 log.Printf("get %q: verifying non-authoritative meta tag", importPath) 711 } 712 urlStr0 := urlStr 713 var imports []metaImport 714 urlStr, imports, err = metaImportsForPrefix(mmi.Prefix, security) 715 if err != nil { 716 return nil, err 717 } 718 metaImport2, err := matchGoImport(imports, importPath) 719 if err != nil || mmi != metaImport2 { 720 return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix) 721 } 722 } 723 724 if !strings.Contains(mmi.RepoRoot, "://") { 725 return nil, fmt.Errorf("%s: invalid repo root %q; no scheme", urlStr, mmi.RepoRoot) 726 } 727 rr := &repoRoot{ 728 vcs: vcsByCmd(mmi.VCS), 729 repo: mmi.RepoRoot, 730 root: mmi.Prefix, 731 } 732 if rr.vcs == nil { 733 return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS) 734 } 735 return rr, nil 736 } 737 738 var fetchGroup singleflight.Group 739 var ( 740 fetchCacheMu sync.Mutex 741 fetchCache = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix 742 ) 743 744 // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag 745 // and returns its HTML discovery URL and the parsed metaImport lines 746 // found on the page. 747 // 748 // The importPath is of the form "golang.org/x/tools". 749 // It is an error if no imports are found. 750 // urlStr will still be valid if err != nil. 751 // The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1" 752 func metaImportsForPrefix(importPrefix string, security securityMode) (urlStr string, imports []metaImport, err error) { 753 setCache := func(res fetchResult) (fetchResult, error) { 754 fetchCacheMu.Lock() 755 defer fetchCacheMu.Unlock() 756 fetchCache[importPrefix] = res 757 return res, nil 758 } 759 760 resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) { 761 fetchCacheMu.Lock() 762 if res, ok := fetchCache[importPrefix]; ok { 763 fetchCacheMu.Unlock() 764 return res, nil 765 } 766 fetchCacheMu.Unlock() 767 768 urlStr, body, err := httpsOrHTTP(importPrefix, security) 769 if err != nil { 770 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)}) 771 } 772 imports, err := parseMetaGoImports(body) 773 if err != nil { 774 return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)}) 775 } 776 if len(imports) == 0 { 777 err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr) 778 } 779 return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err}) 780 }) 781 res := resi.(fetchResult) 782 return res.urlStr, res.imports, res.err 783 } 784 785 type fetchResult struct { 786 urlStr string // e.g. "https://foo.com/x/bar?go-get=1" 787 imports []metaImport 788 err error 789 } 790 791 // metaImport represents the parsed <meta name="go-import" 792 // content="prefix vcs reporoot" /> tags from HTML files. 793 type metaImport struct { 794 Prefix, VCS, RepoRoot string 795 } 796 797 // errNoMatch is returned from matchGoImport when there's no applicable match. 798 var errNoMatch = errors.New("no import match") 799 800 // matchGoImport returns the metaImport from imports matching importPath. 801 // An error is returned if there are multiple matches. 802 // errNoMatch is returned if none match. 803 func matchGoImport(imports []metaImport, importPath string) (_ metaImport, err error) { 804 match := -1 805 for i, im := range imports { 806 if !strings.HasPrefix(importPath, im.Prefix) { 807 continue 808 } 809 if match != -1 { 810 err = fmt.Errorf("multiple meta tags match import path %q", importPath) 811 return 812 } 813 match = i 814 } 815 if match == -1 { 816 err = errNoMatch 817 return 818 } 819 return imports[match], nil 820 } 821 822 // expand rewrites s to replace {k} with match[k] for each key k in match. 823 func expand(match map[string]string, s string) string { 824 for k, v := range match { 825 s = strings.Replace(s, "{"+k+"}", v, -1) 826 } 827 return s 828 } 829 830 // vcsPaths defines the meaning of import paths referring to 831 // commonly-used VCS hosting sites (github.com/user/dir) 832 // and import paths referring to a fully-qualified importPath 833 // containing a VCS type (foo.com/repo.git/dir) 834 var vcsPaths = []*vcsPath{ 835 // Google Code - new syntax 836 { 837 prefix: "code.google.com/", 838 re: `^(?P<root>code\.google\.com/p/(?P<project>[a-z0-9\-]+)(\.(?P<subrepo>[a-z0-9\-]+))?)(/[A-Za-z0-9_.\-]+)*$`, 839 repo: "https://{root}", 840 check: googleCodeVCS, 841 }, 842 843 // Google Code - old syntax 844 { 845 re: `^(?P<project>[a-z0-9_\-.]+)\.googlecode\.com/(git|hg|svn)(?P<path>/.*)?$`, 846 check: oldGoogleCode, 847 }, 848 849 // Github 850 { 851 prefix: "github.com/", 852 re: `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 853 vcs: "git", 854 repo: "https://{root}", 855 check: noVCSSuffix, 856 }, 857 858 // Bitbucket 859 { 860 prefix: "bitbucket.org/", 861 re: `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 862 repo: "https://{root}", 863 check: bitbucketVCS, 864 }, 865 866 // IBM DevOps Services (JazzHub) 867 { 868 prefix: "hub.jazz.net/git", 869 re: `^(?P<root>hub.jazz.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`, 870 vcs: "git", 871 repo: "https://{root}", 872 check: noVCSSuffix, 873 }, 874 875 // Git at Apache 876 { 877 prefix: "git.apache.org", 878 re: `^(?P<root>git.apache.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`, 879 vcs: "git", 880 repo: "https://{root}", 881 }, 882 883 // General syntax for any server. 884 // Must be last. 885 { 886 re: `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?/[A-Za-z0-9_.\-/]*?)\.(?P<vcs>bzr|git|hg|svn))(/[A-Za-z0-9_.\-]+)*$`, 887 ping: true, 888 }, 889 } 890 891 // vcsPathsAfterDynamic gives additional vcsPaths entries 892 // to try after the dynamic HTML check. 893 // This gives those sites a chance to introduce <meta> tags 894 // as part of a graceful transition away from the hard-coded logic. 895 var vcsPathsAfterDynamic = []*vcsPath{ 896 // Launchpad. See golang.org/issue/11436. 897 { 898 prefix: "launchpad.net/", 899 re: `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`, 900 vcs: "bzr", 901 repo: "https://{root}", 902 check: launchpadVCS, 903 }, 904 } 905 906 func init() { 907 // fill in cached regexps. 908 // Doing this eagerly discovers invalid regexp syntax 909 // without having to run a command that needs that regexp. 910 for _, srv := range vcsPaths { 911 srv.regexp = regexp.MustCompile(srv.re) 912 } 913 for _, srv := range vcsPathsAfterDynamic { 914 srv.regexp = regexp.MustCompile(srv.re) 915 } 916 } 917 918 // noVCSSuffix checks that the repository name does not 919 // end in .foo for any version control system foo. 920 // The usual culprit is ".git". 921 func noVCSSuffix(match map[string]string) error { 922 repo := match["repo"] 923 for _, vcs := range vcsList { 924 if strings.HasSuffix(repo, "."+vcs.cmd) { 925 return fmt.Errorf("invalid version control suffix in %s path", match["prefix"]) 926 } 927 } 928 return nil 929 } 930 931 var googleCheckout = regexp.MustCompile(`id="checkoutcmd">(hg|git|svn)`) 932 933 // googleCodeVCS determines the version control system for 934 // a code.google.com repository, by scraping the project's 935 // /source/checkout page. 936 func googleCodeVCS(match map[string]string) error { 937 if err := noVCSSuffix(match); err != nil { 938 return err 939 } 940 data, err := httpGET(expand(match, "https://code.google.com/p/{project}/source/checkout?repo={subrepo}")) 941 if err != nil { 942 return err 943 } 944 945 if m := googleCheckout.FindSubmatch(data); m != nil { 946 if vcs := vcsByCmd(string(m[1])); vcs != nil { 947 // Subversion requires the old URLs. 948 // TODO: Test. 949 if vcs == vcsSvn { 950 if match["subrepo"] != "" { 951 return fmt.Errorf("sub-repositories not supported in Google Code Subversion projects") 952 } 953 match["repo"] = expand(match, "https://{project}.googlecode.com/svn") 954 } 955 match["vcs"] = vcs.cmd 956 return nil 957 } 958 } 959 960 return fmt.Errorf("unable to detect version control system for code.google.com/ path") 961 } 962 963 // oldGoogleCode is invoked for old-style foo.googlecode.com paths. 964 // It prints an error giving the equivalent new path. 965 func oldGoogleCode(match map[string]string) error { 966 return fmt.Errorf("invalid Google Code import path: use %s instead", 967 expand(match, "code.google.com/p/{project}{path}")) 968 } 969 970 // bitbucketVCS determines the version control system for a 971 // Bitbucket repository, by using the Bitbucket API. 972 func bitbucketVCS(match map[string]string) error { 973 if err := noVCSSuffix(match); err != nil { 974 return err 975 } 976 977 var resp struct { 978 SCM string `json:"scm"` 979 } 980 url := expand(match, "https://api.bitbucket.org/1.0/repositories/{bitname}") 981 data, err := httpGET(url) 982 if err != nil { 983 if httpErr, ok := err.(*httpError); ok && httpErr.statusCode == 403 { 984 // this may be a private repository. If so, attempt to determine which 985 // VCS it uses. See issue 5375. 986 root := match["root"] 987 for _, vcs := range []string{"git", "hg"} { 988 if vcsByCmd(vcs).ping("https", root) == nil { 989 resp.SCM = vcs 990 break 991 } 992 } 993 } 994 995 if resp.SCM == "" { 996 return err 997 } 998 } else { 999 if err := json.Unmarshal(data, &resp); err != nil { 1000 return fmt.Errorf("decoding %s: %v", url, err) 1001 } 1002 } 1003 1004 if vcsByCmd(resp.SCM) != nil { 1005 match["vcs"] = resp.SCM 1006 if resp.SCM == "git" { 1007 match["repo"] += ".git" 1008 } 1009 return nil 1010 } 1011 1012 return fmt.Errorf("unable to detect version control system for bitbucket.org/ path") 1013 } 1014 1015 // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case, 1016 // "foo" could be a series name registered in Launchpad with its own branch, 1017 // and it could also be the name of a directory within the main project 1018 // branch one level up. 1019 func launchpadVCS(match map[string]string) error { 1020 if match["project"] == "" || match["series"] == "" { 1021 return nil 1022 } 1023 _, err := httpGET(expand(match, "https://code.launchpad.net/{project}{series}/.bzr/branch-format")) 1024 if err != nil { 1025 match["root"] = expand(match, "launchpad.net/{project}") 1026 match["repo"] = expand(match, "https://{root}") 1027 } 1028 return nil 1029 }