github.com/gernest/nezuko@v0.1.2/internal/get/vcs.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package get
     6  
     7  import (
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"log"
    12  	"net/url"
    13  	"os"
    14  	"os/exec"
    15  	"path/filepath"
    16  	"regexp"
    17  	"strings"
    18  	"sync"
    19  
    20  	"github.com/gernest/nezuko/internal/singleflight"
    21  
    22  	"github.com/gernest/nezuko/internal/base"
    23  	"github.com/gernest/nezuko/internal/cfg"
    24  	"github.com/gernest/nezuko/internal/web"
    25  )
    26  
var (
	// Insecure is an exported package-level switch. Nothing in the
	// visible part of this file reads it.
	// NOTE(review): presumably it permits insecure schemes — confirm against callers.
	Insecure bool
)
    30  
// A vcsCmd describes how to use a version control system
// like Mercurial, Git, or Subversion.
//
// The command strings are templates: run1 splits them on whitespace and
// then expands {key} placeholders such as {repo}, {dir}, {tag} and {scheme}.
type vcsCmd struct {
	name string // human-readable name; returned by String and used in the missing-command message
	cmd  string // name of binary to invoke command

	createCmd   []string // commands to download a fresh copy of a repository
	downloadCmd []string // commands to download updates into an existing repository

	tagCmd         []tagCmd // commands to list tags
	tagLookupCmd   []tagCmd // commands to lookup tags before running tagSyncCmd
	tagSyncCmd     []string // commands to sync to specific tag
	tagSyncDefault []string // commands to sync to default tag

	scheme  []string // URL schemes tried in order when pinging; scheme[0] is the fallback
	pingCmd string // command template run by ping to probe {scheme}://{repo}

	// remoteRepo reports the remote repository URL recorded in the
	// checkout rooted at rootDir.
	remoteRepo  func(v *vcsCmd, rootDir string) (remoteRepo string, err error)
	// resolveRepo maps remoteRepo to the repository's real location.
	// Of the systems defined in this file, only Bazaar sets it.
	resolveRepo func(v *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error)
}
    51  
// defaultSecureScheme is the set of URL schemes that isSecureScheme
// treats as secure when no VCS-specific override (GIT_ALLOW_PROTOCOL
// for git) applies. A scheme absent from the map is insecure.
var defaultSecureScheme = map[string]bool{
	"https":   true,
	"git+ssh": true,
	"bzr+ssh": true,
	"svn+ssh": true,
	"ssh":     true,
}
    59  
    60  func (v *vcsCmd) isSecure(repo string) bool {
    61  	u, err := url.Parse(repo)
    62  	if err != nil {
    63  		// If repo is not a URL, it's not secure.
    64  		return false
    65  	}
    66  	return v.isSecureScheme(u.Scheme)
    67  }
    68  
    69  func (v *vcsCmd) isSecureScheme(scheme string) bool {
    70  	switch v.cmd {
    71  	case "git":
    72  		// GIT_ALLOW_PROTOCOL is an environment variable defined by Git. It is a
    73  		// colon-separated list of schemes that are allowed to be used with git
    74  		// fetch/clone. Any scheme not mentioned will be considered insecure.
    75  		if allow := os.Getenv("GIT_ALLOW_PROTOCOL"); allow != "" {
    76  			for _, s := range strings.Split(allow, ":") {
    77  				if s == scheme {
    78  					return true
    79  				}
    80  			}
    81  			return false
    82  		}
    83  	}
    84  	return defaultSecureScheme[scheme]
    85  }
    86  
// A tagCmd describes a command to list available tags
// that can be passed to tagSyncCmd.
//
// The vcsCmd tables build tagCmd values with positional literals, so
// the field order (cmd, then pattern) must not change.
type tagCmd struct {
	cmd     string // command to list tags
	pattern string // regexp to extract tags from list; compiled with a (?m-s) prefix, submatch 1 is the tag
}
    93  
// vcsList lists the known version control systems.
// vcsByCmd searches it by command name, and vcsFromDir and
// checkNestedVCS probe each directory for "."+cmd in this order.
var vcsList = []*vcsCmd{
	vcsHg,
	vcsGit,
	vcsSvn,
	vcsBzr,
	vcsFossil,
}
   102  
   103  // vcsByCmd returns the version control system for the given
   104  // command name (hg, git, svn, bzr).
   105  func vcsByCmd(cmd string) *vcsCmd {
   106  	for _, vcs := range vcsList {
   107  		if vcs.cmd == cmd {
   108  			return vcs
   109  		}
   110  	}
   111  	return nil
   112  }
   113  
// vcsHg describes how to use Mercurial.
var vcsHg = &vcsCmd{
	name: "Mercurial",
	cmd:  "hg",

	createCmd:   []string{"clone -U {repo} {dir}"},
	downloadCmd: []string{"pull"},

	// We allow both tag and branch names as 'tags'
	// for selecting a version. This lets people have
	// a go.release.r60 branch and a go1 branch
	// and make changes in both, without constantly
	// editing .hgtags.
	tagCmd: []tagCmd{
		{"tags", `^(\S+)`},
		{"branches", `^(\S+)`},
	},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update default"},

	// Schemes are tried in this order when pinging; "https" is the fallback.
	scheme:     []string{"https", "http", "ssh"},
	pingCmd:    "identify {scheme}://{repo}",
	remoteRepo: hgRemoteRepo,
}
   138  
   139  func hgRemoteRepo(vcsHg *vcsCmd, rootDir string) (remoteRepo string, err error) {
   140  	out, err := vcsHg.runOutput(rootDir, "paths default")
   141  	if err != nil {
   142  		return "", err
   143  	}
   144  	return strings.TrimSpace(string(out)), nil
   145  }
   146  
// vcsGit describes how to use Git.
var vcsGit = &vcsCmd{
	name: "Git",
	cmd:  "git",

	// "-go-internal-cd {dir}" is a directive handled by run1: the rest of
	// the command runs inside {dir} rather than the caller's directory.
	createCmd:   []string{"clone {repo} {dir}", "-go-internal-cd {dir} submodule update --init --recursive"},
	downloadCmd: []string{"pull --ff-only", "submodule update --init --recursive"},

	tagCmd: []tagCmd{
		// tags/xxx matches a git tag named xxx
		// origin/xxx matches a git branch named xxx on the default remote repository
		{"show-ref", `(?:tags|origin)/(\S+)$`},
	},
	// tagSync replaces the requested tag with the full tags/... or
	// origin/... ref captured here before running tagSyncCmd.
	tagLookupCmd: []tagCmd{
		{"show-ref tags/{tag} origin/{tag}", `((?:tags|origin)/\S+)$`},
	},
	tagSyncCmd: []string{"checkout {tag}", "submodule update --init --recursive"},
	// both createCmd and downloadCmd update the working dir.
	// No need to do more here. We used to 'checkout master'
	// but that doesn't work if the default branch is not named master.
	// DO NOT add 'checkout master' here.
	// See golang.org/issue/9032.
	tagSyncDefault: []string{"submodule update --init --recursive"},

	// Schemes are tried in this order when pinging; "git" is the fallback.
	scheme:     []string{"git", "https", "http", "git+ssh", "ssh"},
	pingCmd:    "ls-remote {scheme}://{repo}",
	remoteRepo: gitRemoteRepo,
}
   175  
// scpSyntaxRe matches the SCP-like addresses used by Git to access
// repositories by SSH, e.g. "git@github.com:user/repo".
// Submatch 1 is the user, 2 the host and 3 the path; gitRemoteRepo uses
// them to build an ssh:// URL.
var scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
   179  
   180  func gitRemoteRepo(vcsGit *vcsCmd, rootDir string) (remoteRepo string, err error) {
   181  	cmd := "config remote.origin.url"
   182  	errParse := errors.New("unable to parse output of git " + cmd)
   183  	errRemoteOriginNotFound := errors.New("remote origin not found")
   184  	outb, err := vcsGit.run1(rootDir, cmd, nil, false)
   185  	if err != nil {
   186  		// if it doesn't output any message, it means the config argument is correct,
   187  		// but the config value itself doesn't exist
   188  		if outb != nil && len(outb) == 0 {
   189  			return "", errRemoteOriginNotFound
   190  		}
   191  		return "", err
   192  	}
   193  	out := strings.TrimSpace(string(outb))
   194  
   195  	var repoURL *url.URL
   196  	if m := scpSyntaxRe.FindStringSubmatch(out); m != nil {
   197  		// Match SCP-like syntax and convert it to a URL.
   198  		// Eg, "git@github.com:user/repo" becomes
   199  		// "ssh://git@github.com/user/repo".
   200  		repoURL = &url.URL{
   201  			Scheme: "ssh",
   202  			User:   url.User(m[1]),
   203  			Host:   m[2],
   204  			Path:   m[3],
   205  		}
   206  	} else {
   207  		repoURL, err = url.Parse(out)
   208  		if err != nil {
   209  			return "", err
   210  		}
   211  	}
   212  
   213  	// Iterate over insecure schemes too, because this function simply
   214  	// reports the state of the repo. If we can't see insecure schemes then
   215  	// we can't report the actual repo URL.
   216  	for _, s := range vcsGit.scheme {
   217  		if repoURL.Scheme == s {
   218  			return repoURL.String(), nil
   219  		}
   220  	}
   221  	return "", errParse
   222  }
   223  
// vcsBzr describes how to use Bazaar.
// It is the only system in this file that also sets resolveRepo.
var vcsBzr = &vcsCmd{
	name: "Bazaar",
	cmd:  "bzr",

	createCmd: []string{"branch {repo} {dir}"},

	// Without --overwrite bzr will not pull tags that changed.
	// Replace by --overwrite-tags after http://pad.lv/681792 goes in.
	downloadCmd: []string{"pull --overwrite"},

	tagCmd:         []tagCmd{{"tags", `^(\S+)`}},
	tagSyncCmd:     []string{"update -r {tag}"},
	tagSyncDefault: []string{"update -r revno:-1"},

	// Schemes are tried in this order when pinging; "https" is the fallback.
	scheme:      []string{"https", "http", "bzr", "bzr+ssh"},
	pingCmd:     "info {scheme}://{repo}",
	remoteRepo:  bzrRemoteRepo,
	resolveRepo: bzrResolveRepo,
}
   244  
   245  func bzrRemoteRepo(vcsBzr *vcsCmd, rootDir string) (remoteRepo string, err error) {
   246  	outb, err := vcsBzr.runOutput(rootDir, "config parent_location")
   247  	if err != nil {
   248  		return "", err
   249  	}
   250  	return strings.TrimSpace(string(outb)), nil
   251  }
   252  
   253  func bzrResolveRepo(vcsBzr *vcsCmd, rootDir, remoteRepo string) (realRepo string, err error) {
   254  	outb, err := vcsBzr.runOutput(rootDir, "info "+remoteRepo)
   255  	if err != nil {
   256  		return "", err
   257  	}
   258  	out := string(outb)
   259  
   260  	// Expect:
   261  	// ...
   262  	//   (branch root|repository branch): <URL>
   263  	// ...
   264  
   265  	found := false
   266  	for _, prefix := range []string{"\n  branch root: ", "\n  repository branch: "} {
   267  		i := strings.Index(out, prefix)
   268  		if i >= 0 {
   269  			out = out[i+len(prefix):]
   270  			found = true
   271  			break
   272  		}
   273  	}
   274  	if !found {
   275  		return "", fmt.Errorf("unable to parse output of bzr info")
   276  	}
   277  
   278  	i := strings.Index(out, "\n")
   279  	if i < 0 {
   280  		return "", fmt.Errorf("unable to parse output of bzr info")
   281  	}
   282  	out = out[:i]
   283  	return strings.TrimSpace(out), nil
   284  }
   285  
// vcsSvn describes how to use Subversion.
var vcsSvn = &vcsCmd{
	name: "Subversion",
	cmd:  "svn",

	createCmd:   []string{"checkout {repo} {dir}"},
	downloadCmd: []string{"update"},

	// There is no tag command in subversion.
	// The branch information is all in the path names.
	// Because tagSyncCmd is left nil, tagSync is a no-op for Subversion.

	// Schemes are tried in this order when pinging; "https" is the fallback.
	scheme:     []string{"https", "http", "svn", "svn+ssh"},
	pingCmd:    "info {scheme}://{repo}",
	remoteRepo: svnRemoteRepo,
}
   301  
   302  func svnRemoteRepo(vcsSvn *vcsCmd, rootDir string) (remoteRepo string, err error) {
   303  	outb, err := vcsSvn.runOutput(rootDir, "info")
   304  	if err != nil {
   305  		return "", err
   306  	}
   307  	out := string(outb)
   308  
   309  	// Expect:
   310  	//
   311  	//	 ...
   312  	// 	URL: <URL>
   313  	// 	...
   314  	//
   315  	// Note that we're not using the Repository Root line,
   316  	// because svn allows checking out subtrees.
   317  	// The URL will be the URL of the subtree (what we used with 'svn co')
   318  	// while the Repository Root may be a much higher parent.
   319  	i := strings.Index(out, "\nURL: ")
   320  	if i < 0 {
   321  		return "", fmt.Errorf("unable to parse output of svn info")
   322  	}
   323  	out = out[i+len("\nURL: "):]
   324  	i = strings.Index(out, "\n")
   325  	if i < 0 {
   326  		return "", fmt.Errorf("unable to parse output of svn info")
   327  	}
   328  	out = out[:i]
   329  	return strings.TrimSpace(out), nil
   330  }
   331  
// fossilRepoName is the name go get associates with a fossil repository. In the
// real world the file can be named anything.
// vcsFossil.createCmd clones the repository into {dir}/fossilRepoName.
const fossilRepoName = ".fossil"
   335  
// vcsFossil describes how to use Fossil (fossil-scm.org)
var vcsFossil = &vcsCmd{
	name: "Fossil",
	cmd:  "fossil",

	// "-go-internal-mkdir {dir}" and "-go-internal-cd {dir}" are directives
	// handled by run1: the first creates {dir} before cloning into it, the
	// second runs "open" inside {dir}. Note that the second command spells
	// out ".fossil" literally rather than using fossilRepoName.
	createCmd:   []string{"-go-internal-mkdir {dir} clone {repo} " + filepath.Join("{dir}", fossilRepoName), "-go-internal-cd {dir} open .fossil"},
	downloadCmd: []string{"up"},

	tagCmd:         []tagCmd{{"tag ls", `(.*)`}},
	tagSyncCmd:     []string{"up tag:{tag}"},
	tagSyncDefault: []string{"up trunk"},

	// No pingCmd is set, so repoRootFromVCSPaths never pings for Fossil.
	scheme:     []string{"https", "http"},
	remoteRepo: fossilRemoteRepo,
}
   351  
   352  func fossilRemoteRepo(vcsFossil *vcsCmd, rootDir string) (remoteRepo string, err error) {
   353  	out, err := vcsFossil.runOutput(rootDir, "remote-url")
   354  	if err != nil {
   355  		return "", err
   356  	}
   357  	return strings.TrimSpace(string(out)), nil
   358  }
   359  
// String returns the human-readable name of the version control system
// (for example "Git"), so that a *vcsCmd satisfies fmt.Stringer.
func (v *vcsCmd) String() string {
	return v.name
}
   363  
   364  // run runs the command line cmd in the given directory.
   365  // keyval is a list of key, value pairs. run expands
   366  // instances of {key} in cmd into value, but only after
   367  // splitting cmd into individual arguments.
   368  // If an error occurs, run prints the command line and the
   369  // command's combined stdout+stderr to standard error.
   370  // Otherwise run discards the command's output.
   371  func (v *vcsCmd) run(dir string, cmd string, keyval ...string) error {
   372  	_, err := v.run1(dir, cmd, keyval, true)
   373  	return err
   374  }
   375  
   376  // runVerboseOnly is like run but only generates error output to standard error in verbose mode.
   377  func (v *vcsCmd) runVerboseOnly(dir string, cmd string, keyval ...string) error {
   378  	_, err := v.run1(dir, cmd, keyval, false)
   379  	return err
   380  }
   381  
   382  // runOutput is like run but returns the output of the command.
   383  func (v *vcsCmd) runOutput(dir string, cmd string, keyval ...string) ([]byte, error) {
   384  	return v.run1(dir, cmd, keyval, true)
   385  }
   386  
   387  // run1 is the generalized implementation of run and runOutput.
   388  func (v *vcsCmd) run1(dir string, cmdline string, keyval []string, verbose bool) ([]byte, error) {
   389  	m := make(map[string]string)
   390  	for i := 0; i < len(keyval); i += 2 {
   391  		m[keyval[i]] = keyval[i+1]
   392  	}
   393  	args := strings.Fields(cmdline)
   394  	for i, arg := range args {
   395  		args[i] = expand(m, arg)
   396  	}
   397  
   398  	if len(args) >= 2 && args[0] == "-go-internal-mkdir" {
   399  		var err error
   400  		if filepath.IsAbs(args[1]) {
   401  			err = os.Mkdir(args[1], os.ModePerm)
   402  		} else {
   403  			err = os.Mkdir(filepath.Join(dir, args[1]), os.ModePerm)
   404  		}
   405  		if err != nil {
   406  			return nil, err
   407  		}
   408  		args = args[2:]
   409  	}
   410  
   411  	if len(args) >= 2 && args[0] == "-go-internal-cd" {
   412  		if filepath.IsAbs(args[1]) {
   413  			dir = args[1]
   414  		} else {
   415  			dir = filepath.Join(dir, args[1])
   416  		}
   417  		args = args[2:]
   418  	}
   419  
   420  	_, err := exec.LookPath(v.cmd)
   421  	if err != nil {
   422  		fmt.Fprintf(os.Stderr,
   423  			"z: missing %s command. See https://golang.org/s/gogetcmd\n",
   424  			v.name)
   425  		return nil, err
   426  	}
   427  
   428  	cmd := exec.Command(v.cmd, args...)
   429  	cmd.Dir = dir
   430  	cmd.Env = base.EnvForDir(cmd.Dir, os.Environ())
   431  	if cfg.BuildX {
   432  		fmt.Fprintf(os.Stderr, "cd %s\n", dir)
   433  		fmt.Fprintf(os.Stderr, "%s %s\n", v.cmd, strings.Join(args, " "))
   434  	}
   435  	out, err := cmd.Output()
   436  	if err != nil {
   437  		if verbose || cfg.BuildV {
   438  			fmt.Fprintf(os.Stderr, "# cd %s; %s %s\n", dir, v.cmd, strings.Join(args, " "))
   439  			if ee, ok := err.(*exec.ExitError); ok && len(ee.Stderr) > 0 {
   440  				os.Stderr.Write(ee.Stderr)
   441  			} else {
   442  				fmt.Fprintf(os.Stderr, err.Error())
   443  			}
   444  		}
   445  	}
   446  	return out, err
   447  }
   448  
   449  // ping pings to determine scheme to use.
   450  func (v *vcsCmd) ping(scheme, repo string) error {
   451  	return v.runVerboseOnly(".", v.pingCmd, "scheme", scheme, "repo", repo)
   452  }
   453  
   454  // create creates a new copy of repo in dir.
   455  // The parent of dir must exist; dir must not.
   456  func (v *vcsCmd) create(dir, repo string) error {
   457  	for _, cmd := range v.createCmd {
   458  		if err := v.run(".", cmd, "dir", dir, "repo", repo); err != nil {
   459  			return err
   460  		}
   461  	}
   462  	return nil
   463  }
   464  
   465  // download downloads any new changes for the repo in dir.
   466  func (v *vcsCmd) download(dir string) error {
   467  	for _, cmd := range v.downloadCmd {
   468  		if err := v.run(dir, cmd); err != nil {
   469  			return err
   470  		}
   471  	}
   472  	return nil
   473  }
   474  
   475  // tags returns the list of available tags for the repo in dir.
   476  func (v *vcsCmd) tags(dir string) ([]string, error) {
   477  	var tags []string
   478  	for _, tc := range v.tagCmd {
   479  		out, err := v.runOutput(dir, tc.cmd)
   480  		if err != nil {
   481  			return nil, err
   482  		}
   483  		re := regexp.MustCompile(`(?m-s)` + tc.pattern)
   484  		for _, m := range re.FindAllStringSubmatch(string(out), -1) {
   485  			tags = append(tags, m[1])
   486  		}
   487  	}
   488  	return tags, nil
   489  }
   490  
   491  // tagSync syncs the repo in dir to the named tag,
   492  // which either is a tag returned by tags or is v.tagDefault.
   493  func (v *vcsCmd) tagSync(dir, tag string) error {
   494  	if v.tagSyncCmd == nil {
   495  		return nil
   496  	}
   497  	if tag != "" {
   498  		for _, tc := range v.tagLookupCmd {
   499  			out, err := v.runOutput(dir, tc.cmd, "tag", tag)
   500  			if err != nil {
   501  				return err
   502  			}
   503  			re := regexp.MustCompile(`(?m-s)` + tc.pattern)
   504  			m := re.FindStringSubmatch(string(out))
   505  			if len(m) > 1 {
   506  				tag = m[1]
   507  				break
   508  			}
   509  		}
   510  	}
   511  
   512  	if tag == "" && v.tagSyncDefault != nil {
   513  		for _, cmd := range v.tagSyncDefault {
   514  			if err := v.run(dir, cmd); err != nil {
   515  				return err
   516  			}
   517  		}
   518  		return nil
   519  	}
   520  
   521  	for _, cmd := range v.tagSyncCmd {
   522  		if err := v.run(dir, cmd, "tag", tag); err != nil {
   523  			return err
   524  		}
   525  	}
   526  	return nil
   527  }
   528  
// A vcsPath describes how to convert an import path into a
// version control system and repository name.
//
// repoRootFromVCSPaths skips an entry unless the import path starts with
// prefix, then matches regexp against the whole import path. The named
// subexpressions (plus "prefix" and "import") are what repo and vcs are
// expanded with.
type vcsPath struct {
	prefix string                              // prefix this description applies to
	re     string                              // pattern for import path
	repo   string                              // repository to use (expand with match of re)
	vcs    string                              // version control system to use (expand with match of re)
	check  func(match map[string]string) error // additional checks
	ping   bool                                // ping for scheme to use to download repo

	// NOTE(review): regexp is populated outside the visible part of this file — confirm where.
	regexp *regexp.Regexp // cached compiled form of re
}
   541  
   542  // vcsFromDir inspects dir and its parents to determine the
   543  // version control system and code repository to use.
   544  // On return, root is the import path
   545  // corresponding to the root of the repository.
   546  func vcsFromDir(dir, srcRoot string) (vcs *vcsCmd, root string, err error) {
   547  	// Clean and double-check that dir is in (a subdirectory of) srcRoot.
   548  	dir = filepath.Clean(dir)
   549  	srcRoot = filepath.Clean(srcRoot)
   550  	if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
   551  		return nil, "", fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
   552  	}
   553  
   554  	var vcsRet *vcsCmd
   555  	var rootRet string
   556  
   557  	origDir := dir
   558  	for len(dir) > len(srcRoot) {
   559  		for _, vcs := range vcsList {
   560  			if _, err := os.Stat(filepath.Join(dir, "."+vcs.cmd)); err == nil {
   561  				root := filepath.ToSlash(dir[len(srcRoot)+1:])
   562  				// Record first VCS we find, but keep looking,
   563  				// to detect mistakes like one kind of VCS inside another.
   564  				if vcsRet == nil {
   565  					vcsRet = vcs
   566  					rootRet = root
   567  					continue
   568  				}
   569  				// Allow .git inside .git, which can arise due to submodules.
   570  				if vcsRet == vcs && vcs.cmd == "git" {
   571  					continue
   572  				}
   573  				// Otherwise, we have one VCS inside a different VCS.
   574  				return nil, "", fmt.Errorf("directory %q uses %s, but parent %q uses %s",
   575  					filepath.Join(srcRoot, rootRet), vcsRet.cmd, filepath.Join(srcRoot, root), vcs.cmd)
   576  			}
   577  		}
   578  
   579  		// Move to parent.
   580  		ndir := filepath.Dir(dir)
   581  		if len(ndir) >= len(dir) {
   582  			// Shouldn't happen, but just in case, stop.
   583  			break
   584  		}
   585  		dir = ndir
   586  	}
   587  
   588  	if vcsRet != nil {
   589  		return vcsRet, rootRet, nil
   590  	}
   591  
   592  	return nil, "", fmt.Errorf("directory %q is not using a known version control system", origDir)
   593  }
   594  
   595  // checkNestedVCS checks for an incorrectly-nested VCS-inside-VCS
   596  // situation for dir, checking parents up until srcRoot.
   597  func checkNestedVCS(vcs *vcsCmd, dir, srcRoot string) error {
   598  	if len(dir) <= len(srcRoot) || dir[len(srcRoot)] != filepath.Separator {
   599  		return fmt.Errorf("directory %q is outside source root %q", dir, srcRoot)
   600  	}
   601  
   602  	otherDir := dir
   603  	for len(otherDir) > len(srcRoot) {
   604  		for _, otherVCS := range vcsList {
   605  			if _, err := os.Stat(filepath.Join(otherDir, "."+otherVCS.cmd)); err == nil {
   606  				// Allow expected vcs in original dir.
   607  				if otherDir == dir && otherVCS == vcs {
   608  					continue
   609  				}
   610  				// Allow .git inside .git, which can arise due to submodules.
   611  				if otherVCS == vcs && vcs.cmd == "git" {
   612  					continue
   613  				}
   614  				// Otherwise, we have one VCS inside a different VCS.
   615  				return fmt.Errorf("directory %q uses %s, but parent %q uses %s", dir, vcs.cmd, otherDir, otherVCS.cmd)
   616  			}
   617  		}
   618  		// Move to parent.
   619  		newDir := filepath.Dir(otherDir)
   620  		if len(newDir) >= len(otherDir) {
   621  			// Shouldn't happen, but just in case, stop.
   622  			break
   623  		}
   624  		otherDir = newDir
   625  	}
   626  
   627  	return nil
   628  }
   629  
// RepoRoot describes the repository root for a tree of source code.
// Values are produced by repoRootFromVCSPaths (static patterns) and
// repoRootForImportDynamic (go-import <meta> tags).
type RepoRoot struct {
	Repo     string // repository URL, including scheme
	Root     string // import path corresponding to root of repo
	IsCustom bool   // defined by served <meta> tags (as opposed to hard-coded pattern)
	VCS      string // vcs type ("mod", "git", ...)

	// vcs is nil when VCS is "mod", which has no entry in vcsList.
	vcs *vcsCmd // internal: vcs command access
}
   639  
// httpPrefixRE matches a leading "http:" or "https:" in an import path.
// repoRootFromVCSPaths uses it to reject import paths written as URLs.
var httpPrefixRE = regexp.MustCompile(`^https?:`)
   641  
// ModuleMode specifies whether to prefer modules when looking up code sources.
// In this file it is only passed through to parseMetaGoImports.
type ModuleMode int

const (
	// IgnoreMod is the zero value of ModuleMode.
	// NOTE(review): presumably "mod" go-import entries are ignored in this mode — confirm in parseMetaGoImports.
	IgnoreMod ModuleMode = iota
	// NOTE(review): presumably "mod" go-import entries are preferred in this mode — confirm in parseMetaGoImports.
	PreferMod
)
   649  
   650  // RepoRootForImportPath analyzes importPath to determine the
   651  // version control system, and code repository to use.
   652  func RepoRootForImportPath(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) {
   653  	rr, err := repoRootFromVCSPaths(importPath, "", security, vcsPaths)
   654  	if err == errUnknownSite {
   655  		rr, err = repoRootForImportDynamic(importPath, mod, security)
   656  		if err != nil {
   657  			err = fmt.Errorf("unrecognized import path %q (%v)", importPath, err)
   658  		}
   659  	}
   660  	if err != nil {
   661  		rr1, err1 := repoRootFromVCSPaths(importPath, "", security, vcsPathsAfterDynamic)
   662  		if err1 == nil {
   663  			rr = rr1
   664  			err = nil
   665  		}
   666  	}
   667  
   668  	// Should have been taken care of above, but make sure.
   669  	if err == nil && strings.Contains(importPath, "...") && strings.Contains(rr.Root, "...") {
   670  		// Do not allow wildcards in the repo root.
   671  		rr = nil
   672  		err = fmt.Errorf("cannot expand ... in %q", importPath)
   673  	}
   674  	return rr, err
   675  }
   676  
// errUnknownSite is returned by repoRootFromVCSPaths when no entry of the
// given table applies to the import path. RepoRootForImportPath compares
// against it with == to decide whether to attempt dynamic discovery.
var errUnknownSite = errors.New("dynamic lookup required to find mapping")
   678  
// repoRootFromVCSPaths attempts to map importPath to a repoRoot
// using the mappings defined in vcsPaths.
// If scheme is non-empty, that scheme is forced.
//
// The first entry whose prefix and regexp match decides the result.
// An entry with a non-empty prefix that matches the prefix but not the
// regexp makes the import path invalid. If no entry applies,
// errUnknownSite is returned.
func repoRootFromVCSPaths(importPath, scheme string, security web.SecurityMode, vcsPaths []*vcsPath) (*RepoRoot, error) {
	// A common error is to use https://packagepath because that's what
	// hg and git require. Diagnose this helpfully.
	if loc := httpPrefixRE.FindStringIndex(importPath); loc != nil {
		// The importPath has been cleaned, so has only one slash. The pattern
		// ignores the slashes; the error message puts them back on the RHS at least.
		return nil, fmt.Errorf("%q not allowed in import path", importPath[loc[0]:loc[1]]+"//")
	}
	for _, srv := range vcsPaths {
		if !strings.HasPrefix(importPath, srv.prefix) {
			continue
		}
		m := srv.regexp.FindStringSubmatch(importPath)
		if m == nil {
			// Only prefixed entries are authoritative for their prefix;
			// prefix-less entries simply do not apply.
			if srv.prefix != "" {
				return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath)
			}
			continue
		}

		// Build map of named subexpression matches for expand.
		match := map[string]string{
			"prefix": srv.prefix,
			"import": importPath,
		}
		// The first non-empty value recorded for a name wins, so the
		// predefined "prefix" and "import" entries are never overwritten.
		for i, name := range srv.regexp.SubexpNames() {
			if name != "" && match[name] == "" {
				match[name] = m[i]
			}
		}
		// An explicit vcs or repo template on the entry overrides any
		// subexpression of the same name.
		if srv.vcs != "" {
			match["vcs"] = expand(match, srv.vcs)
		}
		if srv.repo != "" {
			match["repo"] = expand(match, srv.repo)
		}
		if srv.check != nil {
			if err := srv.check(match); err != nil {
				return nil, err
			}
		}
		vcs := vcsByCmd(match["vcs"])
		if vcs == nil {
			return nil, fmt.Errorf("unknown version control system %q", match["vcs"])
		}
		if srv.ping {
			if scheme != "" {
				match["repo"] = scheme + "://" + match["repo"]
			} else {
				// Try the schemes in the VCS's preferred order, skipping
				// insecure ones in secure mode, and take the first that
				// answers a ping. (scheme is shadowed inside this loop.)
				for _, scheme := range vcs.scheme {
					if security == web.Secure && !vcs.isSecureScheme(scheme) {
						continue
					}
					if vcs.pingCmd != "" && vcs.ping(scheme, match["repo"]) == nil {
						match["repo"] = scheme + "://" + match["repo"]
						goto Found
					}
				}
				// No scheme found. Fall back to the first one.
				match["repo"] = vcs.scheme[0] + "://" + match["repo"]
			Found:
			}
		}
		rr := &RepoRoot{
			Repo: match["repo"],
			Root: match["root"],
			VCS:  vcs.cmd,
			vcs:  vcs,
		}
		return rr, nil
	}
	return nil, errUnknownSite
}
   755  
// repoRootForImportDynamic finds a *RepoRoot for a custom domain that's not
// statically known, i.e. not matched by the static tables consulted by
// repoRootFromVCSPaths.
//
// This handles custom import paths like "name.tld/pkg/foo" or just "name.tld".
//
// It fetches the page for importPath, parses its go-import <meta> tags
// and picks the one matching importPath. If that tag's prefix differs
// from importPath, the page for the prefix itself is fetched and must
// declare the same tag.
func repoRootForImportDynamic(importPath string, mod ModuleMode, security web.SecurityMode) (*RepoRoot, error) {
	// The host is everything up to the first slash and must look like
	// a domain name (contain a dot).
	slash := strings.Index(importPath, "/")
	if slash < 0 {
		slash = len(importPath)
	}
	host := importPath[:slash]
	if !strings.Contains(host, ".") {
		return nil, errors.New("import path does not begin with hostname")
	}
	urlStr, body, err := web.GetMaybeInsecure(importPath, security)
	if err != nil {
		msg := "https fetch: %v"
		if security == web.Insecure {
			msg = "http/" + msg
		}
		return nil, fmt.Errorf(msg, err)
	}
	defer body.Close()
	imports, err := parseMetaGoImports(body, mod)
	if err != nil {
		return nil, fmt.Errorf("parsing %s: %v", importPath, err)
	}
	// Find the matched meta import.
	mmi, err := matchGoImport(imports, importPath)
	if err != nil {
		// Every error other than ImportMismatchError is reported as is;
		// an ImportMismatchError is reported as missing go-import tags.
		if _, ok := err.(ImportMismatchError); !ok {
			return nil, fmt.Errorf("parse %s: %v", urlStr, err)
		}
		return nil, fmt.Errorf("parse %s: no go-import meta tags (%s)", urlStr, err)
	}
	if cfg.BuildV {
		log.Printf("get %q: found meta tag %#v at %s", importPath, mmi, urlStr)
	}
	// If the import was "uni.edu/bob/project", which said the
	// prefix was "uni.edu" and the RepoRoot was "evilroot.com",
	// make sure we don't trust Bob and check out evilroot.com to
	// "uni.edu" yet (possibly overwriting/preempting another
	// non-evil student). Instead, first verify the root and see
	// if it matches Bob's claim.
	if mmi.Prefix != importPath {
		if cfg.BuildV {
			log.Printf("get %q: verifying non-authoritative meta tag", importPath)
		}
		// Keep the original URL for the error message; urlStr is
		// reassigned to the prefix page's URL below.
		urlStr0 := urlStr
		var imports []metaImport
		urlStr, imports, err = metaImportsForPrefix(mmi.Prefix, mod, security)
		if err != nil {
			return nil, err
		}
		metaImport2, err := matchGoImport(imports, importPath)
		if err != nil || mmi != metaImport2 {
			return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, mmi.Prefix)
		}
	}

	if err := validateRepoRoot(mmi.RepoRoot); err != nil {
		return nil, fmt.Errorf("%s: invalid repo root %q: %v", urlStr, mmi.RepoRoot, err)
	}
	// "mod" is accepted even though it has no vcsCmd; rr.vcs is then nil.
	vcs := vcsByCmd(mmi.VCS)
	if vcs == nil && mmi.VCS != "mod" {
		return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, mmi.VCS)
	}

	rr := &RepoRoot{
		Repo:     mmi.RepoRoot,
		Root:     mmi.Prefix,
		IsCustom: true,
		VCS:      mmi.VCS,
		vcs:      vcs,
	}
	return rr, nil
}
   832  
   833  // validateRepoRoot returns an error if repoRoot does not seem to be
   834  // a valid URL with scheme.
   835  func validateRepoRoot(repoRoot string) error {
   836  	url, err := url.Parse(repoRoot)
   837  	if err != nil {
   838  		return err
   839  	}
   840  	if url.Scheme == "" {
   841  		return errors.New("no scheme")
   842  	}
   843  	return nil
   844  }
   845  
// fetchGroup is the singleflight group through which metaImportsForPrefix
// routes its fetches, keyed by import prefix.
// NOTE(review): presumably this de-duplicates concurrent fetches of the same prefix — confirm in internal/singleflight.
var fetchGroup singleflight.Group
var (
	// fetchCacheMu guards fetchCache.
	fetchCacheMu sync.Mutex
	// fetchCache remembers the result (including errors) of every fetch
	// performed by metaImportsForPrefix.
	fetchCache   = map[string]fetchResult{} // key is metaImportsForPrefix's importPrefix
)
   851  
   852  // metaImportsForPrefix takes a package's root import path as declared in a <meta> tag
   853  // and returns its HTML discovery URL and the parsed metaImport lines
   854  // found on the page.
   855  //
   856  // The importPath is of the form "golang.org/x/tools".
   857  // It is an error if no imports are found.
   858  // urlStr will still be valid if err != nil.
   859  // The returned urlStr will be of the form "https://golang.org/x/tools?go-get=1"
   860  func metaImportsForPrefix(importPrefix string, mod ModuleMode, security web.SecurityMode) (urlStr string, imports []metaImport, err error) {
   861  	setCache := func(res fetchResult) (fetchResult, error) {
   862  		fetchCacheMu.Lock()
   863  		defer fetchCacheMu.Unlock()
   864  		fetchCache[importPrefix] = res
   865  		return res, nil
   866  	}
   867  
   868  	resi, _, _ := fetchGroup.Do(importPrefix, func() (resi interface{}, err error) {
   869  		fetchCacheMu.Lock()
   870  		if res, ok := fetchCache[importPrefix]; ok {
   871  			fetchCacheMu.Unlock()
   872  			return res, nil
   873  		}
   874  		fetchCacheMu.Unlock()
   875  
   876  		urlStr, body, err := web.GetMaybeInsecure(importPrefix, security)
   877  		if err != nil {
   878  			return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("fetch %s: %v", urlStr, err)})
   879  		}
   880  		imports, err := parseMetaGoImports(body, mod)
   881  		if err != nil {
   882  			return setCache(fetchResult{urlStr: urlStr, err: fmt.Errorf("parsing %s: %v", urlStr, err)})
   883  		}
   884  		if len(imports) == 0 {
   885  			err = fmt.Errorf("fetch %s: no go-import meta tag", urlStr)
   886  		}
   887  		return setCache(fetchResult{urlStr: urlStr, imports: imports, err: err})
   888  	})
   889  	res := resi.(fetchResult)
   890  	return res.urlStr, res.imports, res.err
   891  }
   892  
// fetchResult is the outcome of one discovery fetch performed by
// metaImportsForPrefix. Values of this type are what fetchCache stores,
// so a failed fetch is remembered just like a successful one.
type fetchResult struct {
	urlStr  string // e.g. "https://foo.com/x/bar?go-get=1"
	imports []metaImport // go-import entries parsed from the page; left unset when fetching or parsing failed
	err     error // fetch or parse failure, or the "no go-import meta tag" error; nil on success
}
   898  
   899  // metaImport represents the parsed <meta name="go-import"
   900  // content="prefix vcs reporoot" /> tags from HTML files.
   901  type metaImport struct {
   902  	Prefix, VCS, RepoRoot string
   903  }
   904  
   905  // pathPrefix reports whether sub is a prefix of s,
   906  // only considering entire path components.
   907  func pathPrefix(s, sub string) bool {
   908  	// strings.HasPrefix is necessary but not sufficient.
   909  	if !strings.HasPrefix(s, sub) {
   910  		return false
   911  	}
   912  	// The remainder after the prefix must either be empty or start with a slash.
   913  	rem := s[len(sub):]
   914  	return rem == "" || rem[0] == '/'
   915  }
   916  
   917  // A ImportMismatchError is returned where metaImport/s are present
   918  // but none match our import path.
   919  type ImportMismatchError struct {
   920  	importPath string
   921  	mismatches []string // the meta imports that were discarded for not matching our importPath
   922  }
   923  
   924  func (m ImportMismatchError) Error() string {
   925  	formattedStrings := make([]string, len(m.mismatches))
   926  	for i, pre := range m.mismatches {
   927  		formattedStrings[i] = fmt.Sprintf("meta tag %s did not match import path %s", pre, m.importPath)
   928  	}
   929  	return strings.Join(formattedStrings, ", ")
   930  }
   931  
   932  // matchGoImport returns the metaImport from imports matching importPath.
   933  // An error is returned if there are multiple matches.
   934  // errNoMatch is returned if none match.
   935  func matchGoImport(imports []metaImport, importPath string) (metaImport, error) {
   936  	match := -1
   937  
   938  	errImportMismatch := ImportMismatchError{importPath: importPath}
   939  	for i, im := range imports {
   940  		if !pathPrefix(importPath, im.Prefix) {
   941  			errImportMismatch.mismatches = append(errImportMismatch.mismatches, im.Prefix)
   942  			continue
   943  		}
   944  
   945  		if match >= 0 {
   946  			if imports[match].VCS == "mod" && im.VCS != "mod" {
   947  				// All the mod entries precede all the non-mod entries.
   948  				// We have a mod entry and don't care about the rest,
   949  				// matching or not.
   950  				break
   951  			}
   952  			return metaImport{}, fmt.Errorf("multiple meta tags match import path %q", importPath)
   953  		}
   954  		match = i
   955  	}
   956  
   957  	if match == -1 {
   958  		return metaImport{}, errImportMismatch
   959  	}
   960  	return imports[match], nil
   961  }
   962  
   963  // expand rewrites s to replace {k} with match[k] for each key k in match.
   964  func expand(match map[string]string, s string) string {
   965  	// We want to replace each match exactly once, and the result of expansion
   966  	// must not depend on the iteration order through the map.
   967  	// A strings.Replacer has exactly the properties we're looking for.
   968  	oldNew := make([]string, 0, 2*len(match))
   969  	for k, v := range match {
   970  		oldNew = append(oldNew, "{"+k+"}", v)
   971  	}
   972  	return strings.NewReplacer(oldNew...).Replace(s)
   973  }
   974  
// vcsPaths defines the meaning of import paths referring to
// commonly-used VCS hosting sites (github.com/user/dir)
// and import paths referring to a fully-qualified importPath
// containing a VCS type (foo.com/repo.git/dir).
//
// Every pattern captures the repository root in the named group "root".
// The repo values are templates written with the {name} placeholder form
// that expand substitutes. The re strings are compiled into the regexp
// field by init.
var vcsPaths = []*vcsPath{
	// GitHub.
	// noVCSSuffix rejects repository names that end in a VCS suffix such as ".git".
	{
		prefix: "github.com/",
		re:     `^(?P<root>github\.com/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(/[\p{L}0-9_.\-]+)*$`,
		vcs:    "git",
		repo:   "https://{root}",
		check:  noVCSSuffix,
	},

	// Bitbucket.
	// No vcs is fixed here; bitbucketVCS determines it and stores it in match["vcs"].
	{
		prefix: "bitbucket.org/",
		re:     `^(?P<root>bitbucket\.org/(?P<bitname>[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
		repo:   "https://{root}",
		check:  bitbucketVCS,
	},

	// IBM DevOps Services (JazzHub)
	{
		prefix: "hub.jazz.net/git/",
		re:     `^(?P<root>hub\.jazz\.net/git/[a-z0-9]+/[A-Za-z0-9_.\-]+)(/[A-Za-z0-9_.\-]+)*$`,
		vcs:    "git",
		repo:   "https://{root}",
		check:  noVCSSuffix,
	},

	// Git at Apache.
	// The pattern requires the root to end in ".git".
	{
		prefix: "git.apache.org/",
		re:     `^(?P<root>git\.apache\.org/[a-z0-9_.\-]+\.git)(/[A-Za-z0-9_.\-]+)*$`,
		vcs:    "git",
		repo:   "https://{root}",
	},

	// Git at OpenStack.
	// An optional ".git" after the repository name is accepted but is not part of root.
	{
		prefix: "git.openstack.org/",
		re:     `^(?P<root>git\.openstack\.org/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+)(\.git)?(/[A-Za-z0-9_.\-]+)*$`,
		vcs:    "git",
		repo:   "https://{root}",
	},

	// chiselapp.com for fossil.
	// The pattern ends right after the repository name, so no sub-directory
	// may follow the root.
	{
		prefix: "chiselapp.com/",
		re:     `^(?P<root>chiselapp\.com/user/[A-Za-z0-9]+/repository/[A-Za-z0-9_.\-]+)$`,
		vcs:    "fossil",
		repo:   "https://{root}",
	},

	// General syntax for any server.
	// Must be last.
	// It has no prefix; the VCS kind is captured from the path suffix
	// (.bzr, .fossil, .git, .hg or .svn) in the named group "vcs", and the
	// part before that suffix in the named group "repo".
	{
		re:   `^(?P<root>(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(?P<vcs>bzr|fossil|git|hg|svn))(/~?[A-Za-z0-9_.\-]+)*$`,
		ping: true,
	},
}
  1037  
// vcsPathsAfterDynamic gives additional vcsPaths entries
// to try after the dynamic HTML check.
// This gives those sites a chance to introduce <meta> tags
// as part of a graceful transition away from the hard-coded logic.
// As with vcsPaths, the re strings are compiled into the regexp field by init.
var vcsPathsAfterDynamic = []*vcsPath{
	// Launchpad. See golang.org/issue/11436.
	// The pattern accepts either project[/series] or ~user/(+junk|project)/branch
	// as the root. For the first form, launchpadVCS decides whether the
	// series component is really part of the root.
	{
		prefix: "launchpad.net/",
		re:     `^(?P<root>launchpad\.net/((?P<project>[A-Za-z0-9_.\-]+)(?P<series>/[A-Za-z0-9_.\-]+)?|~[A-Za-z0-9_.\-]+/(\+junk|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+))(/[A-Za-z0-9_.\-]+)*$`,
		vcs:    "bzr",
		repo:   "https://{root}",
		check:  launchpadVCS,
	},
}
  1052  
  1053  func init() {
  1054  	// fill in cached regexps.
  1055  	// Doing this eagerly discovers invalid regexp syntax
  1056  	// without having to run a command that needs that regexp.
  1057  	for _, srv := range vcsPaths {
  1058  		srv.regexp = regexp.MustCompile(srv.re)
  1059  	}
  1060  	for _, srv := range vcsPathsAfterDynamic {
  1061  		srv.regexp = regexp.MustCompile(srv.re)
  1062  	}
  1063  }
  1064  
  1065  // noVCSSuffix checks that the repository name does not
  1066  // end in .foo for any version control system foo.
  1067  // The usual culprit is ".git".
  1068  func noVCSSuffix(match map[string]string) error {
  1069  	repo := match["repo"]
  1070  	for _, vcs := range vcsList {
  1071  		if strings.HasSuffix(repo, "."+vcs.cmd) {
  1072  			return fmt.Errorf("invalid version control suffix in %s path", match["prefix"])
  1073  		}
  1074  	}
  1075  	return nil
  1076  }
  1077  
  1078  // bitbucketVCS determines the version control system for a
  1079  // Bitbucket repository, by using the Bitbucket API.
  1080  func bitbucketVCS(match map[string]string) error {
  1081  	if err := noVCSSuffix(match); err != nil {
  1082  		return err
  1083  	}
  1084  
  1085  	var resp struct {
  1086  		SCM string `json:"scm"`
  1087  	}
  1088  	url := expand(match, "https://api.bitbucket.org/2.0/repositories/{bitname}?fields=scm")
  1089  	data, err := web.Get(url)
  1090  	if err != nil {
  1091  		if httpErr, ok := err.(*web.HTTPError); ok && httpErr.StatusCode == 403 {
  1092  			// this may be a private repository. If so, attempt to determine which
  1093  			// VCS it uses. See issue 5375.
  1094  			root := match["root"]
  1095  			for _, vcs := range []string{"git", "hg"} {
  1096  				if vcsByCmd(vcs).ping("https", root) == nil {
  1097  					resp.SCM = vcs
  1098  					break
  1099  				}
  1100  			}
  1101  		}
  1102  
  1103  		if resp.SCM == "" {
  1104  			return err
  1105  		}
  1106  	} else {
  1107  		if err := json.Unmarshal(data, &resp); err != nil {
  1108  			return fmt.Errorf("decoding %s: %v", url, err)
  1109  		}
  1110  	}
  1111  
  1112  	if vcsByCmd(resp.SCM) != nil {
  1113  		match["vcs"] = resp.SCM
  1114  		if resp.SCM == "git" {
  1115  			match["repo"] += ".git"
  1116  		}
  1117  		return nil
  1118  	}
  1119  
  1120  	return fmt.Errorf("unable to detect version control system for bitbucket.org/ path")
  1121  }
  1122  
  1123  // launchpadVCS solves the ambiguity for "lp.net/project/foo". In this case,
  1124  // "foo" could be a series name registered in Launchpad with its own branch,
  1125  // and it could also be the name of a directory within the main project
  1126  // branch one level up.
  1127  func launchpadVCS(match map[string]string) error {
  1128  	if match["project"] == "" || match["series"] == "" {
  1129  		return nil
  1130  	}
  1131  	_, err := web.Get(expand(match, "https://code.launchpad.net/{project}{series}/.bzr/branch-format"))
  1132  	if err != nil {
  1133  		match["root"] = expand(match, "launchpad.net/{project}")
  1134  		match["repo"] = expand(match, "https://{root}")
  1135  	}
  1136  	return nil
  1137  }