github.com/golang/dep@v0.5.4/gps/deduce.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gps
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"net/http"
    13  	"net/url"
    14  	"os"
    15  	"path"
    16  	"path/filepath"
    17  	"regexp"
    18  	"runtime"
    19  	"strconv"
    20  	"strings"
    21  	"sync"
    22  
    23  	radix "github.com/armon/go-radix"
    24  	"github.com/pkg/errors"
    25  )
    26  
// URL schemes that are tried, in the listed order, for each kind of source.
// validateVCSScheme checks explicit schemes against the git, bzr, hg and svn
// lists; the deducers in this file iterate over the git, bzr, hg and gopkg.in
// lists when an import path carries no scheme of its own.
//
// NOTE(review): netrc and readNetrcOnce are not referenced in this part of
// the file; presumably they cache the parsed .netrc entries and guard their
// one-time loading - confirm against the netrc helpers further down.
var (
	gitSchemes     = []string{"https", "ssh", "git", "http"}
	bzrSchemes     = []string{"https", "bzr+ssh", "bzr", "http"}
	hgSchemes      = []string{"https", "ssh", "http"}
	svnSchemes     = []string{"https", "http", "svn", "svn+ssh"}
	gopkginSchemes = []string{"https", "http"}
	netrc          []netrcLine
	readNetrcOnce  sync.Once
)

// gopkgUnstableSuffix is the suffix that marks a gopkg.in version selector
// (for example "v2-unstable") as unstable.
const gopkgUnstableSuffix = "-unstable"
    38  
    39  func validateVCSScheme(scheme, typ string) bool {
    40  	// everything allows plain ssh
    41  	if scheme == "ssh" {
    42  		return true
    43  	}
    44  
    45  	var schemes []string
    46  	switch typ {
    47  	case "git":
    48  		schemes = gitSchemes
    49  	case "bzr":
    50  		schemes = bzrSchemes
    51  	case "hg":
    52  		schemes = hgSchemes
    53  	case "svn":
    54  		schemes = svnSchemes
    55  	default:
    56  		panic(fmt.Sprint("unsupported vcs type", scheme))
    57  	}
    58  
    59  	for _, valid := range schemes {
    60  		if scheme == valid {
    61  			return true
    62  		}
    63  	}
    64  	return false
    65  }
    66  
// Regexes for the different known import path flavors.
//
// Every pattern captures the whole repository root in the named group "root"
// (submatch 1) and the trailing sub-package path in its last submatch. For
// the host-specific patterns other than gpinNewRegex, submatch 2 is the
// repository path that follows the host name. gpinNewRegex instead exposes
// the optional "/user" segment (2), the "/package" segment (3) and the
// version selector (4). vcsExtensionRegex exposes the VCS extension as the
// named group "vcs" (submatch 4).
var (
	// This regex allows some usernames that github currently disallows. They
	// have allowed them in the past.
	ghRegex      = regexp.MustCompile(`^(?P<root>github\.com(/[A-Za-z0-9][-A-Za-z0-9]*/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
	gpinNewRegex = regexp.MustCompile(`^(?P<root>gopkg\.in(?:(/[a-zA-Z0-9][-a-zA-Z0-9]+)?)(/[a-zA-Z][-.a-zA-Z0-9]*)\.((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(?:-unstable)?)(?:\.git)?)((?:/[a-zA-Z0-9][-.a-zA-Z0-9]*)*)$`)
	//gpinOldRegex = regexp.MustCompile(`^(?P<root>gopkg\.in/(?:([a-z0-9][-a-z0-9]+)/)?((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(-unstable)?)/([a-zA-Z][-a-zA-Z0-9]*)(?:\.git)?)((?:/[a-zA-Z][-a-zA-Z0-9]*)*)$`)
	bbRegex = regexp.MustCompile(`^(?P<root>bitbucket\.org(?P<bitname>/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
	//lpRegex = regexp.MustCompile(`^(?P<root>launchpad\.net/([A-Za-z0-9-._]+)(/[A-Za-z0-9-._]+)?)(/.+)?`)
	lpRegex = regexp.MustCompile(`^(?P<root>launchpad\.net(/[A-Za-z0-9-._]+))((?:/[A-Za-z0-9_.\-]+)*)?$`)
	//glpRegex = regexp.MustCompile(`^(?P<root>git\.launchpad\.net/([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+)$`)
	glpRegex = regexp.MustCompile(`^(?P<root>git\.launchpad\.net(/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
	//gcRegex      = regexp.MustCompile(`^(?P<root>code\.google\.com/[pr]/(?P<project>[a-z0-9\-]+)(\.(?P<subrepo>[a-z0-9\-]+))?)(/[A-Za-z0-9_.\-]+)*$`)
	jazzRegex         = regexp.MustCompile(`^(?P<root>hub\.jazz\.net(/git/[a-z0-9]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`)
	apacheRegex       = regexp.MustCompile(`^(?P<root>git\.apache\.org(/[a-z0-9_.\-]+\.git))((?:/[A-Za-z0-9_.\-]+)*)$`)
	vcsExtensionRegex = regexp.MustCompile(`^(?P<root>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?/[A-Za-z0-9_.\-/~]*?\.(?P<vcs>bzr|git|hg|svn))((?:/[A-Za-z0-9_.\-]+)*)$`)
)
    84  
// Other helper regexes.
//
// scpSyntaxRe matches SCP-style remotes of the form "user@host:path", with
// user, host and path as submatches 1-3; normalizeURI uses it to rewrite such
// inputs as ssh:// URLs. pathvld matches a dotted host name followed by
// optional slash-separated path segments; it is not referenced in this part
// of the file.
var (
	scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`)
	pathvld     = regexp.MustCompile(`^([A-Za-z0-9-]+)(\.[A-Za-z0-9-]+)+(/[A-Za-z0-9-_.~]+)*$`)
)
    90  
    91  func pathDeducerTrie() *deducerTrie {
    92  	dxt := newDeducerTrie()
    93  
    94  	dxt.Insert("github.com/", githubDeducer{regexp: ghRegex})
    95  	dxt.Insert("gopkg.in/", gopkginDeducer{regexp: gpinNewRegex})
    96  	dxt.Insert("bitbucket.org/", bitbucketDeducer{regexp: bbRegex})
    97  	dxt.Insert("launchpad.net/", launchpadDeducer{regexp: lpRegex})
    98  	dxt.Insert("git.launchpad.net/", launchpadGitDeducer{regexp: glpRegex})
    99  	dxt.Insert("hub.jazz.net/", jazzDeducer{regexp: jazzRegex})
   100  	dxt.Insert("git.apache.org/", apacheDeducer{regexp: apacheRegex})
   101  
   102  	return dxt
   103  }
   104  
// pathDeducer is implemented by each matcher that knows how to interpret
// import paths for one hosting service or path shape.
type pathDeducer interface {
	// deduceRoot takes an import path such as
	// "github.com/some-user/some-package/some-subpackage"
	// and returns the root folder to where the version control
	// system exists. For example, the root folder where .git exists.
	// So the return of the above string would be
	// "github.com/some-user/some-package"
	deduceRoot(string) (string, error)
	// deduceSource takes the same import path plus the URL parsed from the
	// user's input and returns the candidate sources to try. The
	// implementations in this file overwrite the URL's Host and Path so that
	// it points at the repository root, and either honor the scheme already
	// set on the URL or produce one candidate per supported scheme.
	deduceSource(string, *url.URL) (maybeSources, error)
}
   115  
   116  type githubDeducer struct {
   117  	regexp *regexp.Regexp
   118  }
   119  
   120  func (m githubDeducer) deduceRoot(path string) (string, error) {
   121  	v := m.regexp.FindStringSubmatch(path)
   122  	if v == nil {
   123  		return "", fmt.Errorf("%s is not a valid path for a source on github.com", path)
   124  	}
   125  
   126  	return "github.com" + v[2], nil
   127  }
   128  
   129  func (m githubDeducer) deduceSource(path string, u *url.URL) (maybeSources, error) {
   130  	v := m.regexp.FindStringSubmatch(path)
   131  	if v == nil {
   132  		return nil, fmt.Errorf("%s is not a valid path for a source on github.com", path)
   133  	}
   134  
   135  	u.Host = "github.com"
   136  	u.Path = v[2]
   137  
   138  	if u.Scheme == "ssh" && u.User != nil && u.User.Username() != "git" {
   139  		return nil, fmt.Errorf("github ssh must be accessed via the 'git' user; %s was provided", u.User.Username())
   140  	} else if u.Scheme != "" {
   141  		if !validateVCSScheme(u.Scheme, "git") {
   142  			return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme)
   143  		}
   144  		if u.Scheme == "ssh" {
   145  			u.User = url.User("git")
   146  		}
   147  		return maybeSources{maybeGitSource{url: u}}, nil
   148  	}
   149  
   150  	mb := make(maybeSources, len(gitSchemes))
   151  	for k, scheme := range gitSchemes {
   152  		u2 := *u
   153  		if scheme == "ssh" {
   154  			u2.User = url.User("git")
   155  		}
   156  		u2.Scheme = scheme
   157  		mb[k] = maybeGitSource{url: &u2}
   158  	}
   159  
   160  	return mb, nil
   161  }
   162  
   163  type bitbucketDeducer struct {
   164  	regexp *regexp.Regexp
   165  }
   166  
   167  func (m bitbucketDeducer) deduceRoot(path string) (string, error) {
   168  	v := m.regexp.FindStringSubmatch(path)
   169  	if v == nil {
   170  		return "", fmt.Errorf("%s is not a valid path for a source on bitbucket.org", path)
   171  	}
   172  
   173  	return "bitbucket.org" + v[2], nil
   174  }
   175  
   176  func (m bitbucketDeducer) deduceSource(path string, u *url.URL) (maybeSources, error) {
   177  	v := m.regexp.FindStringSubmatch(path)
   178  	if v == nil {
   179  		return nil, fmt.Errorf("%s is not a valid path for a source on bitbucket.org", path)
   180  	}
   181  
   182  	u.Host = "bitbucket.org"
   183  	u.Path = v[2]
   184  
   185  	// This isn't definitive, but it'll probably catch most
   186  	isgit := strings.HasSuffix(u.Path, ".git") || (u.User != nil && u.User.Username() == "git")
   187  	ishg := strings.HasSuffix(u.Path, ".hg") || (u.User != nil && u.User.Username() == "hg")
   188  
   189  	// TODO(sdboyer) resolve scm ambiguity if needed by querying bitbucket's REST API
   190  	if u.Scheme != "" {
   191  		validgit, validhg := validateVCSScheme(u.Scheme, "git"), validateVCSScheme(u.Scheme, "hg")
   192  		if isgit {
   193  			if !validgit {
   194  				// This is unreachable for now, as the git schemes are a
   195  				// superset of the hg schemes
   196  				return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme)
   197  			}
   198  			return maybeSources{maybeGitSource{url: u}}, nil
   199  		} else if ishg {
   200  			if !validhg {
   201  				return nil, fmt.Errorf("%s is not a valid scheme for accessing an hg repository", u.Scheme)
   202  			}
   203  			return maybeSources{maybeHgSource{url: u}}, nil
   204  		} else if !validgit && !validhg {
   205  			return nil, fmt.Errorf("%s is not a valid scheme for accessing either a git or hg repository", u.Scheme)
   206  		}
   207  
   208  		// No other choice, make an option for both git and hg
   209  		return maybeSources{
   210  			maybeHgSource{url: u},
   211  			maybeGitSource{url: u},
   212  		}, nil
   213  	}
   214  
   215  	mb := make(maybeSources, 0)
   216  	// git is probably more common, even on bitbucket. however, bitbucket
   217  	// appears to fail _extremely_ slowly on git pings (ls-remote) when the
   218  	// underlying repository is actually an hg repository, so it's better
   219  	// to try hg first.
   220  	if !isgit {
   221  		for _, scheme := range hgSchemes {
   222  			u2 := *u
   223  			if scheme == "ssh" {
   224  				u2.User = url.User("hg")
   225  			}
   226  			u2.Scheme = scheme
   227  			mb = append(mb, maybeHgSource{url: &u2})
   228  		}
   229  	}
   230  
   231  	if !ishg {
   232  		for _, scheme := range gitSchemes {
   233  			u2 := *u
   234  			if scheme == "ssh" {
   235  				u2.User = url.User("git")
   236  			}
   237  			u2.Scheme = scheme
   238  			mb = append(mb, maybeGitSource{url: &u2})
   239  		}
   240  	}
   241  
   242  	return mb, nil
   243  }
   244  
   245  type gopkginDeducer struct {
   246  	regexp *regexp.Regexp
   247  }
   248  
   249  func (m gopkginDeducer) deduceRoot(p string) (string, error) {
   250  	v, err := m.parseAndValidatePath(p)
   251  	if err != nil {
   252  		return "", err
   253  	}
   254  
   255  	return v[1], nil
   256  }
   257  
   258  func (m gopkginDeducer) parseAndValidatePath(p string) ([]string, error) {
   259  	v := m.regexp.FindStringSubmatch(p)
   260  	if v == nil {
   261  		return nil, fmt.Errorf("%s is not a valid path for a source on gopkg.in", p)
   262  	}
   263  
   264  	// We duplicate some logic from the gopkg.in server in order to validate the
   265  	// import path string without having to make a network request
   266  	if strings.Contains(v[4], ".") {
   267  		return nil, fmt.Errorf("%s is not a valid import path; gopkg.in only allows major versions (%q instead of %q)",
   268  			p, v[4][:strings.Index(v[4], ".")], v[4])
   269  	}
   270  
   271  	return v, nil
   272  }
   273  
   274  func (m gopkginDeducer) deduceSource(p string, u *url.URL) (maybeSources, error) {
   275  	// Reuse root detection logic for initial validation
   276  	v, err := m.parseAndValidatePath(p)
   277  	if err != nil {
   278  		return nil, err
   279  	}
   280  
   281  	// Putting a scheme on gopkg.in would be really weird, disallow it
   282  	if u.Scheme != "" {
   283  		return nil, fmt.Errorf("specifying alternate schemes on gopkg.in imports is not permitted")
   284  	}
   285  
   286  	// gopkg.in is always backed by github
   287  	u.Host = "github.com"
   288  	if v[2] == "" {
   289  		elem := v[3][1:]
   290  		u.Path = path.Join("/go-"+elem, elem)
   291  	} else {
   292  		u.Path = path.Join(v[2], v[3])
   293  	}
   294  
   295  	unstable := false
   296  	majorStr := v[4]
   297  
   298  	if strings.HasSuffix(majorStr, gopkgUnstableSuffix) {
   299  		unstable = true
   300  		majorStr = strings.TrimSuffix(majorStr, gopkgUnstableSuffix)
   301  	}
   302  	major, err := strconv.ParseUint(majorStr[1:], 10, 64)
   303  	if err != nil {
   304  		// this should only be reachable if there's an error in the regex
   305  		return nil, fmt.Errorf("could not parse %q as a gopkg.in major version", majorStr[1:])
   306  	}
   307  
   308  	mb := make(maybeSources, len(gopkginSchemes))
   309  	for k, scheme := range gopkginSchemes {
   310  		u2 := *u
   311  		u2.Scheme = scheme
   312  		mb[k] = maybeGopkginSource{
   313  			opath:    v[1],
   314  			url:      &u2,
   315  			major:    major,
   316  			unstable: unstable,
   317  		}
   318  	}
   319  
   320  	return mb, nil
   321  }
   322  
   323  type launchpadDeducer struct {
   324  	regexp *regexp.Regexp
   325  }
   326  
   327  func (m launchpadDeducer) deduceRoot(path string) (string, error) {
   328  	// TODO(sdboyer) lp handling is nasty - there's ambiguities which can only really
   329  	// be resolved with a metadata request. See https://github.com/golang/go/issues/11436
   330  	v := m.regexp.FindStringSubmatch(path)
   331  	if v == nil {
   332  		return "", fmt.Errorf("%s is not a valid path for a source on launchpad.net", path)
   333  	}
   334  
   335  	return "launchpad.net" + v[2], nil
   336  }
   337  
   338  func (m launchpadDeducer) deduceSource(path string, u *url.URL) (maybeSources, error) {
   339  	v := m.regexp.FindStringSubmatch(path)
   340  	if v == nil {
   341  		return nil, fmt.Errorf("%s is not a valid path for a source on launchpad.net", path)
   342  	}
   343  
   344  	u.Host = "launchpad.net"
   345  	u.Path = v[2]
   346  
   347  	if u.Scheme != "" {
   348  		if !validateVCSScheme(u.Scheme, "bzr") {
   349  			return nil, fmt.Errorf("%s is not a valid scheme for accessing a bzr repository", u.Scheme)
   350  		}
   351  		return maybeSources{maybeBzrSource{url: u}}, nil
   352  	}
   353  
   354  	mb := make(maybeSources, len(bzrSchemes))
   355  	for k, scheme := range bzrSchemes {
   356  		u2 := *u
   357  		u2.Scheme = scheme
   358  		mb[k] = maybeBzrSource{url: &u2}
   359  	}
   360  
   361  	return mb, nil
   362  }
   363  
   364  type launchpadGitDeducer struct {
   365  	regexp *regexp.Regexp
   366  }
   367  
   368  func (m launchpadGitDeducer) deduceRoot(path string) (string, error) {
   369  	// TODO(sdboyer) same ambiguity issues as with normal bzr lp
   370  	v := m.regexp.FindStringSubmatch(path)
   371  	if v == nil {
   372  		return "", fmt.Errorf("%s is not a valid path for a source on git.launchpad.net", path)
   373  	}
   374  
   375  	return "git.launchpad.net" + v[2], nil
   376  }
   377  
   378  func (m launchpadGitDeducer) deduceSource(path string, u *url.URL) (maybeSources, error) {
   379  	v := m.regexp.FindStringSubmatch(path)
   380  	if v == nil {
   381  		return nil, fmt.Errorf("%s is not a valid path for a source on git.launchpad.net", path)
   382  	}
   383  
   384  	u.Host = "git.launchpad.net"
   385  	u.Path = v[2]
   386  
   387  	if u.Scheme != "" {
   388  		if !validateVCSScheme(u.Scheme, "git") {
   389  			return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme)
   390  		}
   391  		return maybeSources{maybeGitSource{url: u}}, nil
   392  	}
   393  
   394  	mb := make(maybeSources, len(gitSchemes))
   395  	for k, scheme := range gitSchemes {
   396  		u2 := *u
   397  		u2.Scheme = scheme
   398  		mb[k] = maybeGitSource{url: &u2}
   399  	}
   400  
   401  	return mb, nil
   402  }
   403  
   404  type jazzDeducer struct {
   405  	regexp *regexp.Regexp
   406  }
   407  
   408  func (m jazzDeducer) deduceRoot(path string) (string, error) {
   409  	v := m.regexp.FindStringSubmatch(path)
   410  	if v == nil {
   411  		return "", fmt.Errorf("%s is not a valid path for a source on hub.jazz.net", path)
   412  	}
   413  
   414  	return "hub.jazz.net" + v[2], nil
   415  }
   416  
   417  func (m jazzDeducer) deduceSource(path string, u *url.URL) (maybeSources, error) {
   418  	v := m.regexp.FindStringSubmatch(path)
   419  	if v == nil {
   420  		return nil, fmt.Errorf("%s is not a valid path for a source on hub.jazz.net", path)
   421  	}
   422  
   423  	u.Host = "hub.jazz.net"
   424  	u.Path = v[2]
   425  
   426  	switch u.Scheme {
   427  	case "":
   428  		u.Scheme = "https"
   429  		fallthrough
   430  	case "https":
   431  		return maybeSources{maybeGitSource{url: u}}, nil
   432  	default:
   433  		return nil, fmt.Errorf("IBM's jazz hub only supports https, %s is not allowed", u.String())
   434  	}
   435  }
   436  
   437  type apacheDeducer struct {
   438  	regexp *regexp.Regexp
   439  }
   440  
   441  func (m apacheDeducer) deduceRoot(path string) (string, error) {
   442  	v := m.regexp.FindStringSubmatch(path)
   443  	if v == nil {
   444  		return "", fmt.Errorf("%s is not a valid path for a source on git.apache.org", path)
   445  	}
   446  
   447  	return "git.apache.org" + v[2], nil
   448  }
   449  
   450  func (m apacheDeducer) deduceSource(path string, u *url.URL) (maybeSources, error) {
   451  	v := m.regexp.FindStringSubmatch(path)
   452  	if v == nil {
   453  		return nil, fmt.Errorf("%s is not a valid path for a source on git.apache.org", path)
   454  	}
   455  
   456  	u.Host = "git.apache.org"
   457  	u.Path = v[2]
   458  
   459  	if u.Scheme != "" {
   460  		if !validateVCSScheme(u.Scheme, "git") {
   461  			return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme)
   462  		}
   463  		return maybeSources{maybeGitSource{url: u}}, nil
   464  	}
   465  
   466  	mb := make(maybeSources, len(gitSchemes))
   467  	for k, scheme := range gitSchemes {
   468  		u2 := *u
   469  		u2.Scheme = scheme
   470  		mb[k] = maybeGitSource{url: &u2}
   471  	}
   472  
   473  	return mb, nil
   474  }
   475  
   476  type vcsExtensionDeducer struct {
   477  	regexp *regexp.Regexp
   478  }
   479  
   480  func (m vcsExtensionDeducer) deduceRoot(path string) (string, error) {
   481  	v := m.regexp.FindStringSubmatch(path)
   482  	if v == nil {
   483  		return "", fmt.Errorf("%s contains no vcs extension hints for matching", path)
   484  	}
   485  
   486  	return v[1], nil
   487  }
   488  
   489  func (m vcsExtensionDeducer) deduceSource(path string, u *url.URL) (maybeSources, error) {
   490  	v := m.regexp.FindStringSubmatch(path)
   491  	if v == nil {
   492  		return nil, fmt.Errorf("%s contains no vcs extension hints for matching", path)
   493  	}
   494  
   495  	switch v[4] {
   496  	case "git", "hg", "bzr":
   497  		x := strings.SplitN(v[1], "/", 2)
   498  		// TODO(sdboyer) is this actually correct for bzr?
   499  		u.Host = x[0]
   500  		u.Path = "/" + x[1]
   501  
   502  		if u.Scheme != "" {
   503  			if !validateVCSScheme(u.Scheme, v[4]) {
   504  				return nil, fmt.Errorf("%s is not a valid scheme for accessing %s repositories (path %s)", u.Scheme, v[4], path)
   505  			}
   506  
   507  			switch v[4] {
   508  			case "git":
   509  				return maybeSources{maybeGitSource{url: u}}, nil
   510  			case "bzr":
   511  				return maybeSources{maybeBzrSource{url: u}}, nil
   512  			case "hg":
   513  				return maybeSources{maybeHgSource{url: u}}, nil
   514  			}
   515  		}
   516  
   517  		var schemes []string
   518  		var mb maybeSources
   519  		var f func(k int, u *url.URL)
   520  
   521  		switch v[4] {
   522  		case "git":
   523  			schemes = gitSchemes
   524  			f = func(k int, u *url.URL) {
   525  				mb[k] = maybeGitSource{url: u}
   526  			}
   527  		case "bzr":
   528  			schemes = bzrSchemes
   529  			f = func(k int, u *url.URL) {
   530  				mb[k] = maybeBzrSource{url: u}
   531  			}
   532  		case "hg":
   533  			schemes = hgSchemes
   534  			f = func(k int, u *url.URL) {
   535  				mb[k] = maybeHgSource{url: u}
   536  			}
   537  		}
   538  
   539  		mb = make(maybeSources, len(schemes))
   540  		for k, scheme := range schemes {
   541  			u2 := *u
   542  			u2.Scheme = scheme
   543  			f(k, &u2)
   544  		}
   545  
   546  		return mb, nil
   547  	default:
   548  		return nil, fmt.Errorf("unknown repository type: %q", v[4])
   549  	}
   550  }
   551  
// A deducer takes an import path and inspects it to determine where the
// corresponding project root should be. It applies a number of matching
// techniques, eventually falling back to an HTTP request for go-get metadata if
// none of the explicit rules succeed.
//
// The only real implementation is deductionCoordinator. The interface is
// primarily intended for testing purposes.
type deducer interface {
	// deduceRootPath returns the root and candidate sources deduced for the
	// given import path, or an error if no deduction could be made.
	deduceRootPath(ctx context.Context, path string) (pathDeduction, error)
}
   562  
   563  type deductionCoordinator struct {
   564  	suprvsr  *supervisor
   565  	mut      sync.RWMutex
   566  	rootxt   *radix.Tree
   567  	deducext *deducerTrie
   568  }
   569  
   570  func newDeductionCoordinator(superv *supervisor) *deductionCoordinator {
   571  	dc := &deductionCoordinator{
   572  		suprvsr:  superv,
   573  		rootxt:   radix.New(),
   574  		deducext: pathDeducerTrie(),
   575  	}
   576  
   577  	return dc
   578  }
   579  
   580  // deduceRootPath takes an import path and attempts to deduce various
   581  // metadata about it - what type of source should handle it, and where its
   582  // "root" is (for vcs repositories, the repository root).
   583  //
   584  // If no errors are encountered, the returned pathDeduction will contain both
   585  // the root path and a list of maybeSources, which can be subsequently used to
   586  // create a handler that will manage the particular source.
   587  func (dc *deductionCoordinator) deduceRootPath(ctx context.Context, path string) (pathDeduction, error) {
   588  	if err := dc.suprvsr.ctx.Err(); err != nil {
   589  		return pathDeduction{}, err
   590  	}
   591  
   592  	// First, check the rootxt to see if there's a prefix match - if so, we
   593  	// can return that and move on.
   594  	dc.mut.RLock()
   595  	prefix, data, has := dc.rootxt.LongestPrefix(path)
   596  	dc.mut.RUnlock()
   597  	if has && isPathPrefixOrEqual(prefix, path) {
   598  		switch d := data.(type) {
   599  		case maybeSources:
   600  			return pathDeduction{root: prefix, mb: d}, nil
   601  		case *httpMetadataDeducer:
   602  			// Multiple calls have come in for a similar path shape during
   603  			// the window in which the HTTP request to retrieve go get
   604  			// metadata is in flight. Fold this request in with the existing
   605  			// one(s) by calling the deduction method, which will avoid
   606  			// duplication of work through a sync.Once.
   607  			return d.deduce(ctx, path)
   608  		}
   609  
   610  		panic(fmt.Sprintf("unexpected %T in deductionCoordinator.rootxt: %v", data, data))
   611  	}
   612  
   613  	// No match. Try known path deduction first.
   614  	pd, err := dc.deduceKnownPaths(path)
   615  	if err == nil {
   616  		// Deduction worked; store it in the rootxt, send on retchan and
   617  		// terminate.
   618  		// FIXME(sdboyer) deal with changing path vs. root. Probably needs
   619  		// to be predeclared and reused in the hmd returnFunc
   620  		dc.mut.Lock()
   621  		dc.rootxt.Insert(pd.root, pd.mb)
   622  		dc.mut.Unlock()
   623  		return pd, nil
   624  	}
   625  
   626  	if err != errNoKnownPathMatch {
   627  		return pathDeduction{}, err
   628  	}
   629  
   630  	// The err indicates no known path matched. It's still possible that
   631  	// retrieving go get metadata might do the trick.
   632  	hmd := &httpMetadataDeducer{
   633  		basePath: path,
   634  		suprvsr:  dc.suprvsr,
   635  		// The vanity deducer will call this func with a completed
   636  		// pathDeduction if it succeeds in finding one. We process it
   637  		// back through the action channel to ensure serialized
   638  		// access to the rootxt map.
   639  		returnFunc: func(pd pathDeduction) {
   640  			dc.mut.Lock()
   641  			dc.rootxt.Insert(pd.root, pd.mb)
   642  			dc.mut.Unlock()
   643  		},
   644  	}
   645  
   646  	// Save the hmd in the rootxt so that calls checking on similar
   647  	// paths made while the request is in flight can be folded together.
   648  	dc.mut.Lock()
   649  	dc.rootxt.Insert(path, hmd)
   650  	dc.mut.Unlock()
   651  
   652  	// Trigger the HTTP-backed deduction process for this requestor.
   653  	return hmd.deduce(ctx, path)
   654  }
   655  
// pathDeduction represents the results of a successful import path deduction -
// a root path, plus the list of maybeSources that can be used to attempt to
// connect to the source.
type pathDeduction struct {
	root string
	mb   maybeSources
}

// errNoKnownPathMatch is returned by deduceKnownPaths when neither a
// known-host deducer nor the vcs extension deducer matches the path.
// deduceRootPath compares against it to decide whether to fall back to an
// HTTP metadata lookup.
var errNoKnownPathMatch = errors.New("no known path match")
   665  
   666  func (dc *deductionCoordinator) deduceKnownPaths(path string) (pathDeduction, error) {
   667  	u, path, err := normalizeURI(path)
   668  	if err != nil {
   669  		return pathDeduction{}, err
   670  	}
   671  
   672  	// First, try the root path-based matches
   673  	if _, mtch, has := dc.deducext.LongestPrefix(path); has {
   674  		root, err := mtch.deduceRoot(path)
   675  		if err != nil {
   676  			return pathDeduction{}, err
   677  		}
   678  		mb, err := mtch.deduceSource(path, u)
   679  		if err != nil {
   680  			return pathDeduction{}, err
   681  		}
   682  
   683  		return pathDeduction{
   684  			root: root,
   685  			mb:   mb,
   686  		}, nil
   687  	}
   688  
   689  	// Next, try the vcs extension-based (infix) matcher
   690  	exm := vcsExtensionDeducer{regexp: vcsExtensionRegex}
   691  	if root, err := exm.deduceRoot(path); err == nil {
   692  		mb, err := exm.deduceSource(path, u)
   693  		if err != nil {
   694  			return pathDeduction{}, err
   695  		}
   696  
   697  		return pathDeduction{
   698  			root: root,
   699  			mb:   mb,
   700  		}, nil
   701  	}
   702  
   703  	return pathDeduction{}, errNoKnownPathMatch
   704  }
   705  
// httpMetadataDeducer resolves an import path by fetching go-get metadata
// over HTTP. The lookup runs at most once (guarded by once); its outcome is
// kept in deduced and deduceErr and handed to every caller of deduce.
// returnFunc is invoked with the deduction when the lookup succeeds.
type httpMetadataDeducer struct {
	once       sync.Once
	deduced    pathDeduction
	deduceErr  error
	basePath   string
	returnFunc func(pathDeduction)
	suprvsr    *supervisor
}
   714  
   715  func (hmd *httpMetadataDeducer) deduce(ctx context.Context, path string) (pathDeduction, error) {
   716  	hmd.once.Do(func() {
   717  		opath := path
   718  		u, path, err := normalizeURI(path)
   719  		if err != nil {
   720  			err = errors.Wrapf(err, "unable to normalize URI")
   721  			hmd.deduceErr = err
   722  			return
   723  		}
   724  
   725  		pd := pathDeduction{}
   726  
   727  		// Make the HTTP call to attempt to retrieve go-get metadata
   728  		var root, vcs, reporoot string
   729  		err = hmd.suprvsr.do(ctx, path, ctHTTPMetadata, func(ctx context.Context) error {
   730  			root, vcs, reporoot, err = getMetadata(ctx, path, u.Scheme)
   731  			if err != nil {
   732  				err = errors.Wrapf(err, "unable to read metadata")
   733  			}
   734  			return err
   735  		})
   736  		if err != nil {
   737  			err = errors.Wrapf(err, "unable to deduce repository and source type for %q", opath)
   738  			hmd.deduceErr = err
   739  			return
   740  		}
   741  		pd.root = root
   742  
   743  		// If we got something back at all, then it supersedes the actual input for
   744  		// the real URL to hit
   745  		repoURL, err := url.Parse(reporoot)
   746  		if err != nil {
   747  			err = errors.Wrapf(err, "server returned bad URL in go-get metadata, reporoot=%q", reporoot)
   748  			hmd.deduceErr = err
   749  			return
   750  		}
   751  
   752  		// If the input path specified a scheme, then try to honor it.
   753  		if u.Scheme != "" && repoURL.Scheme != u.Scheme {
   754  			// If the input scheme was http, but the go-get metadata
   755  			// nevertheless indicated https should be used for the repo, then
   756  			// trust the metadata and use https.
   757  			//
   758  			// To err on the secure side, do NOT allow the same in the other
   759  			// direction (https -> http).
   760  			if u.Scheme != "http" || repoURL.Scheme != "https" {
   761  				hmd.deduceErr = errors.Errorf("scheme mismatch for %q: input asked for %q, but go-get metadata specified %q", path, u.Scheme, repoURL.Scheme)
   762  				return
   763  			}
   764  		}
   765  
   766  		switch vcs {
   767  		case "git":
   768  			pd.mb = maybeSources{maybeGitSource{url: repoURL}}
   769  		case "bzr":
   770  			pd.mb = maybeSources{maybeBzrSource{url: repoURL}}
   771  		case "hg":
   772  			pd.mb = maybeSources{maybeHgSource{url: repoURL}}
   773  		default:
   774  			hmd.deduceErr = errors.Errorf("unsupported vcs type %s in go-get metadata from %s", vcs, path)
   775  			return
   776  		}
   777  
   778  		hmd.deduced = pd
   779  		// All data is assigned for other goroutines that may be waiting. Now,
   780  		// send the pathDeduction back to the deductionCoordinator by calling
   781  		// the returnFunc. This will also remove the reference to this hmd in
   782  		// the coordinator's trie.
   783  		//
   784  		// When this call finishes, it is guaranteed the coordinator will have
   785  		// at least begun running the action to insert the path deduction, which
   786  		// means no other deduction request will be able to interleave and
   787  		// request the same path before the pathDeduction can be processed, but
   788  		// after this hmd has been dereferenced from the trie.
   789  		hmd.returnFunc(pd)
   790  	})
   791  
   792  	return hmd.deduced, hmd.deduceErr
   793  }
   794  
   795  // normalizeURI takes a path string - which can be a plain import path, or a
   796  // proper URI, or something SCP-shaped - performs basic validity checks, and
   797  // returns both a full URL and just the path portion.
   798  func normalizeURI(p string) (*url.URL, string, error) {
   799  	var u *url.URL
   800  	var newpath string
   801  	if m := scpSyntaxRe.FindStringSubmatch(p); m != nil {
   802  		// Match SCP-like syntax and convert it to a URL.
   803  		// Eg, "git@github.com:user/repo" becomes
   804  		// "ssh://git@github.com/user/repo".
   805  		u = &url.URL{
   806  			Scheme: "ssh",
   807  			User:   url.User(m[1]),
   808  			Host:   m[2],
   809  			Path:   "/" + m[3],
   810  			// TODO(sdboyer) This is what stdlib sets; grok why better
   811  			//RawPath: m[3],
   812  		}
   813  	} else {
   814  		var err error
   815  		u, err = url.Parse(p)
   816  		if err != nil {
   817  			return nil, "", errors.Errorf("%q is not a valid URI", p)
   818  		}
   819  	}
   820  
   821  	// If no scheme was passed, then the entire path will have been put into
   822  	// u.Path. Either way, construct the normalized path correctly.
   823  	if u.Host == "" {
   824  		newpath = p
   825  	} else {
   826  		newpath = path.Join(u.Host, u.Path)
   827  	}
   828  
   829  	return u, newpath, nil
   830  }
   831  
   832  // fetchMetadata fetches the remote metadata for path.
   833  func fetchMetadata(ctx context.Context, path, scheme string) (rc io.ReadCloser, err error) {
   834  	if scheme == "http" {
   835  		rc, err = doFetchMetadata(ctx, "http", path)
   836  		return
   837  	}
   838  
   839  	rc, err = doFetchMetadata(ctx, "https", path)
   840  	if err == nil {
   841  		return
   842  	}
   843  
   844  	rc, err = doFetchMetadata(ctx, "http", path)
   845  	return
   846  }
   847  
   848  func doFetchMetadata(ctx context.Context, scheme, path string) (io.ReadCloser, error) {
   849  	url := fmt.Sprintf("%s://%s?go-get=1", scheme, path)
   850  	switch scheme {
   851  	case "https", "http":
   852  		req, err := http.NewRequest("GET", url, nil)
   853  		if err != nil {
   854  			return nil, errors.Wrapf(err, "unable to build HTTP request for URL %q", url)
   855  		}
   856  
   857  		req = addAuthFromNetrc(url, req)
   858  
   859  		resp, err := http.DefaultClient.Do(req.WithContext(ctx))
   860  		if err != nil {
   861  			return nil, errors.Wrapf(err, "failed HTTP request to URL %q", url)
   862  		}
   863  
   864  		return resp.Body, nil
   865  	default:
   866  		return nil, errors.Errorf("unknown remote protocol scheme: %q", scheme)
   867  	}
   868  }
   869  
   870  // See https://github.com/golang/go/blob/master/src/cmd/go/internal/web2/web.go
   871  // for implementation
   872  // Temporary netrc reader until https://github.com/golang/go/issues/31334 is solved
   873  type netrcLine struct {
   874  	machine  string
   875  	login    string
   876  	password string
   877  }
   878  
   879  func parseNetrc(data string) []netrcLine {
   880  	// See https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html
   881  	// for documentation on the .netrc format.
   882  	var nrc []netrcLine
   883  	var l netrcLine
   884  	inMacro := false
   885  	for _, line := range strings.Split(data, "\n") {
   886  		if inMacro {
   887  			if line == "" {
   888  				inMacro = false
   889  			}
   890  			continue
   891  		}
   892  
   893  		f := strings.Fields(line)
   894  		i := 0
   895  		for ; i < len(f)-1; i += 2 {
   896  			// Reset at each "machine" token.
   897  			// “The auto-login process searches the .netrc file for a machine token
   898  			// that matches […]. Once a match is made, the subsequent .netrc tokens
   899  			// are processed, stopping when the end of file is reached or another
   900  			// machine or a default token is encountered.”
   901  			switch f[i] {
   902  			case "machine":
   903  				l = netrcLine{machine: f[i+1]}
   904  			case "login":
   905  				l.login = f[i+1]
   906  			case "password":
   907  				l.password = f[i+1]
   908  			case "macdef":
   909  				// “A macro is defined with the specified name; its contents begin with
   910  				// the next .netrc line and continue until a null line (consecutive
   911  				// new-line characters) is encountered.”
   912  				inMacro = true
   913  			}
   914  			if l.machine != "" && l.login != "" && l.password != "" {
   915  				nrc = append(nrc, l)
   916  				l = netrcLine{}
   917  			}
   918  		}
   919  
   920  		if i < len(f) && f[i] == "default" {
   921  			// “There can be only one default token, and it must be after all machine tokens.”
   922  			break
   923  		}
   924  	}
   925  
   926  	return nrc
   927  }
   928  
   929  func netrcPath() (string, error) {
   930  	if env := os.Getenv("NETRC"); env != "" {
   931  		return env, nil
   932  	}
   933  
   934  	dir := os.Getenv("HOME")
   935  
   936  	base := ".netrc"
   937  	if runtime.GOOS == "windows" {
   938  		base = "_netrc"
   939  	}
   940  	return filepath.Join(dir, base), nil
   941  }
   942  
   943  // readNetrc parses a user's netrc file, ignoring any errors that occur.
   944  func readNetrc() {
   945  	path, err := netrcPath()
   946  	if err != nil {
   947  		return
   948  	}
   949  
   950  	data, err := ioutil.ReadFile(path)
   951  	if err != nil {
   952  		return
   953  	}
   954  
   955  	netrc = parseNetrc(string(data))
   956  }
   957  
   958  // addAuthFromNetrc uses basic authentication on go-get requests
   959  // for private repositories.
   960  func addAuthFromNetrc(rawurl string, req *http.Request) *http.Request {
   961  	readNetrcOnce.Do(readNetrc)
   962  	for _, m := range netrc {
   963  		u, err := url.Parse(rawurl)
   964  		if err != nil {
   965  			continue
   966  		}
   967  
   968  		if u.Host == m.machine {
   969  			req.SetBasicAuth(m.login, m.password)
   970  			break
   971  		}
   972  	}
   973  
   974  	return req
   975  }
   976  
   977  // getMetadata fetches and decodes remote metadata for path.
   978  //
   979  // scheme is optional. If it's http, only http will be attempted for fetching.
   980  // Any other scheme (including none) will first try https, then fall back to
   981  // http.
   982  func getMetadata(ctx context.Context, path, scheme string) (string, string, string, error) {
   983  	rc, err := fetchMetadata(ctx, path, scheme)
   984  	if err != nil {
   985  		return "", "", "", errors.Wrapf(err, "unable to fetch raw metadata")
   986  	}
   987  	defer rc.Close()
   988  
   989  	imports, err := parseMetaGoImports(rc)
   990  	if err != nil {
   991  		return "", "", "", errors.Wrapf(err, "unable to parse go-import metadata")
   992  	}
   993  	match := -1
   994  	for i, im := range imports {
   995  		if !strings.HasPrefix(path, im.Prefix) {
   996  			continue
   997  		}
   998  		if match != -1 {
   999  			return "", "", "", errors.Errorf("multiple meta tags match import path %q", path)
  1000  		}
  1001  		match = i
  1002  	}
  1003  	if match == -1 {
  1004  		return "", "", "", errors.Errorf("go-import metadata not found")
  1005  	}
  1006  	return imports[match].Prefix, imports[match].VCS, imports[match].RepoRoot, nil
  1007  }