github.com/sdboyer/gps@v0.16.3/deduce.go (about) 1 package gps 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "net/http" 9 "net/url" 10 "path" 11 "regexp" 12 "strconv" 13 "strings" 14 "sync" 15 16 radix "github.com/armon/go-radix" 17 ) 18 19 var ( 20 gitSchemes = []string{"https", "ssh", "git", "http"} 21 bzrSchemes = []string{"https", "bzr+ssh", "bzr", "http"} 22 hgSchemes = []string{"https", "ssh", "http"} 23 svnSchemes = []string{"https", "http", "svn", "svn+ssh"} 24 ) 25 26 func validateVCSScheme(scheme, typ string) bool { 27 // everything allows plain ssh 28 if scheme == "ssh" { 29 return true 30 } 31 32 var schemes []string 33 switch typ { 34 case "git": 35 schemes = gitSchemes 36 case "bzr": 37 schemes = bzrSchemes 38 case "hg": 39 schemes = hgSchemes 40 case "svn": 41 schemes = svnSchemes 42 default: 43 panic(fmt.Sprint("unsupported vcs type", scheme)) 44 } 45 46 for _, valid := range schemes { 47 if scheme == valid { 48 return true 49 } 50 } 51 return false 52 } 53 54 // Regexes for the different known import path flavors 55 var ( 56 // This regex allows some usernames that github currently disallows. They 57 // have allowed them in the past. 
58 ghRegex = regexp.MustCompile(`^(?P<root>github\.com(/[A-Za-z0-9][-A-Za-z0-9]*/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) 59 gpinNewRegex = regexp.MustCompile(`^(?P<root>gopkg\.in(?:(/[a-zA-Z0-9][-a-zA-Z0-9]+)?)(/[a-zA-Z][-.a-zA-Z0-9]*)\.((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(?:-unstable)?)(?:\.git)?)((?:/[a-zA-Z0-9][-.a-zA-Z0-9]*)*)$`) 60 //gpinOldRegex = regexp.MustCompile(`^(?P<root>gopkg\.in/(?:([a-z0-9][-a-z0-9]+)/)?((?:v0|v[1-9][0-9]*)(?:\.0|\.[1-9][0-9]*){0,2}(-unstable)?)/([a-zA-Z][-a-zA-Z0-9]*)(?:\.git)?)((?:/[a-zA-Z][-a-zA-Z0-9]*)*)$`) 61 bbRegex = regexp.MustCompile(`^(?P<root>bitbucket\.org(?P<bitname>/[A-Za-z0-9_.\-]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) 62 //lpRegex = regexp.MustCompile(`^(?P<root>launchpad\.net/([A-Za-z0-9-._]+)(/[A-Za-z0-9-._]+)?)(/.+)?`) 63 lpRegex = regexp.MustCompile(`^(?P<root>launchpad\.net(/[A-Za-z0-9-._]+))((?:/[A-Za-z0-9_.\-]+)*)?`) 64 //glpRegex = regexp.MustCompile(`^(?P<root>git\.launchpad\.net/([A-Za-z0-9_.\-]+)|~[A-Za-z0-9_.\-]+/(\+git|[A-Za-z0-9_.\-]+)/[A-Za-z0-9_.\-]+)$`) 65 glpRegex = regexp.MustCompile(`^(?P<root>git\.launchpad\.net(/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) 66 //gcRegex = regexp.MustCompile(`^(?P<root>code\.google\.com/[pr]/(?P<project>[a-z0-9\-]+)(\.(?P<subrepo>[a-z0-9\-]+))?)(/[A-Za-z0-9_.\-]+)*$`) 67 jazzRegex = regexp.MustCompile(`^(?P<root>hub\.jazz\.net(/git/[a-z0-9]+/[A-Za-z0-9_.\-]+))((?:/[A-Za-z0-9_.\-]+)*)$`) 68 apacheRegex = regexp.MustCompile(`^(?P<root>git\.apache\.org(/[a-z0-9_.\-]+\.git))((?:/[A-Za-z0-9_.\-]+)*)$`) 69 vcsExtensionRegex = regexp.MustCompile(`^(?P<root>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?/[A-Za-z0-9_.\-/~]*?\.(?P<vcs>bzr|git|hg|svn))((?:/[A-Za-z0-9_.\-]+)*)$`) 70 ) 71 72 // Other helper regexes 73 var ( 74 scpSyntaxRe = regexp.MustCompile(`^([a-zA-Z0-9_]+)@([a-zA-Z0-9._-]+):(.*)$`) 75 pathvld = regexp.MustCompile(`^([A-Za-z0-9-]+)(\.[A-Za-z0-9-]+)+(/[A-Za-z0-9-_.~]+)*$`) 76 ) 77 78 func pathDeducerTrie() *deducerTrie { 79 dxt := 
newDeducerTrie() 80 81 dxt.Insert("github.com/", githubDeducer{regexp: ghRegex}) 82 dxt.Insert("gopkg.in/", gopkginDeducer{regexp: gpinNewRegex}) 83 dxt.Insert("bitbucket.org/", bitbucketDeducer{regexp: bbRegex}) 84 dxt.Insert("launchpad.net/", launchpadDeducer{regexp: lpRegex}) 85 dxt.Insert("git.launchpad.net/", launchpadGitDeducer{regexp: glpRegex}) 86 dxt.Insert("hub.jazz.net/", jazzDeducer{regexp: jazzRegex}) 87 dxt.Insert("git.apache.org/", apacheDeducer{regexp: apacheRegex}) 88 89 return dxt 90 } 91 92 type pathDeducer interface { 93 deduceRoot(string) (string, error) 94 deduceSource(string, *url.URL) (maybeSource, error) 95 } 96 97 type githubDeducer struct { 98 regexp *regexp.Regexp 99 } 100 101 func (m githubDeducer) deduceRoot(path string) (string, error) { 102 v := m.regexp.FindStringSubmatch(path) 103 if v == nil { 104 return "", fmt.Errorf("%s is not a valid path for a source on github.com", path) 105 } 106 107 return "github.com" + v[2], nil 108 } 109 110 func (m githubDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { 111 v := m.regexp.FindStringSubmatch(path) 112 if v == nil { 113 return nil, fmt.Errorf("%s is not a valid path for a source on github.com", path) 114 } 115 116 u.Host = "github.com" 117 u.Path = v[2] 118 119 if u.Scheme == "ssh" && u.User != nil && u.User.Username() != "git" { 120 return nil, fmt.Errorf("github ssh must be accessed via the 'git' user; %s was provided", u.User.Username()) 121 } else if u.Scheme != "" { 122 if !validateVCSScheme(u.Scheme, "git") { 123 return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) 124 } 125 if u.Scheme == "ssh" { 126 u.User = url.User("git") 127 } 128 return maybeGitSource{url: u}, nil 129 } 130 131 mb := make(maybeSources, len(gitSchemes)) 132 for k, scheme := range gitSchemes { 133 u2 := *u 134 if scheme == "ssh" { 135 u2.User = url.User("git") 136 } 137 u2.Scheme = scheme 138 mb[k] = maybeGitSource{url: &u2} 139 } 140 141 return mb, nil 
142 } 143 144 type bitbucketDeducer struct { 145 regexp *regexp.Regexp 146 } 147 148 func (m bitbucketDeducer) deduceRoot(path string) (string, error) { 149 v := m.regexp.FindStringSubmatch(path) 150 if v == nil { 151 return "", fmt.Errorf("%s is not a valid path for a source on bitbucket.org", path) 152 } 153 154 return "bitbucket.org" + v[2], nil 155 } 156 157 func (m bitbucketDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { 158 v := m.regexp.FindStringSubmatch(path) 159 if v == nil { 160 return nil, fmt.Errorf("%s is not a valid path for a source on bitbucket.org", path) 161 } 162 163 u.Host = "bitbucket.org" 164 u.Path = v[2] 165 166 // This isn't definitive, but it'll probably catch most 167 isgit := strings.HasSuffix(u.Path, ".git") || (u.User != nil && u.User.Username() == "git") 168 ishg := strings.HasSuffix(u.Path, ".hg") || (u.User != nil && u.User.Username() == "hg") 169 170 // TODO(sdboyer) resolve scm ambiguity if needed by querying bitbucket's REST API 171 if u.Scheme != "" { 172 validgit, validhg := validateVCSScheme(u.Scheme, "git"), validateVCSScheme(u.Scheme, "hg") 173 if isgit { 174 if !validgit { 175 // This is unreachable for now, as the git schemes are a 176 // superset of the hg schemes 177 return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) 178 } 179 return maybeGitSource{url: u}, nil 180 } else if ishg { 181 if !validhg { 182 return nil, fmt.Errorf("%s is not a valid scheme for accessing an hg repository", u.Scheme) 183 } 184 return maybeHgSource{url: u}, nil 185 } else if !validgit && !validhg { 186 return nil, fmt.Errorf("%s is not a valid scheme for accessing either a git or hg repository", u.Scheme) 187 } 188 189 // No other choice, make an option for both git and hg 190 return maybeSources{ 191 maybeHgSource{url: u}, 192 maybeGitSource{url: u}, 193 }, nil 194 } 195 196 mb := make(maybeSources, 0) 197 // git is probably more common, even on bitbucket. 
however, bitbucket 198 // appears to fail _extremely_ slowly on git pings (ls-remote) when the 199 // underlying repository is actually an hg repository, so it's better 200 // to try hg first. 201 if !isgit { 202 for _, scheme := range hgSchemes { 203 u2 := *u 204 if scheme == "ssh" { 205 u2.User = url.User("hg") 206 } 207 u2.Scheme = scheme 208 mb = append(mb, maybeHgSource{url: &u2}) 209 } 210 } 211 212 if !ishg { 213 for _, scheme := range gitSchemes { 214 u2 := *u 215 if scheme == "ssh" { 216 u2.User = url.User("git") 217 } 218 u2.Scheme = scheme 219 mb = append(mb, maybeGitSource{url: &u2}) 220 } 221 } 222 223 return mb, nil 224 } 225 226 type gopkginDeducer struct { 227 regexp *regexp.Regexp 228 } 229 230 func (m gopkginDeducer) deduceRoot(p string) (string, error) { 231 v, err := m.parseAndValidatePath(p) 232 if err != nil { 233 return "", err 234 } 235 236 return v[1], nil 237 } 238 239 func (m gopkginDeducer) parseAndValidatePath(p string) ([]string, error) { 240 v := m.regexp.FindStringSubmatch(p) 241 if v == nil { 242 return nil, fmt.Errorf("%s is not a valid path for a source on gopkg.in", p) 243 } 244 245 // We duplicate some logic from the gopkg.in server in order to validate the 246 // import path string without having to make a network request 247 if strings.Contains(v[4], ".") { 248 return nil, fmt.Errorf("%s is not a valid import path; gopkg.in only allows major versions (%q instead of %q)", 249 p, v[4][:strings.Index(v[4], ".")], v[4]) 250 } 251 252 return v, nil 253 } 254 255 func (m gopkginDeducer) deduceSource(p string, u *url.URL) (maybeSource, error) { 256 // Reuse root detection logic for initial validation 257 v, err := m.parseAndValidatePath(p) 258 if err != nil { 259 return nil, err 260 } 261 262 // Putting a scheme on gopkg.in would be really weird, disallow it 263 if u.Scheme != "" { 264 return nil, fmt.Errorf("specifying alternate schemes on gopkg.in imports is not permitted") 265 } 266 267 // gopkg.in is always backed by github 268 
u.Host = "github.com" 269 if v[2] == "" { 270 elem := v[3][1:] 271 u.Path = path.Join("/go-"+elem, elem) 272 } else { 273 u.Path = path.Join(v[2], v[3]) 274 } 275 major, err := strconv.ParseUint(v[4][1:], 10, 64) 276 if err != nil { 277 // this should only be reachable if there's an error in the regex 278 return nil, fmt.Errorf("could not parse %q as a gopkg.in major version", v[4][1:]) 279 } 280 281 mb := make(maybeSources, len(gitSchemes)) 282 for k, scheme := range gitSchemes { 283 u2 := *u 284 if scheme == "ssh" { 285 u2.User = url.User("git") 286 } 287 u2.Scheme = scheme 288 mb[k] = maybeGopkginSource{ 289 opath: v[1], 290 url: &u2, 291 major: major, 292 } 293 } 294 295 return mb, nil 296 } 297 298 type launchpadDeducer struct { 299 regexp *regexp.Regexp 300 } 301 302 func (m launchpadDeducer) deduceRoot(path string) (string, error) { 303 // TODO(sdboyer) lp handling is nasty - there's ambiguities which can only really 304 // be resolved with a metadata request. See https://github.com/golang/go/issues/11436 305 v := m.regexp.FindStringSubmatch(path) 306 if v == nil { 307 return "", fmt.Errorf("%s is not a valid path for a source on launchpad.net", path) 308 } 309 310 return "launchpad.net" + v[2], nil 311 } 312 313 func (m launchpadDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { 314 v := m.regexp.FindStringSubmatch(path) 315 if v == nil { 316 return nil, fmt.Errorf("%s is not a valid path for a source on launchpad.net", path) 317 } 318 319 u.Host = "launchpad.net" 320 u.Path = v[2] 321 322 if u.Scheme != "" { 323 if !validateVCSScheme(u.Scheme, "bzr") { 324 return nil, fmt.Errorf("%s is not a valid scheme for accessing a bzr repository", u.Scheme) 325 } 326 return maybeBzrSource{url: u}, nil 327 } 328 329 mb := make(maybeSources, len(bzrSchemes)) 330 for k, scheme := range bzrSchemes { 331 u2 := *u 332 u2.Scheme = scheme 333 mb[k] = maybeBzrSource{url: &u2} 334 } 335 336 return mb, nil 337 } 338 339 type launchpadGitDeducer struct { 340 
regexp *regexp.Regexp 341 } 342 343 func (m launchpadGitDeducer) deduceRoot(path string) (string, error) { 344 // TODO(sdboyer) same ambiguity issues as with normal bzr lp 345 v := m.regexp.FindStringSubmatch(path) 346 if v == nil { 347 return "", fmt.Errorf("%s is not a valid path for a source on git.launchpad.net", path) 348 } 349 350 return "git.launchpad.net" + v[2], nil 351 } 352 353 func (m launchpadGitDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { 354 v := m.regexp.FindStringSubmatch(path) 355 if v == nil { 356 return nil, fmt.Errorf("%s is not a valid path for a source on git.launchpad.net", path) 357 } 358 359 u.Host = "git.launchpad.net" 360 u.Path = v[2] 361 362 if u.Scheme != "" { 363 if !validateVCSScheme(u.Scheme, "git") { 364 return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) 365 } 366 return maybeGitSource{url: u}, nil 367 } 368 369 mb := make(maybeSources, len(gitSchemes)) 370 for k, scheme := range gitSchemes { 371 u2 := *u 372 u2.Scheme = scheme 373 mb[k] = maybeGitSource{url: &u2} 374 } 375 376 return mb, nil 377 } 378 379 type jazzDeducer struct { 380 regexp *regexp.Regexp 381 } 382 383 func (m jazzDeducer) deduceRoot(path string) (string, error) { 384 v := m.regexp.FindStringSubmatch(path) 385 if v == nil { 386 return "", fmt.Errorf("%s is not a valid path for a source on hub.jazz.net", path) 387 } 388 389 return "hub.jazz.net" + v[2], nil 390 } 391 392 func (m jazzDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { 393 v := m.regexp.FindStringSubmatch(path) 394 if v == nil { 395 return nil, fmt.Errorf("%s is not a valid path for a source on hub.jazz.net", path) 396 } 397 398 u.Host = "hub.jazz.net" 399 u.Path = v[2] 400 401 switch u.Scheme { 402 case "": 403 u.Scheme = "https" 404 fallthrough 405 case "https": 406 return maybeGitSource{url: u}, nil 407 default: 408 return nil, fmt.Errorf("IBM's jazz hub only supports https, %s is not allowed", u.String()) 409 } 
410 } 411 412 type apacheDeducer struct { 413 regexp *regexp.Regexp 414 } 415 416 func (m apacheDeducer) deduceRoot(path string) (string, error) { 417 v := m.regexp.FindStringSubmatch(path) 418 if v == nil { 419 return "", fmt.Errorf("%s is not a valid path for a source on git.apache.org", path) 420 } 421 422 return "git.apache.org" + v[2], nil 423 } 424 425 func (m apacheDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { 426 v := m.regexp.FindStringSubmatch(path) 427 if v == nil { 428 return nil, fmt.Errorf("%s is not a valid path for a source on git.apache.org", path) 429 } 430 431 u.Host = "git.apache.org" 432 u.Path = v[2] 433 434 if u.Scheme != "" { 435 if !validateVCSScheme(u.Scheme, "git") { 436 return nil, fmt.Errorf("%s is not a valid scheme for accessing a git repository", u.Scheme) 437 } 438 return maybeGitSource{url: u}, nil 439 } 440 441 mb := make(maybeSources, len(gitSchemes)) 442 for k, scheme := range gitSchemes { 443 u2 := *u 444 u2.Scheme = scheme 445 mb[k] = maybeGitSource{url: &u2} 446 } 447 448 return mb, nil 449 } 450 451 type vcsExtensionDeducer struct { 452 regexp *regexp.Regexp 453 } 454 455 func (m vcsExtensionDeducer) deduceRoot(path string) (string, error) { 456 v := m.regexp.FindStringSubmatch(path) 457 if v == nil { 458 return "", fmt.Errorf("%s contains no vcs extension hints for matching", path) 459 } 460 461 return v[1], nil 462 } 463 464 func (m vcsExtensionDeducer) deduceSource(path string, u *url.URL) (maybeSource, error) { 465 v := m.regexp.FindStringSubmatch(path) 466 if v == nil { 467 return nil, fmt.Errorf("%s contains no vcs extension hints for matching", path) 468 } 469 470 switch v[4] { 471 case "git", "hg", "bzr": 472 x := strings.SplitN(v[1], "/", 2) 473 // TODO(sdboyer) is this actually correct for bzr? 
474 u.Host = x[0] 475 u.Path = "/" + x[1] 476 477 if u.Scheme != "" { 478 if !validateVCSScheme(u.Scheme, v[4]) { 479 return nil, fmt.Errorf("%s is not a valid scheme for accessing %s repositories (path %s)", u.Scheme, v[4], path) 480 } 481 482 switch v[4] { 483 case "git": 484 return maybeGitSource{url: u}, nil 485 case "bzr": 486 return maybeBzrSource{url: u}, nil 487 case "hg": 488 return maybeHgSource{url: u}, nil 489 } 490 } 491 492 var schemes []string 493 var mb maybeSources 494 var f func(k int, u *url.URL) 495 496 switch v[4] { 497 case "git": 498 schemes = gitSchemes 499 f = func(k int, u *url.URL) { 500 mb[k] = maybeGitSource{url: u} 501 } 502 case "bzr": 503 schemes = bzrSchemes 504 f = func(k int, u *url.URL) { 505 mb[k] = maybeBzrSource{url: u} 506 } 507 case "hg": 508 schemes = hgSchemes 509 f = func(k int, u *url.URL) { 510 mb[k] = maybeHgSource{url: u} 511 } 512 } 513 514 mb = make(maybeSources, len(schemes)) 515 for k, scheme := range schemes { 516 u2 := *u 517 u2.Scheme = scheme 518 f(k, &u2) 519 } 520 521 return mb, nil 522 default: 523 return nil, fmt.Errorf("unknown repository type: %q", v[4]) 524 } 525 } 526 527 // A deducer takes an import path and inspects it to determine where the 528 // corresponding project root should be. It applies a number of matching 529 // techniques, eventually falling back to an HTTP request for go-get metadata if 530 // none of the explicit rules succeed. 531 // 532 // The only real implementation is deductionCoordinator. The interface is 533 // primarily intended for testing purposes. 
534 type deducer interface { 535 deduceRootPath(ctx context.Context, path string) (pathDeduction, error) 536 } 537 538 type deductionCoordinator struct { 539 suprvsr *supervisor 540 mut sync.RWMutex 541 rootxt *radix.Tree 542 deducext *deducerTrie 543 } 544 545 func newDeductionCoordinator(superv *supervisor) *deductionCoordinator { 546 dc := &deductionCoordinator{ 547 suprvsr: superv, 548 rootxt: radix.New(), 549 deducext: pathDeducerTrie(), 550 } 551 552 return dc 553 } 554 555 // deduceRootPath takes an import path and attempts to deduce various 556 // metadata about it - what type of source should handle it, and where its 557 // "root" is (for vcs repositories, the repository root). 558 // 559 // If no errors are encountered, the returned pathDeduction will contain both 560 // the root path and a list of maybeSources, which can be subsequently used to 561 // create a handler that will manage the particular source. 562 func (dc *deductionCoordinator) deduceRootPath(ctx context.Context, path string) (pathDeduction, error) { 563 if dc.suprvsr.getLifetimeContext().Err() != nil { 564 return pathDeduction{}, errors.New("deductionCoordinator has been terminated") 565 } 566 567 // First, check the rootxt to see if there's a prefix match - if so, we 568 // can return that and move on. 569 dc.mut.RLock() 570 prefix, data, has := dc.rootxt.LongestPrefix(path) 571 dc.mut.RUnlock() 572 if has && isPathPrefixOrEqual(prefix, path) { 573 switch d := data.(type) { 574 case maybeSource: 575 return pathDeduction{root: prefix, mb: d}, nil 576 case *httpMetadataDeducer: 577 // Multiple calls have come in for a similar path shape during 578 // the window in which the HTTP request to retrieve go get 579 // metadata is in flight. Fold this request in with the existing 580 // one(s) by calling the deduction method, which will avoid 581 // duplication of work through a sync.Once. 
582 return d.deduce(ctx, path) 583 } 584 585 panic(fmt.Sprintf("unexpected %T in deductionCoordinator.rootxt: %v", data, data)) 586 } 587 588 // No match. Try known path deduction first. 589 pd, err := dc.deduceKnownPaths(path) 590 if err == nil { 591 // Deduction worked; store it in the rootxt, send on retchan and 592 // terminate. 593 // FIXME(sdboyer) deal with changing path vs. root. Probably needs 594 // to be predeclared and reused in the hmd returnFunc 595 dc.mut.Lock() 596 dc.rootxt.Insert(pd.root, pd.mb) 597 dc.mut.Unlock() 598 return pd, nil 599 } 600 601 if err != errNoKnownPathMatch { 602 return pathDeduction{}, err 603 } 604 605 // The err indicates no known path matched. It's still possible that 606 // retrieving go get metadata might do the trick. 607 hmd := &httpMetadataDeducer{ 608 basePath: path, 609 suprvsr: dc.suprvsr, 610 // The vanity deducer will call this func with a completed 611 // pathDeduction if it succeeds in finding one. We process it 612 // back through the action channel to ensure serialized 613 // access to the rootxt map. 614 returnFunc: func(pd pathDeduction) { 615 dc.mut.Lock() 616 dc.rootxt.Insert(pd.root, pd.mb) 617 dc.mut.Unlock() 618 }, 619 } 620 621 // Save the hmd in the rootxt so that calls checking on similar 622 // paths made while the request is in flight can be folded together. 623 dc.mut.Lock() 624 dc.rootxt.Insert(path, hmd) 625 dc.mut.Unlock() 626 627 // Trigger the HTTP-backed deduction process for this requestor. 628 return hmd.deduce(ctx, path) 629 } 630 631 // pathDeduction represents the results of a successful import path deduction - 632 // a root path, plus a maybeSource that can be used to attempt to connect to 633 // the source. 
634 type pathDeduction struct { 635 root string 636 mb maybeSource 637 } 638 639 var errNoKnownPathMatch = errors.New("no known path match") 640 641 func (dc *deductionCoordinator) deduceKnownPaths(path string) (pathDeduction, error) { 642 u, path, err := normalizeURI(path) 643 if err != nil { 644 return pathDeduction{}, err 645 } 646 647 // First, try the root path-based matches 648 if _, mtch, has := dc.deducext.LongestPrefix(path); has { 649 root, err := mtch.deduceRoot(path) 650 if err != nil { 651 return pathDeduction{}, err 652 } 653 mb, err := mtch.deduceSource(path, u) 654 if err != nil { 655 return pathDeduction{}, err 656 } 657 658 return pathDeduction{ 659 root: root, 660 mb: mb, 661 }, nil 662 } 663 664 // Next, try the vcs extension-based (infix) matcher 665 exm := vcsExtensionDeducer{regexp: vcsExtensionRegex} 666 if root, err := exm.deduceRoot(path); err == nil { 667 mb, err := exm.deduceSource(path, u) 668 if err != nil { 669 return pathDeduction{}, err 670 } 671 672 return pathDeduction{ 673 root: root, 674 mb: mb, 675 }, nil 676 } 677 678 return pathDeduction{}, errNoKnownPathMatch 679 } 680 681 type httpMetadataDeducer struct { 682 once sync.Once 683 deduced pathDeduction 684 deduceErr error 685 basePath string 686 returnFunc func(pathDeduction) 687 suprvsr *supervisor 688 } 689 690 func (hmd *httpMetadataDeducer) deduce(ctx context.Context, path string) (pathDeduction, error) { 691 hmd.once.Do(func() { 692 opath := path 693 u, path, err := normalizeURI(path) 694 if err != nil { 695 hmd.deduceErr = err 696 return 697 } 698 699 pd := pathDeduction{} 700 701 // Make the HTTP call to attempt to retrieve go-get metadata 702 var root, vcs, reporoot string 703 err = hmd.suprvsr.do(ctx, path, ctHTTPMetadata, func(ctx context.Context) error { 704 root, vcs, reporoot, err = parseMetadata(ctx, path, u.Scheme) 705 return err 706 }) 707 if err != nil { 708 hmd.deduceErr = fmt.Errorf("unable to deduce repository and source type for: %q", opath) 709 return 710 
} 711 pd.root = root 712 713 // If we got something back at all, then it supercedes the actual input for 714 // the real URL to hit 715 repoURL, err := url.Parse(reporoot) 716 if err != nil { 717 hmd.deduceErr = fmt.Errorf("server returned bad URL in go-get metadata: %q", reporoot) 718 return 719 } 720 721 // If the input path specified a scheme, then try to honor it. 722 if u.Scheme != "" && repoURL.Scheme != u.Scheme { 723 // If the input scheme was http, but the go-get metadata 724 // nevertheless indicated https should be used for the repo, then 725 // trust the metadata and use https. 726 // 727 // To err on the secure side, do NOT allow the same in the other 728 // direction (https -> http). 729 if u.Scheme != "http" || repoURL.Scheme != "https" { 730 hmd.deduceErr = fmt.Errorf("scheme mismatch for %q: input asked for %q, but go-get metadata specified %q", path, u.Scheme, repoURL.Scheme) 731 return 732 } 733 } 734 735 switch vcs { 736 case "git": 737 pd.mb = maybeGitSource{url: repoURL} 738 case "bzr": 739 pd.mb = maybeBzrSource{url: repoURL} 740 case "hg": 741 pd.mb = maybeHgSource{url: repoURL} 742 default: 743 hmd.deduceErr = fmt.Errorf("unsupported vcs type %s in go-get metadata from %s", vcs, path) 744 return 745 } 746 747 hmd.deduced = pd 748 // All data is assigned for other goroutines that may be waiting. Now, 749 // send the pathDeduction back to the deductionCoordinator by calling 750 // the returnFunc. This will also remove the reference to this hmd in 751 // the coordinator's trie. 752 // 753 // When this call finishes, it is guaranteed the coordinator will have 754 // at least begun running the action to insert the path deduction, which 755 // means no other deduction request will be able to interleave and 756 // request the same path before the pathDeduction can be processed, but 757 // after this hmd has been dereferenced from the trie. 
758 hmd.returnFunc(pd) 759 }) 760 761 return hmd.deduced, hmd.deduceErr 762 } 763 764 func normalizeURI(p string) (u *url.URL, newpath string, err error) { 765 if m := scpSyntaxRe.FindStringSubmatch(p); m != nil { 766 // Match SCP-like syntax and convert it to a URL. 767 // Eg, "git@github.com:user/repo" becomes 768 // "ssh://git@github.com/user/repo". 769 u = &url.URL{ 770 Scheme: "ssh", 771 User: url.User(m[1]), 772 Host: m[2], 773 Path: "/" + m[3], 774 // TODO(sdboyer) This is what stdlib sets; grok why better 775 //RawPath: m[3], 776 } 777 } else { 778 u, err = url.Parse(p) 779 if err != nil { 780 return nil, "", fmt.Errorf("%q is not a valid URI", p) 781 } 782 } 783 784 // If no scheme was passed, then the entire path will have been put into 785 // u.Path. Either way, construct the normalized path correctly. 786 if u.Host == "" { 787 newpath = p 788 } else { 789 newpath = path.Join(u.Host, u.Path) 790 } 791 792 if !pathvld.MatchString(newpath) { 793 return nil, "", fmt.Errorf("%q is not a valid import path", newpath) 794 } 795 796 return 797 } 798 799 // fetchMetadata fetches the remote metadata for path. 
//
// If scheme is "http", only http is attempted. For any other scheme
// (including none) https is attempted first, with a fallback to http if that
// fails. On success the caller owns the returned body and must close it.
func fetchMetadata(ctx context.Context, path, scheme string) (rc io.ReadCloser, err error) {
	// Give every failure leaving this function a common prefix.
	defer func() {
		if err != nil {
			err = fmt.Errorf("unable to determine remote metadata protocol: %s", err)
		}
	}()

	if scheme == "http" {
		return doFetchMetadata(ctx, "http", path)
	}

	if rc, err = doFetchMetadata(ctx, "https", path); err == nil {
		return rc, nil
	}

	// https failed; its error is dropped and http gets one attempt.
	return doFetchMetadata(ctx, "http", path)
}

// doFetchMetadata issues a GET for "<scheme>://<path>?go-get=1" using the
// default HTTP client and returns the response body. Only the "http" and
// "https" schemes are accepted.
//
// The response status code is not inspected here. Errors include the
// underlying cause reported by net/http.
func doFetchMetadata(ctx context.Context, scheme, path string) (io.ReadCloser, error) {
	switch scheme {
	case "https", "http":
	default:
		return nil, fmt.Errorf("unknown remote protocol scheme: %q", scheme)
	}

	rawURL := fmt.Sprintf("%s://%s?go-get=1", scheme, path)

	req, err := http.NewRequest("GET", rawURL, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to access url %q: %s", rawURL, err)
	}

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	if err != nil {
		return nil, fmt.Errorf("failed to access url %q: %s", rawURL, err)
	}

	return resp.Body, nil
}

// parseMetadata fetches and decodes remote metadata for path.
//
// scheme is optional. If it's http, only http will be attempted for fetching.
// Any other scheme (including none) will first try https, then fall back to
// http.
846 func parseMetadata(ctx context.Context, path, scheme string) (string, string, string, error) { 847 rc, err := fetchMetadata(ctx, path, scheme) 848 if err != nil { 849 return "", "", "", err 850 } 851 defer rc.Close() 852 853 imports, err := parseMetaGoImports(rc) 854 if err != nil { 855 return "", "", "", err 856 } 857 match := -1 858 for i, im := range imports { 859 if !strings.HasPrefix(path, im.Prefix) { 860 continue 861 } 862 if match != -1 { 863 return "", "", "", fmt.Errorf("multiple meta tags match import path %q", path) 864 } 865 match = i 866 } 867 if match == -1 { 868 return "", "", "", fmt.Errorf("go-import metadata not found") 869 } 870 return imports[match].Prefix, imports[match].VCS, imports[match].RepoRoot, nil 871 }