github.com/bazelbuild/bazel-gazelle@v0.36.1-0.20240520142334-61b277ba6fed/repo/remote.go (about) 1 /* Copyright 2018 The Bazel Authors. All rights reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 */ 15 16 package repo 17 18 import ( 19 "bytes" 20 "encoding/json" 21 "errors" 22 "fmt" 23 "os" 24 "os/exec" 25 "path" 26 "path/filepath" 27 "regexp" 28 "runtime" 29 "strings" 30 "sync" 31 32 "github.com/bazelbuild/bazel-gazelle/label" 33 "github.com/bazelbuild/bazel-gazelle/pathtools" 34 "golang.org/x/mod/modfile" 35 "golang.org/x/tools/go/vcs" 36 ) 37 38 // RemoteCache stores information about external repositories. The cache may 39 // be initialized with information about known repositories, i.e., those listed 40 // in the WORKSPACE file and mentioned on the command line. Other information 41 // is retrieved over the network. 42 // 43 // Public methods of RemoteCache may be slow in cases where a network fetch 44 // is needed. Public methods may be called concurrently. 45 // 46 // TODO(jayconrod): this is very Go-centric. It should be moved to language/go. 47 // Unfortunately, doing so would break the resolve.Resolver interface. 48 type RemoteCache struct { 49 // RepoRootForImportPath is vcs.RepoRootForImportPath by default. It may 50 // be overridden so that tests may avoid accessing the network. 51 RepoRootForImportPath func(string, bool) (*vcs.RepoRoot, error) 52 53 // HeadCmd returns the latest commit on the default branch in the given 54 // repository. 
This is used by Head. It may be stubbed out for tests. 55 HeadCmd func(remote, vcs string) (string, error) 56 57 // ModInfo returns the module path and version that provides the package 58 // with the given import path. This is used by Mod. It may be stubbed 59 // out for tests. 60 ModInfo func(importPath string) (modPath string, err error) 61 62 // ModVersionInfo returns the module path, true version, and sum for 63 // the module that provides the package with the given import path. 64 // This is used by ModVersion. It may be stubbed out for tests. 65 ModVersionInfo func(modPath, query string) (version, sum string, err error) 66 67 root, remote, head, mod, modVersion remoteCacheMap 68 69 tmpOnce sync.Once 70 tmpDir string 71 tmpErr error 72 } 73 74 // remoteCacheMap is a thread-safe, idempotent cache. It is used to store 75 // information which should be fetched over the network no more than once. 76 // This follows the Memo pattern described in The Go Programming Language, 77 // section 9.7. 78 type remoteCacheMap struct { 79 mu sync.Mutex 80 cache map[string]*remoteCacheEntry 81 } 82 83 type remoteCacheEntry struct { 84 value interface{} 85 err error 86 87 // ready is nil for entries that were added when the cache was initialized. 88 // It is non-nil for other entries. It is closed when an entry is ready, 89 // i.e., the operation loading the entry completed. 90 ready chan struct{} 91 } 92 93 type rootValue struct { 94 root, name string 95 } 96 97 type remoteValue struct { 98 remote, vcs string 99 } 100 101 type headValue struct { 102 commit, tag string 103 } 104 105 type modValue struct { 106 path, name string 107 known bool 108 } 109 110 type modVersionValue struct { 111 path, version, sum string 112 } 113 114 // Repo describes details of a Go repository known in advance. It is used to 115 // initialize RemoteCache so that some repositories don't need to be looked up. 
116 // 117 // DEPRECATED: Go-specific details should be removed from RemoteCache, and 118 // lookup logic should be moved to language/go. This means RemoteCache will 119 // need to be initialized in a different way. 120 type Repo struct { 121 Name, GoPrefix, Remote, VCS string 122 } 123 124 // NewRemoteCache creates a new RemoteCache with a set of known repositories. 125 // The Root and Remote methods will return information about repositories listed 126 // here without accessing the network. However, the Head method will still 127 // access the network for these repositories to retrieve information about new 128 // versions. 129 // 130 // A cleanup function is also returned. The caller must call this when 131 // RemoteCache is no longer needed. RemoteCache may write files to a temporary 132 // directory. This will delete them. 133 func NewRemoteCache(knownRepos []Repo) (r *RemoteCache, cleanup func() error) { 134 r = &RemoteCache{ 135 RepoRootForImportPath: vcs.RepoRootForImportPath, 136 HeadCmd: defaultHeadCmd, 137 root: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 138 remote: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 139 head: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 140 mod: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 141 modVersion: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 142 } 143 r.ModInfo = func(importPath string) (string, error) { 144 return defaultModInfo(r, importPath) 145 } 146 r.ModVersionInfo = func(modPath, query string) (string, string, error) { 147 return defaultModVersionInfo(r, modPath, query) 148 } 149 for _, repo := range knownRepos { 150 r.root.cache[repo.GoPrefix] = &remoteCacheEntry{ 151 value: rootValue{ 152 root: repo.GoPrefix, 153 name: repo.Name, 154 }, 155 } 156 if repo.Remote != "" { 157 r.remote.cache[repo.GoPrefix] = &remoteCacheEntry{ 158 value: remoteValue{ 159 remote: repo.Remote, 160 vcs: repo.VCS, 161 }, 162 } 163 } 164 r.mod.cache[repo.GoPrefix] 
= &remoteCacheEntry{ 165 value: modValue{ 166 path: repo.GoPrefix, 167 name: repo.Name, 168 known: true, 169 }, 170 } 171 } 172 173 // Augment knownRepos with additional prefixes for 174 // minimal module compatibility. For example, if repo "com_example_foo_v2" 175 // has prefix "example.com/foo/v2", map "example.com/foo" to the same 176 // entry. 177 // TODO(jayconrod): there should probably be some control over whether 178 // callers can use these mappings: packages within modules should not be 179 // allowed to use them. However, we'll return the same result nearly all 180 // the time, and simpler is better. 181 for _, repo := range knownRepos { 182 newPath := pathWithoutSemver(repo.GoPrefix) 183 if newPath == "" { 184 continue 185 } 186 // Avoid adding the semver-less path for this module if there 187 // is another known module which already covers this path. 188 // See https://github.com/bazelbuild/bazel-gazelle/issues/1595. 189 found := false 190 for prefix := newPath; prefix != "." && prefix != "/"; prefix = path.Dir(prefix) { 191 if _, ok := r.root.cache[prefix]; ok { 192 found = true 193 break 194 } 195 } 196 if found { 197 continue 198 } 199 r.root.cache[newPath] = r.root.cache[repo.GoPrefix] 200 if e := r.remote.cache[repo.GoPrefix]; e != nil { 201 r.remote.cache[newPath] = e 202 } 203 r.mod.cache[newPath] = r.mod.cache[repo.GoPrefix] 204 } 205 206 return r, r.cleanup 207 } 208 209 func (r *RemoteCache) cleanup() error { 210 if r.tmpDir == "" { 211 return nil 212 } 213 return os.RemoveAll(r.tmpDir) 214 } 215 216 // PopulateFromGoMod reads a go.mod file and adds entries to the r.root 217 // map based on the file's require directives. PopulateFromGoMod does not 218 // override entries already in the cache. This should help avoid going 219 // out to the network for external dependency resolution, and it should 220 // let static dependency resolution succeed more often. 
221 func (r *RemoteCache) PopulateFromGoMod(goModPath string) (err error) { 222 defer func() { 223 if err != nil { 224 err = fmt.Errorf("reading module paths from %s: %w", goModPath, err) 225 } 226 }() 227 228 data, err := os.ReadFile(goModPath) 229 if err != nil { 230 return err 231 } 232 var versionFixer modfile.VersionFixer 233 f, err := modfile.Parse(goModPath, data, versionFixer) 234 if err != nil { 235 return err 236 } 237 for _, req := range f.Require { 238 r.root.ensure(req.Mod.Path, func() (any, error) { 239 return rootValue{ 240 root: req.Mod.Path, 241 name: label.ImportPathToBazelRepoName(req.Mod.Path), 242 }, nil 243 }) 244 } 245 return nil 246 } 247 248 var gopkginPattern = regexp.MustCompile(`^(gopkg.in/(?:[^/]+/)?[^/]+\.v\d+)(?:/|$)`) 249 250 var knownPrefixes = []struct { 251 prefix string 252 missing int 253 }{ 254 {prefix: "golang.org/x", missing: 1}, 255 {prefix: "google.golang.org", missing: 1}, 256 {prefix: "cloud.google.com", missing: 1}, 257 {prefix: "github.com", missing: 2}, 258 } 259 260 // RootStatic checks the cache to see if the provided importpath matches any known roots. 261 // If no matches are found, rather than going out to the network to determine the root, 262 // nothing is returned. 263 func (r *RemoteCache) RootStatic(importPath string) (root, name string, err error) { 264 for prefix := importPath; prefix != "." && prefix != "/"; prefix = path.Dir(prefix) { 265 v, ok, err := r.root.get(prefix) 266 if ok { 267 if err != nil { 268 return "", "", err 269 } 270 value := v.(rootValue) 271 return value.root, value.name, nil 272 } 273 } 274 return "", "", nil 275 } 276 277 // Root returns the portion of an import path that corresponds to the root 278 // directory of the repository containing the given import path. For example, 279 // given "golang.org/x/tools/go/loader", this will return "golang.org/x/tools". 280 // The workspace name of the repository is also returned. 
This may be a custom 281 // name set in WORKSPACE, or it may be a generated name based on the root path. 282 func (r *RemoteCache) Root(importPath string) (root, name string, err error) { 283 // Try prefixes of the import path in the cache, but don't actually go out 284 // to vcs yet. We do this before handling known special cases because 285 // the cache is pre-populated with repository rules, and we want to use their 286 // names if we can. 287 prefix := importPath 288 for { 289 v, ok, err := r.root.get(prefix) 290 if ok { 291 if err != nil { 292 return "", "", err 293 } 294 value := v.(rootValue) 295 return value.root, value.name, nil 296 } 297 298 prefix = path.Dir(prefix) 299 if prefix == "." || prefix == "/" { 300 break 301 } 302 } 303 304 // Try known prefixes. 305 for _, p := range knownPrefixes { 306 if pathtools.HasPrefix(importPath, p.prefix) { 307 rest := pathtools.TrimPrefix(importPath, p.prefix) 308 var components []string 309 if rest != "" { 310 components = strings.Split(rest, "/") 311 } 312 if len(components) < p.missing { 313 return "", "", fmt.Errorf("import path %q is shorter than the known prefix %q", importPath, p.prefix) 314 } 315 root = p.prefix 316 for _, c := range components[:p.missing] { 317 root = path.Join(root, c) 318 } 319 name = label.ImportPathToBazelRepoName(root) 320 return root, name, nil 321 } 322 } 323 324 // gopkg.in is special, and might have either one or two levels of 325 // missing paths. See http://labix.org/gopkg.in for URL patterns. 326 if match := gopkginPattern.FindStringSubmatch(importPath); len(match) > 0 { 327 root = match[1] 328 name = label.ImportPathToBazelRepoName(root) 329 return root, name, nil 330 } 331 332 // Find the prefix using vcs and cache the result. 
333 v, err := r.root.ensure(importPath, func() (interface{}, error) { 334 res, err := r.RepoRootForImportPath(importPath, false) 335 if err != nil { 336 return nil, err 337 } 338 return rootValue{res.Root, label.ImportPathToBazelRepoName(res.Root)}, nil 339 }) 340 if err != nil { 341 return "", "", err 342 } 343 value := v.(rootValue) 344 return value.root, value.name, nil 345 } 346 347 // Remote returns the VCS name and the remote URL for a repository with the 348 // given root import path. This is suitable for creating new repository rules. 349 func (r *RemoteCache) Remote(root string) (remote, vcs string, err error) { 350 v, err := r.remote.ensure(root, func() (interface{}, error) { 351 repo, err := r.RepoRootForImportPath(root, false) 352 if err != nil { 353 return nil, err 354 } 355 return remoteValue{remote: repo.Repo, vcs: repo.VCS.Cmd}, nil 356 }) 357 if err != nil { 358 return "", "", err 359 } 360 value := v.(remoteValue) 361 return value.remote, value.vcs, nil 362 } 363 364 // Head returns the most recent commit id on the default branch and latest 365 // version tag for the given remote repository. The tag "" is returned if 366 // no latest version was found. 367 // 368 // TODO(jayconrod): support VCS other than git. 369 // TODO(jayconrod): support version tags. "" is always returned. 
370 func (r *RemoteCache) Head(remote, vcs string) (commit, tag string, err error) { 371 if vcs != "git" { 372 return "", "", fmt.Errorf("could not locate recent commit in repo %q with unknown version control scheme %q", remote, vcs) 373 } 374 375 v, err := r.head.ensure(remote, func() (interface{}, error) { 376 commit, err := r.HeadCmd(remote, vcs) 377 if err != nil { 378 return nil, err 379 } 380 return headValue{commit: commit}, nil 381 }) 382 if err != nil { 383 return "", "", err 384 } 385 value := v.(headValue) 386 return value.commit, value.tag, nil 387 } 388 389 func defaultHeadCmd(remote, vcs string) (string, error) { 390 switch vcs { 391 case "local": 392 return "", nil 393 394 case "git": 395 // Old versions of git ls-remote exit with code 129 when "--" is passed. 396 // We'll try to validate the argument here instead. 397 if strings.HasPrefix(remote, "-") { 398 return "", fmt.Errorf("remote must not start with '-': %q", remote) 399 } 400 cmd := exec.Command("git", "ls-remote", remote, "HEAD") 401 out, err := cmd.Output() 402 if err != nil { 403 return "", fmt.Errorf("git ls-remote for %s: %v", remote, cleanCmdError(err)) 404 } 405 ix := bytes.IndexByte(out, '\t') 406 if ix < 0 { 407 return "", fmt.Errorf("could not parse output for git ls-remote for %q", remote) 408 } 409 return string(out[:ix]), nil 410 411 default: 412 return "", fmt.Errorf("unknown version control system: %s", vcs) 413 } 414 } 415 416 // Mod returns the module path for the module that contains the package 417 // named by importPath. The name of the go_repository rule for the module 418 // is also returned. For example, calling Mod on "github.com/foo/bar/v2/baz" 419 // would give the module path "github.com/foo/bar/v2" and the name 420 // "com_github_foo_bar_v2". 421 // 422 // If a known repository *could* provide importPath (because its "importpath" 423 // is a prefix of importPath), Mod will assume that it does. 
This may give 424 // inaccurate results if importPath is in an undeclared nested module. Run 425 // "gazelle update-repos -from_file=go.mod" first for best results. 426 // 427 // If no known repository could provide importPath, Mod will run "go list" to 428 // find the module. The special patterns that Root uses are ignored. Results are 429 // cached. Use GOPROXY for faster results. 430 func (r *RemoteCache) Mod(importPath string) (modPath, name string, err error) { 431 // Check if any of the known repositories is a prefix. 432 prefix := importPath 433 for { 434 v, ok, err := r.mod.get(prefix) 435 if ok { 436 if err != nil { 437 return "", "", err 438 } 439 value := v.(modValue) 440 if value.known { 441 return value.path, value.name, nil 442 } else { 443 break 444 } 445 } 446 447 prefix = path.Dir(prefix) 448 if prefix == "." || prefix == "/" { 449 break 450 } 451 } 452 453 // Ask "go list". 454 v, err := r.mod.ensure(importPath, func() (interface{}, error) { 455 modPath, err := r.ModInfo(importPath) 456 if err != nil { 457 return nil, err 458 } 459 return modValue{ 460 path: modPath, 461 name: label.ImportPathToBazelRepoName(modPath), 462 }, nil 463 }) 464 if err != nil { 465 return "", "", err 466 } 467 value := v.(modValue) 468 return value.path, value.name, nil 469 } 470 471 func defaultModInfo(rc *RemoteCache, importPath string) (modPath string, err error) { 472 rc.initTmp() 473 if rc.tmpErr != nil { 474 return "", rc.tmpErr 475 } 476 defer func() { 477 if err != nil { 478 err = fmt.Errorf("finding module path for import %s: %v", importPath, cleanCmdError(err)) 479 } 480 }() 481 482 goTool := findGoTool() 483 env := append(os.Environ(), "GO111MODULE=on") 484 485 cmd := exec.Command(goTool, "get", "-d", "--", importPath) 486 cmd.Dir = rc.tmpDir 487 cmd.Env = env 488 if _, err := cmd.Output(); err != nil { 489 return "", err 490 } 491 492 cmd = exec.Command(goTool, "list", "-find", "-f", "{{.Module.Path}}", "--", importPath) 493 cmd.Dir = rc.tmpDir 494 cmd.Env = 
env 495 out, err := cmd.Output() 496 if err != nil { 497 return "", fmt.Errorf("finding module path for import %s: %v", importPath, cleanCmdError(err)) 498 } 499 return strings.TrimSpace(string(out)), nil 500 } 501 502 // ModVersion looks up information about a module at a given version. 503 // The path must be the module path, not a package within the module. 504 // The version may be a canonical semantic version, a query like "latest", 505 // or a branch, tag, or revision name. ModVersion returns the name of 506 // the repository rule providing the module (if any), the true version, 507 // and the sum. 508 func (r *RemoteCache) ModVersion(modPath, query string) (name, version, sum string, err error) { 509 // Ask "go list". 510 arg := modPath + "@" + query 511 v, err := r.modVersion.ensure(arg, func() (interface{}, error) { 512 version, sum, err := r.ModVersionInfo(modPath, query) 513 if err != nil { 514 return nil, err 515 } 516 return modVersionValue{ 517 path: modPath, 518 version: version, 519 sum: sum, 520 }, nil 521 }) 522 if err != nil { 523 return "", "", "", err 524 } 525 value := v.(modVersionValue) 526 527 // Try to find the repository name for the module, if there's already 528 // a repository rule that provides it. 
529 v, ok, err := r.mod.get(modPath) 530 if ok && err == nil { 531 name = v.(modValue).name 532 } else { 533 name = label.ImportPathToBazelRepoName(modPath) 534 } 535 536 return name, value.version, value.sum, nil 537 } 538 539 func defaultModVersionInfo(rc *RemoteCache, modPath, query string) (version, sum string, err error) { 540 rc.initTmp() 541 if rc.tmpErr != nil { 542 return "", "", rc.tmpErr 543 } 544 defer func() { 545 if err != nil { 546 err = fmt.Errorf("finding module version and sum for %s@%s: %v", modPath, query, cleanCmdError(err)) 547 } 548 }() 549 550 goTool := findGoTool() 551 cmd := exec.Command(goTool, "mod", "download", "-json", "--", modPath+"@"+query) 552 cmd.Dir = rc.tmpDir 553 cmd.Env = append(os.Environ(), "GO111MODULE=on") 554 out, err := cmd.Output() 555 if err != nil { 556 return "", "", err 557 } 558 559 var result struct{ Version, Sum string } 560 if err := json.Unmarshal(out, &result); err != nil { 561 return "", "", fmt.Errorf("invalid output from 'go mod download': %v", err) 562 } 563 return result.Version, result.Sum, nil 564 } 565 566 // get retrieves a value associated with the given key from the cache. ok will 567 // be true if the key exists in the cache, even if it's in the process of 568 // being fetched. 569 func (m *remoteCacheMap) get(key string) (value interface{}, ok bool, err error) { 570 m.mu.Lock() 571 e, ok := m.cache[key] 572 m.mu.Unlock() 573 if !ok { 574 return nil, ok, nil 575 } 576 if e.ready != nil { 577 <-e.ready 578 } 579 return e.value, ok, e.err 580 } 581 582 // ensure retreives a value associated with the given key from the cache. If 583 // the key does not exist in the cache, the load function will be called, 584 // and its result will be associated with the key. The load function will not 585 // be called more than once for any key. 
586 func (m *remoteCacheMap) ensure(key string, load func() (interface{}, error)) (interface{}, error) { 587 m.mu.Lock() 588 e, ok := m.cache[key] 589 if !ok { 590 e = &remoteCacheEntry{ready: make(chan struct{})} 591 m.cache[key] = e 592 m.mu.Unlock() 593 e.value, e.err = load() 594 close(e.ready) 595 } else { 596 m.mu.Unlock() 597 if e.ready != nil { 598 <-e.ready 599 } 600 } 601 return e.value, e.err 602 } 603 604 func (rc *RemoteCache) initTmp() { 605 rc.tmpOnce.Do(func() { 606 rc.tmpDir, rc.tmpErr = os.MkdirTemp("", "gazelle-remotecache-") 607 if rc.tmpErr != nil { 608 return 609 } 610 rc.tmpErr = os.WriteFile(filepath.Join(rc.tmpDir, "go.mod"), []byte("module gazelle_remote_cache\ngo 1.15\n"), 0o666) 611 }) 612 } 613 614 var semverRex = regexp.MustCompile(`^.*?(/v\d+)(?:/.*)?$`) 615 616 // pathWithoutSemver removes a semantic version suffix from path. 617 // For example, if path is "example.com/foo/v2/bar", pathWithoutSemver 618 // will return "example.com/foo/bar". If there is no semantic version suffix, 619 // "" will be returned. 620 // TODO(jayconrod): copied from language/go. This whole type should be 621 // migrated there. 622 func pathWithoutSemver(path string) string { 623 m := semverRex.FindStringSubmatchIndex(path) 624 if m == nil { 625 return "" 626 } 627 v := path[m[2]+2 : m[3]] 628 if v == "0" || v == "1" { 629 return "" 630 } 631 return path[:m[2]] + path[m[3]:] 632 } 633 634 // findGoTool attempts to locate the go executable. If GOROOT is set, we'll 635 // prefer the one in there; otherwise, we'll rely on PATH. If the wrapper 636 // script generated by the gazelle rule is invoked by Bazel, it will set 637 // GOROOT to the configured SDK. We don't want to rely on the host SDK in 638 // that situation. 639 // 640 // TODO(jayconrod): copied from language/go (though it was originally in this 641 // package). Go-specific details should be removed from RemoteCache, and 642 // this copy should be deleted. 
func findGoTool() string {
	path := "go" // rely on PATH by default
	// An empty GOROOT is treated like an unset one. Joining it would give
	// the relative path "bin/go", which would be resolved against whatever
	// directory the command runs in.
	if goroot := os.Getenv("GOROOT"); goroot != "" {
		path = filepath.Join(goroot, "bin", "go")
	}
	if runtime.GOOS == "windows" {
		path += ".exe"
	}
	return path
}

// cleanCmdError simplifies error messages from os/exec.Cmd.Run.
// For ExitErrors, it trims and returns stderr. This is useful for go commands
// that print well-formatted errors. By default, ExitError prints the exit
// status but not stderr.
//
// An ExitError is recognized even when it has been wrapped by another error;
// in that case the returned error carries only the trimmed stderr text.
//
// cleanCmdError returns other errors unmodified, as well as ExitErrors whose
// stderr is empty or all whitespace.
func cleanCmdError(err error) error {
	var xerr *exec.ExitError
	if !errors.As(err, &xerr) {
		return err
	}
	stderr := strings.TrimSpace(string(xerr.Stderr))
	if stderr == "" {
		return err
	}
	return errors.New(stderr)
}