github.com/wolfd/bazel-gazelle@v0.14.0/internal/repos/remote.go (about) 1 /* Copyright 2018 The Bazel Authors. All rights reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 */ 15 16 package repos 17 18 import ( 19 "bytes" 20 "fmt" 21 "os/exec" 22 "path" 23 "regexp" 24 "strings" 25 "sync" 26 27 "github.com/bazelbuild/bazel-gazelle/internal/label" 28 "github.com/bazelbuild/bazel-gazelle/internal/pathtools" 29 "golang.org/x/tools/go/vcs" 30 ) 31 32 // UpdateRepo returns an object describing a repository at the most recent 33 // commit or version tag. 34 // 35 // This function uses RemoteCache to retrieve information about the repository. 36 // Depending on how the RemoteCache was initialized and used earlier, some 37 // information may already be locally available. Frequently though, information 38 // will be fetched over the network, so this function may be slow. 39 func UpdateRepo(rc *RemoteCache, importPath string) (Repo, error) { 40 root, name, err := rc.Root(importPath) 41 if err != nil { 42 return Repo{}, err 43 } 44 remote, vcs, err := rc.Remote(root) 45 if err != nil { 46 return Repo{}, err 47 } 48 commit, tag, err := rc.Head(remote, vcs) 49 if err != nil { 50 return Repo{}, err 51 } 52 repo := Repo{ 53 Name: name, 54 GoPrefix: root, 55 Commit: commit, 56 Tag: tag, 57 Remote: remote, 58 VCS: vcs, 59 } 60 return repo, nil 61 } 62 63 // RemoteCache stores information about external repositories. The cache may 64 // be initialized with information about known repositories, i.e., those listed 65 // in the WORKSPACE file and mentioned on the command line. Other information 66 // is retrieved over the network. 67 // 68 // Public methods of RemoteCache may be slow in cases where a network fetch 69 // is needed. Public methods may be called concurrently. 70 type RemoteCache struct { 71 // RepoRootForImportPath is vcs.RepoRootForImportPath by default. It may 72 // be overridden so that tests may avoid accessing the network. 73 RepoRootForImportPath func(string, bool) (*vcs.RepoRoot, error) 74 75 // HeadCmd returns the latest commit on the default branch in the given 76 // repository. This is used by Head. It may be stubbed out for tests. 77 HeadCmd func(remote, vcs string) (string, error) 78 79 root, remote, head remoteCacheMap 80 } 81 82 // remoteCacheMap is a thread-safe, idempotent cache. It is used to store 83 // information which should be fetched over the network no more than once. 84 // This follows the Memo pattern described in The Go Programming Language, 85 // section 9.7. 86 type remoteCacheMap struct { 87 mu sync.Mutex 88 cache map[string]*remoteCacheEntry 89 } 90 91 type remoteCacheEntry struct { 92 value interface{} 93 err error 94 95 // ready is nil for entries that were added when the cache was initialized. 96 // It is non-nil for other entries. It is closed when an entry is ready, 97 // i.e., the operation loading the entry completed. 98 ready chan struct{} 99 } 100 101 type rootValue struct { 102 root, name string 103 } 104 105 type remoteValue struct { 106 remote, vcs string 107 } 108 109 type headValue struct { 110 commit, tag string 111 } 112 113 // NewRemoteCache creates a new RemoteCache with a set of known repositories. 114 // The Root and Remote methods will return information about repositories listed 115 // here without accessing the network. However, the Head method will still 116 // access the network for these repositories to retrieve information about new 117 // versions. 118 func NewRemoteCache(knownRepos []Repo) *RemoteCache { 119 r := &RemoteCache{ 120 RepoRootForImportPath: vcs.RepoRootForImportPath, 121 HeadCmd: defaultHeadCmd, 122 root: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 123 remote: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 124 head: remoteCacheMap{cache: make(map[string]*remoteCacheEntry)}, 125 } 126 for _, repo := range knownRepos { 127 r.root.cache[repo.GoPrefix] = &remoteCacheEntry{ 128 value: rootValue{ 129 root: repo.GoPrefix, 130 name: repo.Name, 131 }, 132 } 133 if repo.Remote != "" { 134 r.remote.cache[repo.GoPrefix] = &remoteCacheEntry{ 135 value: remoteValue{ 136 remote: repo.Remote, 137 vcs: repo.VCS, 138 }, 139 } 140 } 141 } 142 return r 143 } 144 145 var gopkginPattern = regexp.MustCompile("^(gopkg.in/(?:[^/]+/)?[^/]+\\.v\\d+)(?:/|$)") 146 147 var knownPrefixes = []struct { 148 prefix string 149 missing int 150 }{ 151 {prefix: "golang.org/x", missing: 1}, 152 {prefix: "google.golang.org", missing: 1}, 153 {prefix: "cloud.google.com", missing: 1}, 154 {prefix: "github.com", missing: 2}, 155 } 156 157 // Root returns the portion of an import path that corresponds to the root 158 // directory of the repository containing the given import path. For example, 159 // given "golang.org/x/tools/go/loader", this will return "golang.org/x/tools". 160 // The workspace name of the repository is also returned. This may be a custom 161 // name set in WORKSPACE, or it may be a generated name based on the root path. 162 func (r *RemoteCache) Root(importPath string) (root, name string, err error) { 163 // Try prefixes of the import path in the cache, but don't actually go out 164 // to vcs yet. We do this before handling known special cases because 165 // the cache is pre-populated with repository rules, and we want to use their 166 // names if we can. 167 prefix := importPath 168 for { 169 v, ok, err := r.root.get(prefix) 170 if ok { 171 if err != nil { 172 return "", "", err 173 } 174 value := v.(rootValue) 175 return value.root, value.name, nil 176 } 177 178 prefix = path.Dir(prefix) 179 if prefix == "." || prefix == "/" { 180 break 181 } 182 } 183 184 // Try known prefixes. 185 for _, p := range knownPrefixes { 186 if pathtools.HasPrefix(importPath, p.prefix) { 187 rest := pathtools.TrimPrefix(importPath, p.prefix) 188 var components []string 189 if rest != "" { 190 components = strings.Split(rest, "/") 191 } 192 if len(components) < p.missing { 193 return "", "", fmt.Errorf("import path %q is shorter than the known prefix %q", importPath, p.prefix) 194 } 195 root = p.prefix 196 for _, c := range components[:p.missing] { 197 root = path.Join(root, c) 198 } 199 name = label.ImportPathToBazelRepoName(root) 200 return root, name, nil 201 } 202 } 203 204 // gopkg.in is special, and might have either one or two levels of 205 // missing paths. See http://labix.org/gopkg.in for URL patterns. 206 if match := gopkginPattern.FindStringSubmatch(importPath); len(match) > 0 { 207 root = match[1] 208 name = label.ImportPathToBazelRepoName(root) 209 return root, name, nil 210 } 211 212 // Find the prefix using vcs and cache the result. 213 v, err := r.root.ensure(importPath, func() (interface{}, error) { 214 res, err := r.RepoRootForImportPath(importPath, false) 215 if err != nil { 216 return nil, err 217 } 218 return rootValue{res.Root, label.ImportPathToBazelRepoName(res.Root)}, nil 219 }) 220 if err != nil { 221 return "", "", err 222 } 223 value := v.(rootValue) 224 return value.root, value.name, nil 225 } 226 227 // Remote returns the VCS name and the remote URL for a repository with the 228 // given root import path. This is suitable for creating new repository rules. 229 func (r *RemoteCache) Remote(root string) (remote, vcs string, err error) { 230 v, err := r.remote.ensure(root, func() (interface{}, error) { 231 repo, err := r.RepoRootForImportPath(root, false) 232 if err != nil { 233 return nil, err 234 } 235 return remoteValue{remote: repo.Repo, vcs: repo.VCS.Cmd}, nil 236 }) 237 if err != nil { 238 return "", "", err 239 } 240 value := v.(remoteValue) 241 return value.remote, value.vcs, nil 242 } 243 244 // Head returns the most recent commit id on the default branch and latest 245 // version tag for the given remote repository. The tag "" is returned if 246 // no latest version was found. 247 // 248 // TODO(jayconrod): support VCS other than git. 249 // TODO(jayconrod): support version tags. "" is always returned. 250 func (r *RemoteCache) Head(remote, vcs string) (commit, tag string, err error) { 251 if vcs != "git" { 252 return "", "", fmt.Errorf("could not locate recent commit in repo %q with unknown version control scheme %q", remote, vcs) 253 } 254 255 v, err := r.head.ensure(remote, func() (interface{}, error) { 256 commit, err := r.HeadCmd(remote, vcs) 257 if err != nil { 258 return nil, err 259 } 260 return headValue{commit: commit}, nil 261 }) 262 if err != nil { 263 return "", "", err 264 } 265 value := v.(headValue) 266 return value.commit, value.tag, nil 267 } 268 269 func defaultHeadCmd(remote, vcs string) (string, error) { 270 switch vcs { 271 case "local": 272 return "", nil 273 274 case "git": 275 // Old versions of git ls-remote exit with code 129 when "--" is passed. 276 // We'll try to validate the argument here instead. 277 if strings.HasPrefix(remote, "-") { 278 return "", fmt.Errorf("remote must not start with '-': %q", remote) 279 } 280 cmd := exec.Command("git", "ls-remote", remote, "HEAD") 281 out, err := cmd.Output() 282 if err != nil { 283 return "", err 284 } 285 ix := bytes.IndexByte(out, '\t') 286 if ix < 0 { 287 return "", fmt.Errorf("could not parse output for git ls-remote for %q", remote) 288 } 289 return string(out[:ix]), nil 290 291 default: 292 return "", fmt.Errorf("unknown version control system: %s", vcs) 293 } 294 } 295 296 // get retrieves a value associated with the given key from the cache. ok will 297 // be true if the key exists in the cache, even if it's in the process of 298 // being fetched. 299 func (m *remoteCacheMap) get(key string) (value interface{}, ok bool, err error) { 300 m.mu.Lock() 301 e, ok := m.cache[key] 302 m.mu.Unlock() 303 if !ok { 304 return nil, ok, nil 305 } 306 if e.ready != nil { 307 <-e.ready 308 } 309 return e.value, ok, e.err 310 } 311 312 // ensure retreives a value associated with the given key from the cache. If 313 // the key does not exist in the cache, the load function will be called, 314 // and its result will be associated with the key. The load function will not 315 // be called more than once for any key. 316 func (m *remoteCacheMap) ensure(key string, load func() (interface{}, error)) (interface{}, error) { 317 m.mu.Lock() 318 e, ok := m.cache[key] 319 if !ok { 320 e = &remoteCacheEntry{ready: make(chan struct{})} 321 m.cache[key] = e 322 m.mu.Unlock() 323 e.value, e.err = load() 324 close(e.ready) 325 } else { 326 m.mu.Unlock() 327 if e.ready != nil { 328 <-e.ready 329 } 330 } 331 return e.value, e.err 332 }