github.com/wolfd/bazel-gazelle@v0.14.0/internal/repos/remote.go (about)

     1  /* Copyright 2018 The Bazel Authors. All rights reserved.
     2  
     3  Licensed under the Apache License, Version 2.0 (the "License");
     4  you may not use this file except in compliance with the License.
     5  You may obtain a copy of the License at
     6  
     7     http://www.apache.org/licenses/LICENSE-2.0
     8  
     9  Unless required by applicable law or agreed to in writing, software
    10  distributed under the License is distributed on an "AS IS" BASIS,
    11  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  See the License for the specific language governing permissions and
    13  limitations under the License.
    14  */
    15  
    16  package repos
    17  
    18  import (
    19  	"bytes"
    20  	"fmt"
    21  	"os/exec"
    22  	"path"
    23  	"regexp"
    24  	"strings"
    25  	"sync"
    26  
    27  	"github.com/bazelbuild/bazel-gazelle/internal/label"
    28  	"github.com/bazelbuild/bazel-gazelle/internal/pathtools"
    29  	"golang.org/x/tools/go/vcs"
    30  )
    31  
    32  // UpdateRepo returns an object describing a repository at the most recent
    33  // commit or version tag.
    34  //
    35  // This function uses RemoteCache to retrieve information about the repository.
    36  // Depending on how the RemoteCache was initialized and used earlier, some
    37  // information may already be locally available. Frequently though, information
    38  // will be fetched over the network, so this function may be slow.
    39  func UpdateRepo(rc *RemoteCache, importPath string) (Repo, error) {
    40  	root, name, err := rc.Root(importPath)
    41  	if err != nil {
    42  		return Repo{}, err
    43  	}
    44  	remote, vcs, err := rc.Remote(root)
    45  	if err != nil {
    46  		return Repo{}, err
    47  	}
    48  	commit, tag, err := rc.Head(remote, vcs)
    49  	if err != nil {
    50  		return Repo{}, err
    51  	}
    52  	repo := Repo{
    53  		Name:     name,
    54  		GoPrefix: root,
    55  		Commit:   commit,
    56  		Tag:      tag,
    57  		Remote:   remote,
    58  		VCS:      vcs,
    59  	}
    60  	return repo, nil
    61  }
    62  
    63  // RemoteCache stores information about external repositories. The cache may
    64  // be initialized with information about known repositories, i.e., those listed
    65  // in the WORKSPACE file and mentioned on the command line. Other information
    66  // is retrieved over the network.
    67  //
    68  // Public methods of RemoteCache may be slow in cases where a network fetch
    69  // is needed. Public methods may be called concurrently.
    70  type RemoteCache struct {
    71  	// RepoRootForImportPath is vcs.RepoRootForImportPath by default. It may
    72  	// be overridden so that tests may avoid accessing the network.
    73  	RepoRootForImportPath func(string, bool) (*vcs.RepoRoot, error)
    74  
    75  	// HeadCmd returns the latest commit on the default branch in the given
    76  	// repository. This is used by Head. It may be stubbed out for tests.
    77  	HeadCmd func(remote, vcs string) (string, error)
    78  
    79  	root, remote, head remoteCacheMap
    80  }
    81  
    82  // remoteCacheMap is a thread-safe, idempotent cache. It is used to store
    83  // information which should be fetched over the network no more than once.
    84  // This follows the Memo pattern described in The Go Programming Language,
    85  // section 9.7.
    86  type remoteCacheMap struct {
    87  	mu    sync.Mutex
    88  	cache map[string]*remoteCacheEntry
    89  }
    90  
    91  type remoteCacheEntry struct {
    92  	value interface{}
    93  	err   error
    94  
    95  	// ready is nil for entries that were added when the cache was initialized.
    96  	// It is non-nil for other entries. It is closed when an entry is ready,
    97  	// i.e., the operation loading the entry completed.
    98  	ready chan struct{}
    99  }
   100  
   101  type rootValue struct {
   102  	root, name string
   103  }
   104  
   105  type remoteValue struct {
   106  	remote, vcs string
   107  }
   108  
   109  type headValue struct {
   110  	commit, tag string
   111  }
   112  
   113  // NewRemoteCache creates a new RemoteCache with a set of known repositories.
   114  // The Root and Remote methods will return information about repositories listed
   115  // here without accessing the network. However, the Head method will still
   116  // access the network for these repositories to retrieve information about new
   117  // versions.
   118  func NewRemoteCache(knownRepos []Repo) *RemoteCache {
   119  	r := &RemoteCache{
   120  		RepoRootForImportPath: vcs.RepoRootForImportPath,
   121  		HeadCmd:               defaultHeadCmd,
   122  		root:                  remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
   123  		remote:                remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
   124  		head:                  remoteCacheMap{cache: make(map[string]*remoteCacheEntry)},
   125  	}
   126  	for _, repo := range knownRepos {
   127  		r.root.cache[repo.GoPrefix] = &remoteCacheEntry{
   128  			value: rootValue{
   129  				root: repo.GoPrefix,
   130  				name: repo.Name,
   131  			},
   132  		}
   133  		if repo.Remote != "" {
   134  			r.remote.cache[repo.GoPrefix] = &remoteCacheEntry{
   135  				value: remoteValue{
   136  					remote: repo.Remote,
   137  					vcs:    repo.VCS,
   138  				},
   139  			}
   140  		}
   141  	}
   142  	return r
   143  }
   144  
   145  var gopkginPattern = regexp.MustCompile("^(gopkg.in/(?:[^/]+/)?[^/]+\\.v\\d+)(?:/|$)")
   146  
   147  var knownPrefixes = []struct {
   148  	prefix  string
   149  	missing int
   150  }{
   151  	{prefix: "golang.org/x", missing: 1},
   152  	{prefix: "google.golang.org", missing: 1},
   153  	{prefix: "cloud.google.com", missing: 1},
   154  	{prefix: "github.com", missing: 2},
   155  }
   156  
   157  // Root returns the portion of an import path that corresponds to the root
   158  // directory of the repository containing the given import path. For example,
   159  // given "golang.org/x/tools/go/loader", this will return "golang.org/x/tools".
   160  // The workspace name of the repository is also returned. This may be a custom
   161  // name set in WORKSPACE, or it may be a generated name based on the root path.
   162  func (r *RemoteCache) Root(importPath string) (root, name string, err error) {
   163  	// Try prefixes of the import path in the cache, but don't actually go out
   164  	// to vcs yet. We do this before handling known special cases because
   165  	// the cache is pre-populated with repository rules, and we want to use their
   166  	// names if we can.
   167  	prefix := importPath
   168  	for {
   169  		v, ok, err := r.root.get(prefix)
   170  		if ok {
   171  			if err != nil {
   172  				return "", "", err
   173  			}
   174  			value := v.(rootValue)
   175  			return value.root, value.name, nil
   176  		}
   177  
   178  		prefix = path.Dir(prefix)
   179  		if prefix == "." || prefix == "/" {
   180  			break
   181  		}
   182  	}
   183  
   184  	// Try known prefixes.
   185  	for _, p := range knownPrefixes {
   186  		if pathtools.HasPrefix(importPath, p.prefix) {
   187  			rest := pathtools.TrimPrefix(importPath, p.prefix)
   188  			var components []string
   189  			if rest != "" {
   190  				components = strings.Split(rest, "/")
   191  			}
   192  			if len(components) < p.missing {
   193  				return "", "", fmt.Errorf("import path %q is shorter than the known prefix %q", importPath, p.prefix)
   194  			}
   195  			root = p.prefix
   196  			for _, c := range components[:p.missing] {
   197  				root = path.Join(root, c)
   198  			}
   199  			name = label.ImportPathToBazelRepoName(root)
   200  			return root, name, nil
   201  		}
   202  	}
   203  
   204  	// gopkg.in is special, and might have either one or two levels of
   205  	// missing paths. See http://labix.org/gopkg.in for URL patterns.
   206  	if match := gopkginPattern.FindStringSubmatch(importPath); len(match) > 0 {
   207  		root = match[1]
   208  		name = label.ImportPathToBazelRepoName(root)
   209  		return root, name, nil
   210  	}
   211  
   212  	// Find the prefix using vcs and cache the result.
   213  	v, err := r.root.ensure(importPath, func() (interface{}, error) {
   214  		res, err := r.RepoRootForImportPath(importPath, false)
   215  		if err != nil {
   216  			return nil, err
   217  		}
   218  		return rootValue{res.Root, label.ImportPathToBazelRepoName(res.Root)}, nil
   219  	})
   220  	if err != nil {
   221  		return "", "", err
   222  	}
   223  	value := v.(rootValue)
   224  	return value.root, value.name, nil
   225  }
   226  
   227  // Remote returns the VCS name and the remote URL for a repository with the
   228  // given root import path. This is suitable for creating new repository rules.
   229  func (r *RemoteCache) Remote(root string) (remote, vcs string, err error) {
   230  	v, err := r.remote.ensure(root, func() (interface{}, error) {
   231  		repo, err := r.RepoRootForImportPath(root, false)
   232  		if err != nil {
   233  			return nil, err
   234  		}
   235  		return remoteValue{remote: repo.Repo, vcs: repo.VCS.Cmd}, nil
   236  	})
   237  	if err != nil {
   238  		return "", "", err
   239  	}
   240  	value := v.(remoteValue)
   241  	return value.remote, value.vcs, nil
   242  }
   243  
   244  // Head returns the most recent commit id on the default branch and latest
   245  // version tag for the given remote repository. The tag "" is returned if
   246  // no latest version was found.
   247  //
   248  // TODO(jayconrod): support VCS other than git.
   249  // TODO(jayconrod): support version tags. "" is always returned.
   250  func (r *RemoteCache) Head(remote, vcs string) (commit, tag string, err error) {
   251  	if vcs != "git" {
   252  		return "", "", fmt.Errorf("could not locate recent commit in repo %q with unknown version control scheme %q", remote, vcs)
   253  	}
   254  
   255  	v, err := r.head.ensure(remote, func() (interface{}, error) {
   256  		commit, err := r.HeadCmd(remote, vcs)
   257  		if err != nil {
   258  			return nil, err
   259  		}
   260  		return headValue{commit: commit}, nil
   261  	})
   262  	if err != nil {
   263  		return "", "", err
   264  	}
   265  	value := v.(headValue)
   266  	return value.commit, value.tag, nil
   267  }
   268  
   269  func defaultHeadCmd(remote, vcs string) (string, error) {
   270  	switch vcs {
   271  	case "local":
   272  		return "", nil
   273  
   274  	case "git":
   275  		// Old versions of git ls-remote exit with code 129 when "--" is passed.
   276  		// We'll try to validate the argument here instead.
   277  		if strings.HasPrefix(remote, "-") {
   278  			return "", fmt.Errorf("remote must not start with '-': %q", remote)
   279  		}
   280  		cmd := exec.Command("git", "ls-remote", remote, "HEAD")
   281  		out, err := cmd.Output()
   282  		if err != nil {
   283  			return "", err
   284  		}
   285  		ix := bytes.IndexByte(out, '\t')
   286  		if ix < 0 {
   287  			return "", fmt.Errorf("could not parse output for git ls-remote for %q", remote)
   288  		}
   289  		return string(out[:ix]), nil
   290  
   291  	default:
   292  		return "", fmt.Errorf("unknown version control system: %s", vcs)
   293  	}
   294  }
   295  
   296  // get retrieves a value associated with the given key from the cache. ok will
   297  // be true if the key exists in the cache, even if it's in the process of
   298  // being fetched.
   299  func (m *remoteCacheMap) get(key string) (value interface{}, ok bool, err error) {
   300  	m.mu.Lock()
   301  	e, ok := m.cache[key]
   302  	m.mu.Unlock()
   303  	if !ok {
   304  		return nil, ok, nil
   305  	}
   306  	if e.ready != nil {
   307  		<-e.ready
   308  	}
   309  	return e.value, ok, e.err
   310  }
   311  
   312  // ensure retreives a value associated with the given key from the cache. If
   313  // the key does not exist in the cache, the load function will be called,
   314  // and its result will be associated with the key. The load function will not
   315  // be called more than once for any key.
   316  func (m *remoteCacheMap) ensure(key string, load func() (interface{}, error)) (interface{}, error) {
   317  	m.mu.Lock()
   318  	e, ok := m.cache[key]
   319  	if !ok {
   320  		e = &remoteCacheEntry{ready: make(chan struct{})}
   321  		m.cache[key] = e
   322  		m.mu.Unlock()
   323  		e.value, e.err = load()
   324  		close(e.ready)
   325  	} else {
   326  		m.mu.Unlock()
   327  		if e.ready != nil {
   328  			<-e.ready
   329  		}
   330  	}
   331  	return e.value, e.err
   332  }