cuelang.org/go@v0.10.1/internal/mod/modresolve/resolve.go (about)

     1  // Copyright 2024 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package modresolve
    16  
    17  import (
    18  	"crypto/sha256"
    19  	_ "embed"
    20  	"fmt"
    21  	"net"
    22  	"net/netip"
    23  	"path"
    24  	"sort"
    25  	"strings"
    26  	"sync"
    27  
    28  	"cuelabs.dev/go/oci/ociregistry/ociref"
    29  
    30  	"cuelang.org/go/cue"
    31  	"cuelang.org/go/cue/cuecontext"
    32  	"cuelang.org/go/cue/errors"
    33  	"cuelang.org/go/cue/token"
    34  	"cuelang.org/go/mod/module"
    35  )
    36  
    37  // pathEncoding represents one of the possible types of
    38  // encoding for module paths within a registry.
    39  // It reflects the #registry.pathEncoding disjunction
    40  // in schema.cue.
    41  // TODO it would be nice if this could be auto-generated
    42  // from the schema.
    43  type pathEncoding string
    44  
    45  const (
    46  	encPath       pathEncoding = "path"
    47  	encHashAsRepo pathEncoding = "hashAsRepo"
    48  	encHashAsTag  pathEncoding = "hashAsTag"
    49  )
    50  
    51  // LocationResolver resolves module paths to a location
    52  // consisting of a host name of a registry and where
    53  // in that registry the module is to be found.
    54  //
    55  // Note: The implementation in this package operates entirely lexically,
    56  // which is why [Location] contains only a host name and not an actual
    57  // [ociregistry.Interface] implementation.
    58  type LocationResolver interface {
    59  	// ResolveToLocation resolves a base module path (without a version
    60  	// suffix, a.k.a. OCI repository name) and optional version to
    61  	// the location for that path. It reports whether it can find
    62  	// appropriate location for the module.
    63  	//
    64  	// If the version is empty, the Tag in the returned Location
    65  	// will hold the prefix that all versions of the module in its
    66  	// repository have. That prefix will be followed by the version
    67  	// itself.
    68  	ResolveToLocation(path string, vers string) (Location, bool)
    69  
    70  	// AllHosts returns all the registry hosts that the resolver
    71  	// might resolve to, ordered lexically by hostname.
    72  	AllHosts() []Host
    73  }
    74  
// Host represents a registry host name together with the kind of
// connection that should be used to reach it.
type Host struct {
	// Name holds the IP host name of the registry.
	// If it's an IP v6 address, it will be surrounded with
	// square brackets ([, ]).
	Name string
	// Insecure holds whether this host should be connected
	// to insecurely (with an HTTP rather than HTTPS connection).
	Insecure bool
}
    85  
// Location represents the location for a given module version or versions.
type Location struct {
	// Host holds the host or host:port of the registry.
	Host string

	// Insecure holds whether an insecure connection
	// should be used when connecting to the registry.
	Insecure bool

	// Repository holds the repository to store the module in.
	Repository string

	// Tag holds the tag for the module version.
	// If an empty version was passed to
	// ResolveToLocation, it holds the prefix shared by all version
	// tags for the module.
	Tag string
}
   104  
// config mirrors the #file definition in schema.cue.
//
// ModuleRegistries maps module path prefixes to the registry used
// for modules with that prefix; DefaultRegistry, when non-nil, is the
// registry used for module paths that match none of those prefixes.
//
// TODO it would be nice to be able to generate this
// type directly from the schema.
type config struct {
	ModuleRegistries map[string]*registryConfig `json:"moduleRegistries,omitempty"`
	DefaultRegistry  *registryConfig            `json:"defaultRegistry,omitempty"`
}
   112  
   113  func (cfg *config) init() error {
   114  	for prefix, reg := range cfg.ModuleRegistries {
   115  		if err := module.CheckPathWithoutVersion(prefix); err != nil {
   116  			return fmt.Errorf("invalid module path %q: %v", prefix, err)
   117  		}
   118  		if err := reg.init(); err != nil {
   119  			return fmt.Errorf("invalid registry configuration in %q: %v", prefix, err)
   120  		}
   121  	}
   122  	if cfg.DefaultRegistry != nil {
   123  		if err := cfg.DefaultRegistry.init(); err != nil {
   124  			return fmt.Errorf("invalid default registry configuration: %v", err)
   125  		}
   126  	}
   127  	return nil
   128  }
   129  
// registryConfig holds the configuration for a single registry.
//
// The exported fields are decoded from the configuration; the
// unexported ones are derived from Registry by parseRegistry.
// After a successful init, PathEncoding is non-empty, PrefixForTags
// is either empty or a valid tag, and StripPrefix is only set when
// PathEncoding is encPath and repository is non-empty.
type registryConfig struct {
	Registry      string       `json:"registry,omitempty"`
	PathEncoding  pathEncoding `json:"pathEncoding,omitempty"`
	PrefixForTags string       `json:"prefixForTags,omitempty"`
	StripPrefix   bool         `json:"stripPrefix,omitempty"`

	// The following fields are filled in from Registry after parsing.
	// none is set when Registry is the literal string "none".
	none       bool
	host       string
	repository string
	insecure   bool
}
   142  
   143  func (r *registryConfig) init() error {
   144  	r1, err := parseRegistry(r.Registry)
   145  	if err != nil {
   146  		return err
   147  	}
   148  	r.none, r.host, r.repository, r.insecure = r1.none, r1.host, r1.repository, r1.insecure
   149  
   150  	if r.PrefixForTags != "" {
   151  		if !ociref.IsValidTag(r.PrefixForTags) {
   152  			return fmt.Errorf("invalid tag prefix %q", r.PrefixForTags)
   153  		}
   154  	}
   155  	if r.PathEncoding == "" {
   156  		// Shouldn't happen because default should apply.
   157  		return fmt.Errorf("empty pathEncoding")
   158  	}
   159  	if r.StripPrefix {
   160  		if r.PathEncoding != encPath {
   161  			// TODO we could relax this to allow storing of naked tags
   162  			// when the module path matches exactly and hash tags
   163  			// otherwise.
   164  			return fmt.Errorf("cannot strip prefix unless using path encoding")
   165  		}
   166  		if r.repository == "" {
   167  			return fmt.Errorf("use of stripPrefix requires a non-empty repository within the registry")
   168  		}
   169  	}
   170  	return nil
   171  }
   172  
// State for the lazily compiled configuration schema used by ParseConfig.
var (
	configSchemaOnce sync.Once // guards the creation of _configSchema
	// TODO remove this mutex when https://cuelang.org/issue/2733 is fixed.
	configSchemaMutex sync.Mutex // guards any use of _configSchema
	_configSchema     cue.Value
)
   179  
// configSchemaData holds the contents of schema.cue, embedded at build time.
//
//go:embed schema.cue
var configSchemaData []byte
   182  
   183  // RegistryConfigSchema returns the CUE schema
   184  // for the configuration parsed by [ParseConfig].
   185  func RegistryConfigSchema() string {
   186  	// Cut out the copyright header and the header that's
   187  	// not pure schema.
   188  	schema := string(configSchemaData)
   189  	i := strings.Index(schema, "\n// #file ")
   190  	if i == -1 {
   191  		panic("no file definition found in schema")
   192  	}
   193  	i++
   194  	return schema[i:]
   195  }
   196  
   197  // ParseConfig parses the registry configuration with the given contents and file name.
   198  // If there is no default registry, then the single registry specified in catchAllDefault
   199  // will be used as a default.
   200  func ParseConfig(configFile []byte, filename string, catchAllDefault string) (LocationResolver, error) {
   201  	configSchemaOnce.Do(func() {
   202  		ctx := cuecontext.New()
   203  		schemav := ctx.CompileBytes(configSchemaData, cue.Filename("cuelang.org/go/internal/mod/modresolve/schema.cue"))
   204  		schemav = schemav.LookupPath(cue.MakePath(cue.Def("#file")))
   205  		if err := schemav.Validate(); err != nil {
   206  			panic(fmt.Errorf("internal error: invalid CUE registry config schema: %v", errors.Details(err, nil)))
   207  		}
   208  		_configSchema = schemav
   209  	})
   210  	configSchemaMutex.Lock()
   211  	defer configSchemaMutex.Unlock()
   212  
   213  	v := _configSchema.Context().CompileBytes(configFile, cue.Filename(filename))
   214  	if err := v.Err(); err != nil {
   215  		return nil, errors.Wrapf(err, token.NoPos, "invalid registry configuration file")
   216  	}
   217  	v = v.Unify(_configSchema)
   218  	if err := v.Err(); err != nil {
   219  		return nil, errors.Wrapf(err, token.NoPos, "invalid configuration file")
   220  	}
   221  	var cfg config
   222  	if err := v.Decode(&cfg); err != nil {
   223  		return nil, errors.Wrapf(err, token.NoPos, "internal error: cannot decode into registry config struct")
   224  	}
   225  	if err := cfg.init(); err != nil {
   226  		return nil, err
   227  	}
   228  	if cfg.DefaultRegistry == nil {
   229  		if catchAllDefault == "" {
   230  			return nil, fmt.Errorf("no default catch-all registry provided")
   231  		}
   232  		// TODO is it too limiting to have the catch-all registry specified as a simple string?
   233  		reg, err := parseRegistry(catchAllDefault)
   234  		if err != nil {
   235  			return nil, fmt.Errorf("invalid catch-all registry %q: %v", catchAllDefault, err)
   236  		}
   237  		cfg.DefaultRegistry = reg
   238  	}
   239  	r := &resolver{
   240  		cfg: cfg,
   241  	}
   242  	if err := r.initHosts(); err != nil {
   243  		return nil, err
   244  	}
   245  	return r, nil
   246  }
   247  
   248  // ParseCUERegistry parses a registry routing specification that
   249  // maps module prefixes to the registry that should be used to
   250  // fetch that module.
   251  //
   252  // The specification consists of an order-independent, comma-separated list.
   253  //
   254  // Each element either maps a module prefix to the registry that will be used
   255  // for all modules that have that prefix (prefix=registry), or a catch-all registry to be used
   256  // for modules that do not match any prefix (registry).
   257  //
   258  // For example:
   259  //
   260  //	myorg.com=myregistry.com/m,catchallregistry.example.org
   261  //
   262  // Any module with a matching prefix will be routed to the given registry.
   263  // A prefix only matches whole path elements.
   264  // In the above example, module myorg.com/foo/bar@v0 will be looked up
   265  // in myregistry.com in the repository m/myorg.com/foo/bar,
   266  // whereas github.com/x/y will be looked up in catchallregistry.example.com.
   267  //
   268  // The registry part is syntactically similar to a [docker reference]
   269  // except that the repository is optional and no tag or digest is allowed.
   270  // Additionally, a +secure or +insecure suffix may be used to indicate
   271  // whether to use a secure or insecure connection. Without that,
   272  // localhost, 127.0.0.1 and [::1] will default to insecure, and anything
   273  // else to secure.
   274  //
   275  // If s does not declare a catch-all registry location, catchAllDefault is
   276  // used. It is an error if s fails to declares a catch-all registry location
   277  // and no catchAllDefault is provided.
   278  //
   279  // [docker reference]: https://pkg.go.dev/github.com/distribution/reference
   280  func ParseCUERegistry(s string, catchAllDefault string) (LocationResolver, error) {
   281  	if s == "" && catchAllDefault == "" {
   282  		return nil, fmt.Errorf("no catch-all registry or default")
   283  	}
   284  	if s == "" {
   285  		s = catchAllDefault
   286  	}
   287  	cfg := config{
   288  		ModuleRegistries: make(map[string]*registryConfig),
   289  	}
   290  	parts := strings.Split(s, ",")
   291  	for _, part := range parts {
   292  		key, val, ok := strings.Cut(part, "=")
   293  		if !ok {
   294  			if part == "" {
   295  				// TODO or just ignore it?
   296  				return nil, fmt.Errorf("empty registry part")
   297  			}
   298  			if _, ok := cfg.ModuleRegistries[""]; ok {
   299  				return nil, fmt.Errorf("duplicate catch-all registry")
   300  			}
   301  			key, val = "", part
   302  		} else {
   303  			if key == "" {
   304  				return nil, fmt.Errorf("empty module prefix")
   305  			}
   306  			if val == "" {
   307  				return nil, fmt.Errorf("empty registry reference")
   308  			}
   309  			if err := module.CheckPathWithoutVersion(key); err != nil {
   310  				return nil, fmt.Errorf("invalid module path %q: %v", key, err)
   311  			}
   312  			if _, ok := cfg.ModuleRegistries[key]; ok {
   313  				return nil, fmt.Errorf("duplicate module prefix %q", key)
   314  			}
   315  		}
   316  		reg, err := parseRegistry(val)
   317  		if err != nil {
   318  			return nil, fmt.Errorf("invalid registry %q: %v", val, err)
   319  		}
   320  		cfg.ModuleRegistries[key] = reg
   321  	}
   322  	if _, ok := cfg.ModuleRegistries[""]; !ok {
   323  		if catchAllDefault == "" {
   324  			return nil, fmt.Errorf("no default catch-all registry provided")
   325  		}
   326  		reg, err := parseRegistry(catchAllDefault)
   327  		if err != nil {
   328  			return nil, fmt.Errorf("invalid catch-all registry %q: %v", catchAllDefault, err)
   329  		}
   330  		cfg.ModuleRegistries[""] = reg
   331  	}
   332  	cfg.DefaultRegistry = cfg.ModuleRegistries[""]
   333  	delete(cfg.ModuleRegistries, "")
   334  
   335  	r := &resolver{
   336  		cfg: cfg,
   337  	}
   338  	if err := r.initHosts(); err != nil {
   339  		return nil, err
   340  	}
   341  	return r, nil
   342  }
   343  
// resolver implements LocationResolver on top of a parsed config.
// allHosts is computed by initHosts and is sorted by host name.
type resolver struct {
	allHosts []Host
	cfg      config
}
   348  
   349  func (r *resolver) initHosts() error {
   350  	hosts := make(map[string]bool)
   351  	addHost := func(reg *registryConfig) error {
   352  		if reg.none {
   353  			return nil
   354  		}
   355  		if insecure, ok := hosts[reg.host]; ok {
   356  			if insecure != reg.insecure {
   357  				return fmt.Errorf("registry host %q is specified both as secure and insecure", reg.host)
   358  			}
   359  		} else {
   360  			hosts[reg.host] = reg.insecure
   361  		}
   362  		return nil
   363  	}
   364  	for _, reg := range r.cfg.ModuleRegistries {
   365  		if err := addHost(reg); err != nil {
   366  			return err
   367  		}
   368  	}
   369  
   370  	if reg := r.cfg.DefaultRegistry; reg != nil {
   371  		if err := addHost(reg); err != nil {
   372  			return err
   373  		}
   374  	}
   375  	allHosts := make([]Host, 0, len(hosts))
   376  	for host, insecure := range hosts {
   377  		allHosts = append(allHosts, Host{
   378  			Name:     host,
   379  			Insecure: insecure,
   380  		})
   381  	}
   382  	sort.Slice(allHosts, func(i, j int) bool {
   383  		return allHosts[i].Name < allHosts[j].Name
   384  	})
   385  	r.allHosts = allHosts
   386  	return nil
   387  }
   388  
// AllHosts implements [LocationResolver.AllHosts].
// It returns the slice computed by initHosts without copying it.
func (r *resolver) AllHosts() []Host {
	return r.allHosts
}
   393  
   394  func (r *resolver) ResolveToLocation(mpath, vers string) (Location, bool) {
   395  	if mpath == "" {
   396  		return Location{}, false
   397  	}
   398  	bestMatch := ""
   399  	// Note: there's always a wildcard match.
   400  	bestMatchReg := r.cfg.DefaultRegistry
   401  	for pat, reg := range r.cfg.ModuleRegistries {
   402  		if pat == mpath {
   403  			bestMatch = pat
   404  			bestMatchReg = reg
   405  			break
   406  		}
   407  		if !strings.HasPrefix(mpath, pat) {
   408  			continue
   409  		}
   410  		if len(bestMatch) > len(pat) {
   411  			// We've already found a more specific match.
   412  			continue
   413  		}
   414  		if mpath[len(pat)] != '/' {
   415  			// The path doesn't have a separator at the end of
   416  			// the prefix, which means that it doesn't match.
   417  			// For example, foo.com/bar does not match foo.com/ba.
   418  			continue
   419  		}
   420  		// It's a possible match but not necessarily the longest one.
   421  		bestMatch, bestMatchReg = pat, reg
   422  	}
   423  	reg := bestMatchReg
   424  	if reg == nil || reg.none {
   425  		return Location{}, false
   426  	}
   427  	loc := Location{
   428  		Host:     reg.host,
   429  		Insecure: reg.insecure,
   430  		Tag:      vers,
   431  	}
   432  	switch reg.PathEncoding {
   433  	case encPath:
   434  		if reg.StripPrefix {
   435  			mpath = strings.TrimPrefix(mpath, bestMatch)
   436  			mpath = strings.TrimPrefix(mpath, "/")
   437  		}
   438  		loc.Repository = path.Join(reg.repository, mpath)
   439  	case encHashAsRepo:
   440  		loc.Repository = fmt.Sprintf("%s/%x", reg.repository, sha256.Sum256([]byte(mpath)))
   441  	case encHashAsTag:
   442  		loc.Repository = reg.repository
   443  	default:
   444  		panic("unreachable")
   445  	}
   446  	if reg.PathEncoding == encHashAsTag {
   447  		loc.Tag = fmt.Sprintf("%s%x-%s", reg.PrefixForTags, sha256.Sum256([]byte(mpath)), vers)
   448  	} else {
   449  		loc.Tag = reg.PrefixForTags + vers
   450  	}
   451  	return loc, true
   452  }
   453  
   454  func parseRegistry(env0 string) (*registryConfig, error) {
   455  	if env0 == "none" {
   456  		return &registryConfig{
   457  			Registry: env0,
   458  			none:     true,
   459  		}, nil
   460  	}
   461  	env := env0
   462  	var suffix string
   463  	if i := strings.LastIndex(env, "+"); i > 0 {
   464  		suffix = env[i:]
   465  		env = env[:i]
   466  	}
   467  	var r ociref.Reference
   468  	if !strings.Contains(env, "/") {
   469  		// OCI references don't allow a host name on its own without a repo,
   470  		// but we do.
   471  		r.Host = env
   472  		if !ociref.IsValidHost(r.Host) {
   473  			return nil, fmt.Errorf("invalid host name %q in registry", r.Host)
   474  		}
   475  	} else {
   476  		var err error
   477  		r, err = ociref.Parse(env)
   478  		if err != nil {
   479  			return nil, err
   480  		}
   481  		if r.Tag != "" || r.Digest != "" {
   482  			return nil, fmt.Errorf("cannot have an associated tag or digest")
   483  		}
   484  	}
   485  	if suffix == "" {
   486  		if isInsecureHost(r.Host) {
   487  			suffix = "+insecure"
   488  		} else {
   489  			suffix = "+secure"
   490  		}
   491  	}
   492  	insecure := false
   493  	switch suffix {
   494  	case "+insecure":
   495  		insecure = true
   496  	case "+secure":
   497  	default:
   498  		return nil, fmt.Errorf("unknown suffix (%q), need +insecure, +secure or no suffix)", suffix)
   499  	}
   500  	return &registryConfig{
   501  		Registry:     env0,
   502  		PathEncoding: encPath,
   503  		host:         r.Host,
   504  		repository:   r.Repository,
   505  		insecure:     insecure,
   506  	}, nil
   507  }
   508  
   509  var (
   510  	ipV4Localhost = netip.MustParseAddr("127.0.0.1")
   511  	ipV6Localhost = netip.MustParseAddr("::1")
   512  )
   513  
   514  func isInsecureHost(hostPort string) bool {
   515  	host, _, err := net.SplitHostPort(hostPort)
   516  	if err != nil {
   517  		host = hostPort
   518  		if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
   519  			host = host[1 : len(host)-1]
   520  		}
   521  	}
   522  	if host == "localhost" {
   523  		return true
   524  	}
   525  	addr, err := netip.ParseAddr(host)
   526  	if err != nil {
   527  		return false
   528  	}
   529  	// TODO other clients have logic for RFC1918 too, amongst other
   530  	// things. Maybe we should do that too.
   531  	return addr == ipV4Localhost || addr == ipV6Localhost
   532  }