cuelang.org/go@v0.13.0/internal/mod/modresolve/resolve.go (about)

     1  // Copyright 2024 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package modresolve
    16  
    17  import (
    18  	"cmp"
    19  	"crypto/sha256"
    20  	_ "embed"
    21  	"fmt"
    22  	"net"
    23  	"net/netip"
    24  	"path"
    25  	"slices"
    26  	"strings"
    27  	"sync"
    28  
    29  	"cuelabs.dev/go/oci/ociregistry/ociref"
    30  
    31  	"cuelang.org/go/cue"
    32  	"cuelang.org/go/cue/cuecontext"
    33  	"cuelang.org/go/cue/errors"
    34  	"cuelang.org/go/cue/token"
    35  	"cuelang.org/go/mod/module"
    36  )
    37  
// pathEncoding represents one of the possible types of
// encoding for module paths within a registry.
// It reflects the #registry.pathEncoding disjunction
// in schema.cue.
// TODO it would be nice if this could be auto-generated
// from the schema.
type pathEncoding string

// The known path encodings, as interpreted by ResolveToLocation:
//   - encPath joins the registry's repository with the module path
//     to form the repository name.
//   - encHashAsRepo uses the hex SHA-256 hash of the module path as the
//     final element of the repository name.
//   - encHashAsTag stores everything in the registry's repository and
//     includes the hex SHA-256 hash of the module path in the tag.
const (
	encPath       pathEncoding = "path"
	encHashAsRepo pathEncoding = "hashAsRepo"
	encHashAsTag  pathEncoding = "hashAsTag"
)
    51  
// LocationResolver resolves module paths to a location
// consisting of a host name of a registry and where
// in that registry the module is to be found.
//
// Note: The implementation in this package operates entirely lexically,
// which is why [Location] contains only a host name and not an actual
// [ociregistry.Interface] implementation.
type LocationResolver interface {
	// ResolveToLocation resolves a base module path (without a version
	// suffix, a.k.a. OCI repository name) and optional version to
	// the location for that path. It reports whether it can find
	// an appropriate location for the module.
	//
	// If the version is empty, the Tag in the returned Location
	// will hold the prefix that all versions of the module in its
	// repository have. That prefix will be followed by the version
	// itself.
	ResolveToLocation(path string, vers string) (Location, bool)

	// AllHosts returns all the registry hosts that the resolver
	// might resolve to, ordered lexically by hostname.
	AllHosts() []Host
}
    75  
// Host represents a registry host name.
type Host struct {
	// Name holds the host name or IP address of the registry.
	// If it's an IP v6 address, it will be surrounded with
	// square brackets ([, ]).
	Name string
	// Insecure holds whether this host should be connected
	// to insecurely (with an HTTP rather than HTTPS connection).
	Insecure bool
}
    86  
// Location represents the location for a given module version or versions.
type Location struct {
	// Host holds the host or host:port of the registry.
	Host string

	// Insecure holds whether an insecure connection
	// should be used when connecting to the registry.
	Insecure bool

	// Repository holds the repository to store the module in.
	Repository string

	// Tag holds the tag for the module version.
	// If an empty version was passed to
	// [LocationResolver.ResolveToLocation], it holds the prefix
	// shared by all version tags for the module.
	Tag string
}
   105  
// config mirrors the #File definition in schema.cue.
//
// ModuleRegistries maps module path prefixes to the registry
// configuration used for modules with that prefix; DefaultRegistry,
// when non-nil, is used for modules that match no prefix.
//
// TODO it would be nice to be able to generate this
// type directly from the schema.
type config struct {
	ModuleRegistries map[string]*registryConfig `json:"moduleRegistries,omitempty"`
	DefaultRegistry  *registryConfig            `json:"defaultRegistry,omitempty"`
}
   113  
   114  func (cfg *config) init() error {
   115  	for prefix, reg := range cfg.ModuleRegistries {
   116  		if err := module.CheckPathWithoutVersion(prefix); err != nil {
   117  			return fmt.Errorf("invalid module path %q: %v", prefix, err)
   118  		}
   119  		if err := reg.init(); err != nil {
   120  			return fmt.Errorf("invalid registry configuration in %q: %v", prefix, err)
   121  		}
   122  	}
   123  	if cfg.DefaultRegistry != nil {
   124  		if err := cfg.DefaultRegistry.init(); err != nil {
   125  			return fmt.Errorf("invalid default registry configuration: %v", err)
   126  		}
   127  	}
   128  	return nil
   129  }
   130  
// registryConfig holds the configuration for a single registry.
// The exported fields are decoded from the configuration (see the
// JSON tags); the unexported fields are derived from Registry by
// parseRegistry.
type registryConfig struct {
	Registry      string       `json:"registry,omitempty"`
	PathEncoding  pathEncoding `json:"pathEncoding,omitempty"`
	PrefixForTags string       `json:"prefixForTags,omitempty"`
	StripPrefix   bool         `json:"stripPrefix,omitempty"`

	// The following fields are filled in from Registry after parsing.
	// none is set when Registry is the literal "none", in which case
	// modules routed here cannot be resolved to any location.
	none       bool
	host       string
	repository string
	insecure   bool
}
   143  
   144  func (r *registryConfig) init() error {
   145  	r1, err := parseRegistry(r.Registry)
   146  	if err != nil {
   147  		return err
   148  	}
   149  	r.none, r.host, r.repository, r.insecure = r1.none, r1.host, r1.repository, r1.insecure
   150  
   151  	if r.PrefixForTags != "" {
   152  		if !ociref.IsValidTag(r.PrefixForTags) {
   153  			return fmt.Errorf("invalid tag prefix %q", r.PrefixForTags)
   154  		}
   155  	}
   156  	if r.PathEncoding == "" {
   157  		// Shouldn't happen because default should apply.
   158  		return fmt.Errorf("empty pathEncoding")
   159  	}
   160  	if r.StripPrefix {
   161  		if r.PathEncoding != encPath {
   162  			// TODO we could relax this to allow storing of naked tags
   163  			// when the module path matches exactly and hash tags
   164  			// otherwise.
   165  			return fmt.Errorf("cannot strip prefix unless using path encoding")
   166  		}
   167  		if r.repository == "" {
   168  			return fmt.Errorf("use of stripPrefix requires a non-empty repository within the registry")
   169  		}
   170  	}
   171  	return nil
   172  }
   173  
var (
	configSchemaOnce sync.Once // guards the creation of _configSchema
	// TODO remove this mutex when https://cuelang.org/issue/2733 is fixed.
	configSchemaMutex sync.Mutex // guards any use of _configSchema
	_configSchema     cue.Value  // the #file definition compiled from configSchemaData; set by ParseConfig
)

// configSchemaData holds the contents of schema.cue, embedded at build time.
//
//go:embed schema.cue
var configSchemaData []byte
   183  
   184  // RegistryConfigSchema returns the CUE schema
   185  // for the configuration parsed by [ParseConfig].
   186  func RegistryConfigSchema() string {
   187  	// Cut out the copyright header and the header that's
   188  	// not pure schema.
   189  	schema := string(configSchemaData)
   190  	i := strings.Index(schema, "\n// #file ")
   191  	if i == -1 {
   192  		panic("no file definition found in schema")
   193  	}
   194  	i++
   195  	return schema[i:]
   196  }
   197  
   198  // ParseConfig parses the registry configuration with the given contents and file name.
   199  // If there is no default registry, then the single registry specified in catchAllDefault
   200  // will be used as a default.
   201  func ParseConfig(configFile []byte, filename string, catchAllDefault string) (LocationResolver, error) {
   202  	configSchemaOnce.Do(func() {
   203  		ctx := cuecontext.New()
   204  		schemav := ctx.CompileBytes(configSchemaData, cue.Filename("cuelang.org/go/internal/mod/modresolve/schema.cue"))
   205  		schemav = schemav.LookupPath(cue.MakePath(cue.Def("#file")))
   206  		if err := schemav.Validate(); err != nil {
   207  			panic(fmt.Errorf("internal error: invalid CUE registry config schema: %v", errors.Details(err, nil)))
   208  		}
   209  		_configSchema = schemav
   210  	})
   211  	configSchemaMutex.Lock()
   212  	defer configSchemaMutex.Unlock()
   213  
   214  	v := _configSchema.Context().CompileBytes(configFile, cue.Filename(filename))
   215  	if err := v.Err(); err != nil {
   216  		return nil, errors.Wrapf(err, token.NoPos, "invalid registry configuration file")
   217  	}
   218  	v = v.Unify(_configSchema)
   219  	if err := v.Err(); err != nil {
   220  		return nil, errors.Wrapf(err, token.NoPos, "invalid configuration file")
   221  	}
   222  	var cfg config
   223  	if err := v.Decode(&cfg); err != nil {
   224  		return nil, errors.Wrapf(err, token.NoPos, "internal error: cannot decode into registry config struct")
   225  	}
   226  	if err := cfg.init(); err != nil {
   227  		return nil, err
   228  	}
   229  	if cfg.DefaultRegistry == nil {
   230  		if catchAllDefault == "" {
   231  			return nil, fmt.Errorf("no default catch-all registry provided")
   232  		}
   233  		// TODO is it too limiting to have the catch-all registry specified as a simple string?
   234  		reg, err := parseRegistry(catchAllDefault)
   235  		if err != nil {
   236  			return nil, fmt.Errorf("invalid catch-all registry %q: %v", catchAllDefault, err)
   237  		}
   238  		cfg.DefaultRegistry = reg
   239  	}
   240  	r := &resolver{
   241  		cfg: cfg,
   242  	}
   243  	if err := r.initHosts(); err != nil {
   244  		return nil, err
   245  	}
   246  	return r, nil
   247  }
   248  
   249  // ParseCUERegistry parses a registry routing specification that
   250  // maps module prefixes to the registry that should be used to
   251  // fetch that module.
   252  //
   253  // The specification consists of an order-independent, comma-separated list.
   254  //
   255  // Each element either maps a module prefix to the registry that will be used
   256  // for all modules that have that prefix (prefix=registry), or a catch-all registry to be used
   257  // for modules that do not match any prefix (registry).
   258  //
   259  // For example:
   260  //
   261  //	myorg.com=myregistry.com/m,catchallregistry.example.org
   262  //
   263  // Any module with a matching prefix will be routed to the given registry.
   264  // A prefix only matches whole path elements.
   265  // In the above example, module myorg.com/foo/bar@v0 will be looked up
   266  // in myregistry.com in the repository m/myorg.com/foo/bar,
   267  // whereas github.com/x/y will be looked up in catchallregistry.example.com.
   268  //
   269  // The registry part is syntactically similar to a [docker reference]
   270  // except that the repository is optional and no tag or digest is allowed.
   271  // Additionally, a +secure or +insecure suffix may be used to indicate
   272  // whether to use a secure or insecure connection. Without that,
   273  // localhost, 127.0.0.1 and [::1] will default to insecure, and anything
   274  // else to secure.
   275  //
   276  // If s does not declare a catch-all registry location, catchAllDefault is
   277  // used. It is an error if s fails to declares a catch-all registry location
   278  // and no catchAllDefault is provided.
   279  //
   280  // [docker reference]: https://pkg.go.dev/github.com/distribution/reference
   281  func ParseCUERegistry(s string, catchAllDefault string) (LocationResolver, error) {
   282  	if s == "" && catchAllDefault == "" {
   283  		return nil, fmt.Errorf("no catch-all registry or default")
   284  	}
   285  	if s == "" {
   286  		s = catchAllDefault
   287  	}
   288  	cfg := config{
   289  		ModuleRegistries: make(map[string]*registryConfig),
   290  	}
   291  	parts := strings.Split(s, ",")
   292  	for _, part := range parts {
   293  		key, val, ok := strings.Cut(part, "=")
   294  		if !ok {
   295  			if part == "" {
   296  				// TODO or just ignore it?
   297  				return nil, fmt.Errorf("empty registry part")
   298  			}
   299  			if _, ok := cfg.ModuleRegistries[""]; ok {
   300  				return nil, fmt.Errorf("duplicate catch-all registry")
   301  			}
   302  			key, val = "", part
   303  		} else {
   304  			if key == "" {
   305  				return nil, fmt.Errorf("empty module prefix")
   306  			}
   307  			if val == "" {
   308  				return nil, fmt.Errorf("empty registry reference")
   309  			}
   310  			if err := module.CheckPathWithoutVersion(key); err != nil {
   311  				return nil, fmt.Errorf("invalid module path %q: %v", key, err)
   312  			}
   313  			if _, ok := cfg.ModuleRegistries[key]; ok {
   314  				return nil, fmt.Errorf("duplicate module prefix %q", key)
   315  			}
   316  		}
   317  		reg, err := parseRegistry(val)
   318  		if err != nil {
   319  			return nil, fmt.Errorf("invalid registry %q: %v", val, err)
   320  		}
   321  		cfg.ModuleRegistries[key] = reg
   322  	}
   323  	if _, ok := cfg.ModuleRegistries[""]; !ok {
   324  		if catchAllDefault == "" {
   325  			return nil, fmt.Errorf("no default catch-all registry provided")
   326  		}
   327  		reg, err := parseRegistry(catchAllDefault)
   328  		if err != nil {
   329  			return nil, fmt.Errorf("invalid catch-all registry %q: %v", catchAllDefault, err)
   330  		}
   331  		cfg.ModuleRegistries[""] = reg
   332  	}
   333  	cfg.DefaultRegistry = cfg.ModuleRegistries[""]
   334  	delete(cfg.ModuleRegistries, "")
   335  
   336  	r := &resolver{
   337  		cfg: cfg,
   338  	}
   339  	if err := r.initHosts(); err != nil {
   340  		return nil, err
   341  	}
   342  	return r, nil
   343  }
   344  
// resolver is the LocationResolver implementation returned by
// ParseConfig and ParseCUERegistry. allHosts is computed from cfg
// by initHosts.
type resolver struct {
	allHosts []Host
	cfg      config
}
   349  
   350  func (r *resolver) initHosts() error {
   351  	hosts := make(map[string]bool)
   352  	addHost := func(reg *registryConfig) error {
   353  		if reg.none {
   354  			return nil
   355  		}
   356  		if insecure, ok := hosts[reg.host]; ok {
   357  			if insecure != reg.insecure {
   358  				return fmt.Errorf("registry host %q is specified both as secure and insecure", reg.host)
   359  			}
   360  		} else {
   361  			hosts[reg.host] = reg.insecure
   362  		}
   363  		return nil
   364  	}
   365  	for _, reg := range r.cfg.ModuleRegistries {
   366  		if err := addHost(reg); err != nil {
   367  			return err
   368  		}
   369  	}
   370  
   371  	if reg := r.cfg.DefaultRegistry; reg != nil {
   372  		if err := addHost(reg); err != nil {
   373  			return err
   374  		}
   375  	}
   376  	allHosts := make([]Host, 0, len(hosts))
   377  	for host, insecure := range hosts {
   378  		allHosts = append(allHosts, Host{
   379  			Name:     host,
   380  			Insecure: insecure,
   381  		})
   382  	}
   383  	slices.SortFunc(allHosts, func(a, b Host) int {
   384  		return cmp.Compare(a.Name, b.Name)
   385  	})
   386  	r.allHosts = allHosts
   387  	return nil
   388  }
   389  
// AllHosts implements [LocationResolver.AllHosts].
// It returns the slice stored in r.allHosts directly, without copying.
func (r *resolver) AllHosts() []Host {
	return r.allHosts
}
   394  
   395  func (r *resolver) ResolveToLocation(mpath, vers string) (Location, bool) {
   396  	if mpath == "" {
   397  		return Location{}, false
   398  	}
   399  	bestMatch := ""
   400  	// Note: there's always a wildcard match.
   401  	bestMatchReg := r.cfg.DefaultRegistry
   402  	for pat, reg := range r.cfg.ModuleRegistries {
   403  		if pat == mpath {
   404  			bestMatch = pat
   405  			bestMatchReg = reg
   406  			break
   407  		}
   408  		if !strings.HasPrefix(mpath, pat) {
   409  			continue
   410  		}
   411  		if len(bestMatch) > len(pat) {
   412  			// We've already found a more specific match.
   413  			continue
   414  		}
   415  		if mpath[len(pat)] != '/' {
   416  			// The path doesn't have a separator at the end of
   417  			// the prefix, which means that it doesn't match.
   418  			// For example, foo.com/bar does not match foo.com/ba.
   419  			continue
   420  		}
   421  		// It's a possible match but not necessarily the longest one.
   422  		bestMatch, bestMatchReg = pat, reg
   423  	}
   424  	reg := bestMatchReg
   425  	if reg == nil || reg.none {
   426  		return Location{}, false
   427  	}
   428  	loc := Location{
   429  		Host:     reg.host,
   430  		Insecure: reg.insecure,
   431  		Tag:      vers,
   432  	}
   433  	switch reg.PathEncoding {
   434  	case encPath:
   435  		if reg.StripPrefix {
   436  			mpath = strings.TrimPrefix(mpath, bestMatch)
   437  			mpath = strings.TrimPrefix(mpath, "/")
   438  		}
   439  		loc.Repository = path.Join(reg.repository, mpath)
   440  	case encHashAsRepo:
   441  		loc.Repository = fmt.Sprintf("%s/%x", reg.repository, sha256.Sum256([]byte(mpath)))
   442  	case encHashAsTag:
   443  		loc.Repository = reg.repository
   444  	default:
   445  		panic("unreachable")
   446  	}
   447  	if reg.PathEncoding == encHashAsTag {
   448  		loc.Tag = fmt.Sprintf("%s%x-%s", reg.PrefixForTags, sha256.Sum256([]byte(mpath)), vers)
   449  	} else {
   450  		loc.Tag = reg.PrefixForTags + vers
   451  	}
   452  	return loc, true
   453  }
   454  
   455  func parseRegistry(env0 string) (*registryConfig, error) {
   456  	if env0 == "none" {
   457  		return &registryConfig{
   458  			Registry: env0,
   459  			none:     true,
   460  		}, nil
   461  	}
   462  	env := env0
   463  	var suffix string
   464  	if i := strings.LastIndex(env, "+"); i > 0 {
   465  		suffix = env[i:]
   466  		env = env[:i]
   467  	}
   468  	var r ociref.Reference
   469  	if !strings.Contains(env, "/") {
   470  		// OCI references don't allow a host name on its own without a repo,
   471  		// but we do.
   472  		r.Host = env
   473  		if !ociref.IsValidHost(r.Host) {
   474  			return nil, fmt.Errorf("invalid host name %q in registry", r.Host)
   475  		}
   476  	} else {
   477  		var err error
   478  		r, err = ociref.Parse(env)
   479  		if err != nil {
   480  			return nil, err
   481  		}
   482  		if r.Tag != "" || r.Digest != "" {
   483  			return nil, fmt.Errorf("cannot have an associated tag or digest")
   484  		}
   485  	}
   486  	if suffix == "" {
   487  		if isInsecureHost(r.Host) {
   488  			suffix = "+insecure"
   489  		} else {
   490  			suffix = "+secure"
   491  		}
   492  	}
   493  	insecure := false
   494  	switch suffix {
   495  	case "+insecure":
   496  		insecure = true
   497  	case "+secure":
   498  	default:
   499  		return nil, fmt.Errorf("unknown suffix (%q), need +insecure, +secure or no suffix)", suffix)
   500  	}
   501  	return &registryConfig{
   502  		Registry:     env0,
   503  		PathEncoding: encPath,
   504  		host:         r.Host,
   505  		repository:   r.Repository,
   506  		insecure:     insecure,
   507  	}, nil
   508  }
   509  
   510  var (
   511  	ipV4Localhost = netip.MustParseAddr("127.0.0.1")
   512  	ipV6Localhost = netip.MustParseAddr("::1")
   513  )
   514  
   515  func isInsecureHost(hostPort string) bool {
   516  	host, _, err := net.SplitHostPort(hostPort)
   517  	if err != nil {
   518  		host = hostPort
   519  		if strings.HasPrefix(host, "[") && strings.HasSuffix(host, "]") {
   520  			host = host[1 : len(host)-1]
   521  		}
   522  	}
   523  	if host == "localhost" {
   524  		return true
   525  	}
   526  	addr, err := netip.ParseAddr(host)
   527  	if err != nil {
   528  		return false
   529  	}
   530  	// TODO other clients have logic for RFC1918 too, amongst other
   531  	// things. Maybe we should do that too.
   532  	return addr == ipV4Localhost || addr == ipV6Localhost
   533  }