github.com/cycloidio/terraform@v1.1.10-0.20220513142504-76d5c768dc63/addrs/module_source.go (about)

     1  package addrs
     2  
     3  import (
     4  	"fmt"
     5  	"path"
     6  	"regexp"
     7  	"strings"
     8  
     9  	svchost "github.com/hashicorp/terraform-svchost"
    10  	"github.com/cycloidio/terraform/getmodules"
    11  )
    12  
    13  // ModuleSource is the general type for all three of the possible module source
    14  // address types. The concrete implementations of this are ModuleSourceLocal,
    15  // ModuleSourceRegistry, and ModuleSourceRemote.
    16  type ModuleSource interface {
    17  	// String returns a full representation of the address, including any
    18  	// additional components that are typically implied by omission in
    19  	// user-written addresses.
    20  	//
    21  	// We typically use this longer representation in error message, in case
    22  	// the inclusion of normally-omitted components is helpful in debugging
    23  	// unexpected behavior.
    24  	String() string
    25  
    26  	// ForDisplay is similar to String but instead returns a representation of
    27  	// the idiomatic way to write the address in configuration, omitting
    28  	// components that are commonly just implied in addresses written by
    29  	// users.
    30  	//
    31  	// We typically use this shorter representation in informational messages,
    32  	// such as the note that we're about to start downloading a package.
    33  	ForDisplay() string
    34  
    35  	moduleSource()
    36  }
    37  
    38  var _ ModuleSource = ModuleSourceLocal("")
    39  var _ ModuleSource = ModuleSourceRegistry{}
    40  var _ ModuleSource = ModuleSourceRemote{}
    41  
    42  var moduleSourceLocalPrefixes = []string{
    43  	"./",
    44  	"../",
    45  	".\\",
    46  	"..\\",
    47  }
    48  
    49  func ParseModuleSource(raw string) (ModuleSource, error) {
    50  	for _, prefix := range moduleSourceLocalPrefixes {
    51  		if strings.HasPrefix(raw, prefix) {
    52  			localAddr, err := parseModuleSourceLocal(raw)
    53  			if err != nil {
    54  				// This is to make sure we really return a nil ModuleSource in
    55  				// this case, rather than an interface containing the zero
    56  				// value of ModuleSourceLocal.
    57  				return nil, err
    58  			}
    59  			return localAddr, nil
    60  		}
    61  	}
    62  
    63  	// For historical reasons, whether an address is a registry
    64  	// address is defined only by whether it can be successfully
    65  	// parsed as one, and anything else must fall through to be
    66  	// parsed as a direct remote source, where go-getter might
    67  	// then recognize it as a filesystem path. This is odd
    68  	// but matches behavior we've had since Terraform v0.10 which
    69  	// existing modules may be relying on.
    70  	// (Notice that this means that there's never any path where
    71  	// the registry source parse error gets returned to the caller,
    72  	// which is annoying but has been true for many releases
    73  	// without it posing a serious problem in practice.)
    74  	if ret, err := parseModuleSourceRegistry(raw); err == nil {
    75  		return ret, nil
    76  	}
    77  
    78  	// If we get down here then we treat everything else as a
    79  	// remote address. In practice there's very little that
    80  	// go-getter doesn't consider invalid input, so even invalid
    81  	// nonsense will probably interpreted as _something_ here
    82  	// and then fail during installation instead. We can't
    83  	// really improve this situation for historical reasons.
    84  	remoteAddr, err := parseModuleSourceRemote(raw)
    85  	if err != nil {
    86  		// This is to make sure we really return a nil ModuleSource in
    87  		// this case, rather than an interface containing the zero
    88  		// value of ModuleSourceRemote.
    89  		return nil, err
    90  	}
    91  	return remoteAddr, nil
    92  }
    93  
    94  // ModuleSourceLocal is a ModuleSource representing a local path reference
    95  // from the caller's directory to the callee's directory within the same
    96  // module package.
    97  //
    98  // A "module package" here means a set of modules distributed together in
    99  // the same archive, repository, or similar. That's a significant distinction
   100  // because we always download and cache entire module packages at once,
   101  // and then create relative references within the same directory in order
   102  // to ensure all modules in the package are looking at a consistent filesystem
   103  // layout. We also assume that modules within a package are maintained together,
   104  // which means that cross-cutting maintenence across all of them would be
   105  // possible.
   106  //
   107  // The actual value of a ModuleSourceLocal is a normalized relative path using
   108  // forward slashes, even on operating systems that have other conventions,
   109  // because we're representing traversal within the logical filesystem
   110  // represented by the containing package, not actually within the physical
   111  // filesystem we unpacked the package into. We should typically not construct
   112  // ModuleSourceLocal values directly, except in tests where we can ensure
   113  // the value meets our assumptions. Use ParseModuleSource instead if the
   114  // input string is not hard-coded in the program.
   115  type ModuleSourceLocal string
   116  
   117  func parseModuleSourceLocal(raw string) (ModuleSourceLocal, error) {
   118  	// As long as we have a suitable prefix (detected by ParseModuleSource)
   119  	// there is no failure case for local paths: we just use the "path"
   120  	// package's cleaning logic to remove any redundant "./" and "../"
   121  	// sequences and any duplicate slashes and accept whatever that
   122  	// produces.
   123  
   124  	// Although using backslashes (Windows-style) is non-idiomatic, we do
   125  	// allow it and just normalize it away, so the rest of Terraform will
   126  	// only see the forward-slash form.
   127  	if strings.Contains(raw, `\`) {
   128  		// Note: We use string replacement rather than filepath.ToSlash
   129  		// here because the filepath package behavior varies by current
   130  		// platform, but we want to interpret configured paths the same
   131  		// across all platforms: these are virtual paths within a module
   132  		// package, not physical filesystem paths.
   133  		raw = strings.ReplaceAll(raw, `\`, "/")
   134  	}
   135  
   136  	// Note that we could've historically blocked using "//" in a path here
   137  	// in order to avoid confusion with the subdir syntax in remote addresses,
   138  	// but we historically just treated that as the same as a single slash
   139  	// and so we continue to do that now for compatibility. Clean strips those
   140  	// out and reduces them to just a single slash.
   141  	clean := path.Clean(raw)
   142  
   143  	// However, we do need to keep a single "./" on the front if it isn't
   144  	// a "../" path, or else it would be ambigous with the registry address
   145  	// syntax.
   146  	if !strings.HasPrefix(clean, "../") {
   147  		clean = "./" + clean
   148  	}
   149  
   150  	return ModuleSourceLocal(clean), nil
   151  }
   152  
   153  func (s ModuleSourceLocal) moduleSource() {}
   154  
   155  func (s ModuleSourceLocal) String() string {
   156  	// We assume that our underlying string was already normalized at
   157  	// construction, so we just return it verbatim.
   158  	return string(s)
   159  }
   160  
   161  func (s ModuleSourceLocal) ForDisplay() string {
   162  	return string(s)
   163  }
   164  
   165  // ModuleSourceRegistry is a ModuleSource representing a module listed in a
   166  // Terraform module registry.
   167  //
   168  // A registry source isn't a direct source location but rather an indirection
   169  // over a ModuleSourceRemote. The job of a registry is to translate the
   170  // combination of a ModuleSourceRegistry and a module version number into
   171  // a concrete ModuleSourceRemote that Terraform will then download and
   172  // install.
   173  type ModuleSourceRegistry struct {
   174  	// PackageAddr is the registry package that the target module belongs to.
   175  	// The module installer must translate this into a ModuleSourceRemote
   176  	// using the registry API and then take that underlying address's
   177  	// PackageAddr in order to find the actual package location.
   178  	PackageAddr ModuleRegistryPackage
   179  
   180  	// If Subdir is non-empty then it represents a sub-directory within the
   181  	// remote package that the registry address eventually resolves to.
   182  	// This will ultimately become the suffix of the Subdir of the
   183  	// ModuleSourceRemote that the registry address translates to.
   184  	//
   185  	// Subdir uses a normalized forward-slash-based path syntax within the
   186  	// virtual filesystem represented by the final package. It will never
   187  	// include `../` or `./` sequences.
   188  	Subdir string
   189  }
   190  
   191  // DefaultModuleRegistryHost is the hostname used for registry-based module
   192  // source addresses that do not have an explicit hostname.
   193  const DefaultModuleRegistryHost = svchost.Hostname("registry.terraform.io")
   194  
   195  var moduleRegistryNamePattern = regexp.MustCompile("^[0-9A-Za-z](?:[0-9A-Za-z-_]{0,62}[0-9A-Za-z])?$")
   196  var moduleRegistryTargetSystemPattern = regexp.MustCompile("^[0-9a-z]{1,64}$")
   197  
   198  func parseModuleSourceRegistry(raw string) (ModuleSourceRegistry, error) {
   199  	var err error
   200  
   201  	var subDir string
   202  	raw, subDir = getmodules.SplitPackageSubdir(raw)
   203  	if strings.HasPrefix(subDir, "../") {
   204  		return ModuleSourceRegistry{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir)
   205  	}
   206  
   207  	parts := strings.Split(raw, "/")
   208  	// A valid registry address has either three or four parts, because the
   209  	// leading hostname part is optional.
   210  	if len(parts) != 3 && len(parts) != 4 {
   211  		return ModuleSourceRegistry{}, fmt.Errorf("a module registry source address must have either three or four slash-separated components")
   212  	}
   213  
   214  	host := DefaultModuleRegistryHost
   215  	if len(parts) == 4 {
   216  		host, err = svchost.ForComparison(parts[0])
   217  		if err != nil {
   218  			// The svchost library doesn't produce very good error messages to
   219  			// return to an end-user, so we'll use some custom ones here.
   220  			switch {
   221  			case strings.Contains(parts[0], "--"):
   222  				// Looks like possibly punycode, which we don't allow here
   223  				// to ensure that source addresses are written readably.
   224  				return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q; internationalized domain names must be given as direct unicode characters, not in punycode", parts[0])
   225  			default:
   226  				return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q", parts[0])
   227  			}
   228  		}
   229  		if !strings.Contains(host.String(), ".") {
   230  			return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname: must contain at least one dot")
   231  		}
   232  		// Discard the hostname prefix now that we've processed it
   233  		parts = parts[1:]
   234  	}
   235  
   236  	ret := ModuleSourceRegistry{
   237  		PackageAddr: ModuleRegistryPackage{
   238  			Host: host,
   239  		},
   240  
   241  		Subdir: subDir,
   242  	}
   243  
   244  	if host == svchost.Hostname("github.com") || host == svchost.Hostname("bitbucket.org") {
   245  		return ret, fmt.Errorf("can't use %q as a module registry host, because it's reserved for installing directly from version control repositories", host)
   246  	}
   247  
   248  	if ret.PackageAddr.Namespace, err = parseModuleRegistryName(parts[0]); err != nil {
   249  		if strings.Contains(parts[0], ".") {
   250  			// Seems like the user omitted one of the latter components in
   251  			// an address with an explicit hostname.
   252  			return ret, fmt.Errorf("source address must have three more components after the hostname: the namespace, the name, and the target system")
   253  		}
   254  		return ret, fmt.Errorf("invalid namespace %q: %s", parts[0], err)
   255  	}
   256  	if ret.PackageAddr.Name, err = parseModuleRegistryName(parts[1]); err != nil {
   257  		return ret, fmt.Errorf("invalid module name %q: %s", parts[1], err)
   258  	}
   259  	if ret.PackageAddr.TargetSystem, err = parseModuleRegistryTargetSystem(parts[2]); err != nil {
   260  		if strings.Contains(parts[2], "?") {
   261  			// The user was trying to include a query string, probably?
   262  			return ret, fmt.Errorf("module registry addresses may not include a query string portion")
   263  		}
   264  		return ret, fmt.Errorf("invalid target system %q: %s", parts[2], err)
   265  	}
   266  
   267  	return ret, nil
   268  }
   269  
   270  // parseModuleRegistryName validates and normalizes a string in either the
   271  // "namespace" or "name" position of a module registry source address.
   272  func parseModuleRegistryName(given string) (string, error) {
   273  	// Similar to the names in provider source addresses, we defined these
   274  	// to be compatible with what filesystems and typical remote systems
   275  	// like GitHub allow in names. Unfortunately we didn't end up defining
   276  	// these exactly equivalently: provider names can only use dashes as
   277  	// punctuation, whereas module names can use underscores. So here we're
   278  	// using some regular expressions from the original module source
   279  	// implementation, rather than using the IDNA rules as we do in
   280  	// ParseProviderPart.
   281  
   282  	if !moduleRegistryNamePattern.MatchString(given) {
   283  		return "", fmt.Errorf("must be between one and 64 characters, including ASCII letters, digits, dashes, and underscores, where dashes and underscores may not be the prefix or suffix")
   284  	}
   285  
   286  	// We also skip normalizing the name to lowercase, because we historically
   287  	// didn't do that and so existing module registries might be doing
   288  	// case-sensitive matching.
   289  	return given, nil
   290  }
   291  
   292  // parseModuleRegistryTargetSystem validates and normalizes a string in the
   293  // "target system" position of a module registry source address. This is
   294  // what we historically called "provider" but never actually enforced as
   295  // being a provider address, and now _cannot_ be a provider address because
   296  // provider addresses have three slash-separated components of their own.
   297  func parseModuleRegistryTargetSystem(given string) (string, error) {
   298  	// Similar to the names in provider source addresses, we defined these
   299  	// to be compatible with what filesystems and typical remote systems
   300  	// like GitHub allow in names. Unfortunately we didn't end up defining
   301  	// these exactly equivalently: provider names can only use dashes as
   302  	// punctuation, whereas module names can use underscores. So here we're
   303  	// using some regular expressions from the original module source
   304  	// implementation, rather than using the IDNA rules as we do in
   305  	// ParseProviderPart.
   306  
   307  	if !moduleRegistryTargetSystemPattern.MatchString(given) {
   308  		return "", fmt.Errorf("must be between one and 64 ASCII letters or digits")
   309  	}
   310  
   311  	// We also skip normalizing the name to lowercase, because we historically
   312  	// didn't do that and so existing module registries might be doing
   313  	// case-sensitive matching.
   314  	return given, nil
   315  }
   316  
   317  func (s ModuleSourceRegistry) moduleSource() {}
   318  
   319  func (s ModuleSourceRegistry) String() string {
   320  	if s.Subdir != "" {
   321  		return s.PackageAddr.String() + "//" + s.Subdir
   322  	}
   323  	return s.PackageAddr.String()
   324  }
   325  
   326  func (s ModuleSourceRegistry) ForDisplay() string {
   327  	if s.Subdir != "" {
   328  		return s.PackageAddr.ForDisplay() + "//" + s.Subdir
   329  	}
   330  	return s.PackageAddr.ForDisplay()
   331  }
   332  
   333  // ModuleSourceRemote is a ModuleSource representing a remote location from
   334  // which we can retrieve a module package.
   335  //
   336  // A ModuleSourceRemote can optionally include a "subdirectory" path, which
   337  // means that it's selecting a sub-directory of the given package to use as
   338  // the entry point into the package.
   339  type ModuleSourceRemote struct {
   340  	// PackageAddr is the address of the remote package that the requested
   341  	// module belongs to.
   342  	PackageAddr ModulePackage
   343  
   344  	// If Subdir is non-empty then it represents a sub-directory within the
   345  	// remote package which will serve as the entry-point for the package.
   346  	//
   347  	// Subdir uses a normalized forward-slash-based path syntax within the
   348  	// virtual filesystem represented by the final package. It will never
   349  	// include `../` or `./` sequences.
   350  	Subdir string
   351  }
   352  
   353  func parseModuleSourceRemote(raw string) (ModuleSourceRemote, error) {
   354  	var subDir string
   355  	raw, subDir = getmodules.SplitPackageSubdir(raw)
   356  	if strings.HasPrefix(subDir, "../") {
   357  		return ModuleSourceRemote{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir)
   358  	}
   359  
   360  	// A remote source address is really just a go-getter address resulting
   361  	// from go-getter's "detect" phase, which adds on the prefix specifying
   362  	// which protocol it should use and possibly also adjusts the
   363  	// protocol-specific part into different syntax.
   364  	//
   365  	// Note that for historical reasons this can potentially do network
   366  	// requests in order to disambiguate certain address types, although
   367  	// that's a legacy thing that is only for some specific, less-commonly-used
   368  	// address types. Most just do local string manipulation. We should
   369  	// aim to remove the network requests over time, if possible.
   370  	norm, moreSubDir, err := getmodules.NormalizePackageAddress(raw)
   371  	if err != nil {
   372  		// We must pass through the returned error directly here because
   373  		// the getmodules package has some special error types it uses
   374  		// for certain cases where the UI layer might want to include a
   375  		// more helpful error message.
   376  		return ModuleSourceRemote{}, err
   377  	}
   378  
   379  	if moreSubDir != "" {
   380  		switch {
   381  		case subDir != "":
   382  			// The detector's own subdir goes first, because the
   383  			// subdir we were given is conceptually relative to
   384  			// the subdirectory that we just detected.
   385  			subDir = path.Join(moreSubDir, subDir)
   386  		default:
   387  			subDir = path.Clean(moreSubDir)
   388  		}
   389  		if strings.HasPrefix(subDir, "../") {
   390  			// This would suggest a bug in a go-getter detector, but
   391  			// we'll catch it anyway to avoid doing something confusing
   392  			// downstream.
   393  			return ModuleSourceRemote{}, fmt.Errorf("detected subdirectory path %q of %q leads outside of the module package", subDir, norm)
   394  		}
   395  	}
   396  
   397  	return ModuleSourceRemote{
   398  		PackageAddr: ModulePackage(norm),
   399  		Subdir:      subDir,
   400  	}, nil
   401  }
   402  
   403  func (s ModuleSourceRemote) moduleSource() {}
   404  
   405  func (s ModuleSourceRemote) String() string {
   406  	if s.Subdir != "" {
   407  		return s.PackageAddr.String() + "//" + s.Subdir
   408  	}
   409  	return s.PackageAddr.String()
   410  }
   411  
   412  func (s ModuleSourceRemote) ForDisplay() string {
   413  	// The two string representations are identical for this address type.
   414  	// This isn't really entirely true to the idea of "ForDisplay" since
   415  	// it'll often include some additional components added in by the
   416  	// go-getter detectors, but we don't have any function to turn a
   417  	// "detected" string back into an idiomatic shorthand the user might've
   418  	// entered.
   419  	return s.String()
   420  }
   421  
   422  // FromRegistry can be called on a remote source address that was returned
   423  // from a module registry, passing in the original registry source address
   424  // that the registry was asked about, in order to get the effective final
   425  // remote source address.
   426  //
   427  // Specifically, this method handles the situations where one or both of
   428  // the two addresses contain subdirectory paths, combining both when necessary
   429  // in order to ensure that both the registry's given path and the user's
   430  // given path are both respected.
   431  //
   432  // This will return nonsense if given a registry address other than the one
   433  // that generated the reciever via a registry lookup.
   434  func (s ModuleSourceRemote) FromRegistry(given ModuleSourceRegistry) ModuleSourceRemote {
   435  	ret := s // not a pointer, so this is a shallow copy
   436  
   437  	switch {
   438  	case s.Subdir != "" && given.Subdir != "":
   439  		ret.Subdir = path.Join(s.Subdir, given.Subdir)
   440  	case given.Subdir != "":
   441  		ret.Subdir = given.Subdir
   442  	}
   443  
   444  	return ret
   445  }