github.com/terramate-io/tf@v0.0.0-20230830114523-fce866b4dfcd/addrs/module_source.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package addrs
     5  
     6  import (
     7  	"fmt"
     8  	"path"
     9  	"strings"
    10  
    11  	tfaddr "github.com/hashicorp/terraform-registry-address"
    12  	"github.com/terramate-io/tf/getmodules"
    13  )
    14  
// ModuleSource is the general type for all three of the possible module source
// address types. The concrete implementations of this are ModuleSourceLocal,
// ModuleSourceRegistry, and ModuleSourceRemote.
type ModuleSource interface {
	// String returns a full representation of the address, including any
	// additional components that are typically implied by omission in
	// user-written addresses.
	//
	// We typically use this longer representation in error messages, in case
	// the inclusion of normally-omitted components is helpful in debugging
	// unexpected behavior.
	String() string

	// ForDisplay is similar to String but instead returns a representation of
	// the idiomatic way to write the address in configuration, omitting
	// components that are commonly just implied in addresses written by
	// users.
	//
	// We typically use this shorter representation in informational messages,
	// such as the note that we're about to start downloading a package.
	ForDisplay() string

	// moduleSource is an unexported marker method. Because other packages
	// cannot declare a method with this name, the set of ModuleSource
	// implementations stays under this package's control.
	moduleSource()
}
    39  
    40  var _ ModuleSource = ModuleSourceLocal("")
    41  var _ ModuleSource = ModuleSourceRegistry{}
    42  var _ ModuleSource = ModuleSourceRemote{}
    43  
    44  var moduleSourceLocalPrefixes = []string{
    45  	"./",
    46  	"../",
    47  	".\\",
    48  	"..\\",
    49  }
    50  
    51  // ParseModuleSource parses a module source address as given in the "source"
    52  // argument inside a "module" block in the configuration.
    53  //
    54  // For historical reasons this syntax is a bit overloaded, supporting three
    55  // different address types:
    56  //   - Local paths starting with either ./ or ../, which are special because
    57  //     Terraform considers them to belong to the same "package" as the caller.
    58  //   - Module registry addresses, given as either NAMESPACE/NAME/SYSTEM or
    59  //     HOST/NAMESPACE/NAME/SYSTEM, in which case the remote registry serves
    60  //     as an indirection over the third address type that follows.
    61  //   - Various URL-like and other heuristically-recognized strings which
    62  //     we currently delegate to the external library go-getter.
    63  //
    64  // There is some ambiguity between the module registry addresses and go-getter's
    65  // very liberal heuristics and so this particular function will typically treat
    66  // an invalid registry address as some other sort of remote source address
    67  // rather than returning an error. If you know that you're expecting a
    68  // registry address in particular, use ParseModuleSourceRegistry instead, which
    69  // can therefore expose more detailed error messages about registry address
    70  // parsing in particular.
    71  func ParseModuleSource(raw string) (ModuleSource, error) {
    72  	if isModuleSourceLocal(raw) {
    73  		localAddr, err := parseModuleSourceLocal(raw)
    74  		if err != nil {
    75  			// This is to make sure we really return a nil ModuleSource in
    76  			// this case, rather than an interface containing the zero
    77  			// value of ModuleSourceLocal.
    78  			return nil, err
    79  		}
    80  		return localAddr, nil
    81  	}
    82  
    83  	// For historical reasons, whether an address is a registry
    84  	// address is defined only by whether it can be successfully
    85  	// parsed as one, and anything else must fall through to be
    86  	// parsed as a direct remote source, where go-getter might
    87  	// then recognize it as a filesystem path. This is odd
    88  	// but matches behavior we've had since Terraform v0.10 which
    89  	// existing modules may be relying on.
    90  	// (Notice that this means that there's never any path where
    91  	// the registry source parse error gets returned to the caller,
    92  	// which is annoying but has been true for many releases
    93  	// without it posing a serious problem in practice.)
    94  	if ret, err := ParseModuleSourceRegistry(raw); err == nil {
    95  		return ret, nil
    96  	}
    97  
    98  	// If we get down here then we treat everything else as a
    99  	// remote address. In practice there's very little that
   100  	// go-getter doesn't consider invalid input, so even invalid
   101  	// nonsense will probably interpreted as _something_ here
   102  	// and then fail during installation instead. We can't
   103  	// really improve this situation for historical reasons.
   104  	remoteAddr, err := parseModuleSourceRemote(raw)
   105  	if err != nil {
   106  		// This is to make sure we really return a nil ModuleSource in
   107  		// this case, rather than an interface containing the zero
   108  		// value of ModuleSourceRemote.
   109  		return nil, err
   110  	}
   111  	return remoteAddr, nil
   112  }
   113  
// ModuleSourceLocal is a ModuleSource representing a local path reference
// from the caller's directory to the callee's directory within the same
// module package.
//
// A "module package" here means a set of modules distributed together in
// the same archive, repository, or similar. That's a significant distinction
// because we always download and cache entire module packages at once,
// and then create relative references within the same directory in order
// to ensure all modules in the package are looking at a consistent filesystem
// layout. We also assume that modules within a package are maintained together,
// which means that cross-cutting maintenance across all of them would be
// possible.
//
// The actual value of a ModuleSourceLocal is a normalized relative path using
// forward slashes, even on operating systems that have other conventions,
// because we're representing traversal within the logical filesystem
// represented by the containing package, not actually within the physical
// filesystem we unpacked the package into. We should typically not construct
// ModuleSourceLocal values directly, except in tests where we can ensure
// the value meets our assumptions. Use ParseModuleSource instead if the
// input string is not hard-coded in the program.
type ModuleSourceLocal string
   136  
   137  func parseModuleSourceLocal(raw string) (ModuleSourceLocal, error) {
   138  	// As long as we have a suitable prefix (detected by ParseModuleSource)
   139  	// there is no failure case for local paths: we just use the "path"
   140  	// package's cleaning logic to remove any redundant "./" and "../"
   141  	// sequences and any duplicate slashes and accept whatever that
   142  	// produces.
   143  
   144  	// Although using backslashes (Windows-style) is non-idiomatic, we do
   145  	// allow it and just normalize it away, so the rest of Terraform will
   146  	// only see the forward-slash form.
   147  	if strings.Contains(raw, `\`) {
   148  		// Note: We use string replacement rather than filepath.ToSlash
   149  		// here because the filepath package behavior varies by current
   150  		// platform, but we want to interpret configured paths the same
   151  		// across all platforms: these are virtual paths within a module
   152  		// package, not physical filesystem paths.
   153  		raw = strings.ReplaceAll(raw, `\`, "/")
   154  	}
   155  
   156  	// Note that we could've historically blocked using "//" in a path here
   157  	// in order to avoid confusion with the subdir syntax in remote addresses,
   158  	// but we historically just treated that as the same as a single slash
   159  	// and so we continue to do that now for compatibility. Clean strips those
   160  	// out and reduces them to just a single slash.
   161  	clean := path.Clean(raw)
   162  
   163  	// However, we do need to keep a single "./" on the front if it isn't
   164  	// a "../" path, or else it would be ambigous with the registry address
   165  	// syntax.
   166  	if !strings.HasPrefix(clean, "../") {
   167  		clean = "./" + clean
   168  	}
   169  
   170  	return ModuleSourceLocal(clean), nil
   171  }
   172  
   173  func isModuleSourceLocal(raw string) bool {
   174  	for _, prefix := range moduleSourceLocalPrefixes {
   175  		if strings.HasPrefix(raw, prefix) {
   176  			return true
   177  		}
   178  	}
   179  	return false
   180  }
   181  
// moduleSource marks ModuleSourceLocal as an implementation of ModuleSource.
func (s ModuleSourceLocal) moduleSource() {}
   183  
// String returns the local path exactly as stored in the value.
func (s ModuleSourceLocal) String() string {
	// We assume that our underlying string was already normalized at
	// construction, so we just return it verbatim.
	return string(s)
}
   189  
   190  func (s ModuleSourceLocal) ForDisplay() string {
   191  	return string(s)
   192  }
   193  
// ModuleSourceRegistry is a ModuleSource representing a module listed in a
// Terraform module registry.
//
// A registry source isn't a direct source location but rather an indirection
// over a ModuleSourceRemote. The job of a registry is to translate the
// combination of a ModuleSourceRegistry and a module version number into
// a concrete ModuleSourceRemote that Terraform will then download and
// install.
//
// The type is defined in terms of tfaddr.Module and so has the same fields;
// this file uses its Package and Subdir fields.
type ModuleSourceRegistry tfaddr.Module
   203  
// DefaultModuleRegistryHost is the hostname used for registry-based module
// source addresses that do not have an explicit hostname. It is the constant
// of the same name from the tfaddr package, re-exported here.
const DefaultModuleRegistryHost = tfaddr.DefaultModuleRegistryHost
   207  
   208  // ParseModuleSourceRegistry is a variant of ParseModuleSource which only
   209  // accepts module registry addresses, and will reject any other address type.
   210  //
   211  // Use this instead of ParseModuleSource if you know from some other surrounding
   212  // context that an address is intended to be a registry address rather than
   213  // some other address type, which will then allow for better error reporting
   214  // due to the additional information about user intent.
   215  func ParseModuleSourceRegistry(raw string) (ModuleSource, error) {
   216  	// Before we delegate to the "real" function we'll just make sure this
   217  	// doesn't look like a local source address, so we can return a better
   218  	// error message for that situation.
   219  	if isModuleSourceLocal(raw) {
   220  		return ModuleSourceRegistry{}, fmt.Errorf("can't use local directory %q as a module registry address", raw)
   221  	}
   222  
   223  	src, err := tfaddr.ParseModuleSource(raw)
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  	return ModuleSourceRegistry{
   228  		Package: src.Package,
   229  		Subdir:  src.Subdir,
   230  	}, nil
   231  }
   232  
// moduleSource marks ModuleSourceRegistry as an implementation of
// ModuleSource.
func (s ModuleSourceRegistry) moduleSource() {}
   234  
   235  func (s ModuleSourceRegistry) String() string {
   236  	if s.Subdir != "" {
   237  		return s.Package.String() + "//" + s.Subdir
   238  	}
   239  	return s.Package.String()
   240  }
   241  
   242  func (s ModuleSourceRegistry) ForDisplay() string {
   243  	if s.Subdir != "" {
   244  		return s.Package.ForDisplay() + "//" + s.Subdir
   245  	}
   246  	return s.Package.ForDisplay()
   247  }
   248  
// ModuleSourceRemote is a ModuleSource representing a remote location from
// which we can retrieve a module package.
//
// A ModuleSourceRemote can optionally include a "subdirectory" path, which
// means that it's selecting a sub-directory of the given package to use as
// the entry point into the package.
type ModuleSourceRemote struct {
	// Package is the address of the remote package that the requested
	// module belongs to.
	Package ModulePackage

	// If Subdir is non-empty then it represents a sub-directory within the
	// remote package which will serve as the entry-point for the package.
	//
	// Subdir uses a normalized forward-slash-based path syntax within the
	// virtual filesystem represented by the final package. It will never
	// include `../` or `./` sequences.
	Subdir string
}
   268  
   269  func parseModuleSourceRemote(raw string) (ModuleSourceRemote, error) {
   270  	var subDir string
   271  	raw, subDir = getmodules.SplitPackageSubdir(raw)
   272  	if strings.HasPrefix(subDir, "../") {
   273  		return ModuleSourceRemote{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir)
   274  	}
   275  
   276  	// A remote source address is really just a go-getter address resulting
   277  	// from go-getter's "detect" phase, which adds on the prefix specifying
   278  	// which protocol it should use and possibly also adjusts the
   279  	// protocol-specific part into different syntax.
   280  	//
   281  	// Note that for historical reasons this can potentially do network
   282  	// requests in order to disambiguate certain address types, although
   283  	// that's a legacy thing that is only for some specific, less-commonly-used
   284  	// address types. Most just do local string manipulation. We should
   285  	// aim to remove the network requests over time, if possible.
   286  	norm, moreSubDir, err := getmodules.NormalizePackageAddress(raw)
   287  	if err != nil {
   288  		// We must pass through the returned error directly here because
   289  		// the getmodules package has some special error types it uses
   290  		// for certain cases where the UI layer might want to include a
   291  		// more helpful error message.
   292  		return ModuleSourceRemote{}, err
   293  	}
   294  
   295  	if moreSubDir != "" {
   296  		switch {
   297  		case subDir != "":
   298  			// The detector's own subdir goes first, because the
   299  			// subdir we were given is conceptually relative to
   300  			// the subdirectory that we just detected.
   301  			subDir = path.Join(moreSubDir, subDir)
   302  		default:
   303  			subDir = path.Clean(moreSubDir)
   304  		}
   305  		if strings.HasPrefix(subDir, "../") {
   306  			// This would suggest a bug in a go-getter detector, but
   307  			// we'll catch it anyway to avoid doing something confusing
   308  			// downstream.
   309  			return ModuleSourceRemote{}, fmt.Errorf("detected subdirectory path %q of %q leads outside of the module package", subDir, norm)
   310  		}
   311  	}
   312  
   313  	return ModuleSourceRemote{
   314  		Package: ModulePackage(norm),
   315  		Subdir:  subDir,
   316  	}, nil
   317  }
   318  
// moduleSource marks ModuleSourceRemote as an implementation of ModuleSource.
func (s ModuleSourceRemote) moduleSource() {}
   320  
   321  func (s ModuleSourceRemote) String() string {
   322  	base := s.Package.String()
   323  
   324  	if s.Subdir != "" {
   325  		// Address contains query string
   326  		if strings.Contains(base, "?") {
   327  			parts := strings.SplitN(base, "?", 2)
   328  			return parts[0] + "//" + s.Subdir + "?" + parts[1]
   329  		}
   330  		return base + "//" + s.Subdir
   331  	}
   332  	return base
   333  }
   334  
// ForDisplay returns the same text as String.
func (s ModuleSourceRemote) ForDisplay() string {
	// The two string representations are identical for this address type.
	// This isn't really entirely true to the idea of "ForDisplay" since
	// it'll often include some additional components added in by the
	// go-getter detectors, but we don't have any function to turn a
	// "detected" string back into an idiomatic shorthand the user might've
	// entered.
	return s.String()
}
   344  
   345  // FromRegistry can be called on a remote source address that was returned
   346  // from a module registry, passing in the original registry source address
   347  // that the registry was asked about, in order to get the effective final
   348  // remote source address.
   349  //
   350  // Specifically, this method handles the situations where one or both of
   351  // the two addresses contain subdirectory paths, combining both when necessary
   352  // in order to ensure that both the registry's given path and the user's
   353  // given path are both respected.
   354  //
   355  // This will return nonsense if given a registry address other than the one
   356  // that generated the reciever via a registry lookup.
   357  func (s ModuleSourceRemote) FromRegistry(given ModuleSourceRegistry) ModuleSourceRemote {
   358  	ret := s // not a pointer, so this is a shallow copy
   359  
   360  	switch {
   361  	case s.Subdir != "" && given.Subdir != "":
   362  		ret.Subdir = path.Join(s.Subdir, given.Subdir)
   363  	case given.Subdir != "":
   364  		ret.Subdir = given.Subdir
   365  	}
   366  
   367  	return ret
   368  }