github.com/opentofu/opentofu@v1.7.1/internal/addrs/module_source.go (about)

     1  // Copyright (c) The OpenTofu Authors
     2  // SPDX-License-Identifier: MPL-2.0
     3  // Copyright (c) 2023 HashiCorp, Inc.
     4  // SPDX-License-Identifier: MPL-2.0
     5  
     6  package addrs
     7  
     8  import (
     9  	"fmt"
    10  	"path"
    11  	"strings"
    12  
    13  	"github.com/opentofu/opentofu/internal/getmodules"
    14  	tfaddr "github.com/opentofu/registry-address"
    15  )
    16  
// ModuleSource is the general type for all three of the possible module source
// address types. The concrete implementations of this are ModuleSourceLocal,
// ModuleSourceRegistry, and ModuleSourceRemote.
type ModuleSource interface {
	// String returns a full representation of the address, including any
	// additional components that are typically implied by omission in
	// user-written addresses.
	//
	// We typically use this longer representation in error messages, in case
	// the inclusion of normally-omitted components is helpful in debugging
	// unexpected behavior.
	String() string

	// ForDisplay is similar to String but instead returns a representation of
	// the idiomatic way to write the address in configuration, omitting
	// components that are commonly just implied in addresses written by
	// users.
	//
	// We typically use this shorter representation in informational messages,
	// such as the note that we're about to start downloading a package.
	ForDisplay() string

	// moduleSource is an unexported marker method. Because other packages
	// cannot name it, implementations of ModuleSource are expected to be
	// declared in this package.
	moduleSource()
}
    41  
    42  var _ ModuleSource = ModuleSourceLocal("")
    43  var _ ModuleSource = ModuleSourceRegistry{}
    44  var _ ModuleSource = ModuleSourceRemote{}
    45  
    46  var moduleSourceLocalPrefixes = []string{
    47  	"./",
    48  	"../",
    49  	".\\",
    50  	"..\\",
    51  }
    52  
    53  // ParseModuleSource parses a module source address as given in the "source"
    54  // argument inside a "module" block in the configuration.
    55  //
    56  // For historical reasons this syntax is a bit overloaded, supporting three
    57  // different address types:
    58  //   - Local paths starting with either ./ or ../, which are special because
    59  //     OpenTofu considers them to belong to the same "package" as the caller.
    60  //   - Module registry addresses, given as either NAMESPACE/NAME/SYSTEM or
    61  //     HOST/NAMESPACE/NAME/SYSTEM, in which case the remote registry serves
    62  //     as an indirection over the third address type that follows.
    63  //   - Various URL-like and other heuristically-recognized strings which
    64  //     we currently delegate to the external library go-getter.
    65  //
    66  // There is some ambiguity between the module registry addresses and go-getter's
    67  // very liberal heuristics and so this particular function will typically treat
    68  // an invalid registry address as some other sort of remote source address
    69  // rather than returning an error. If you know that you're expecting a
    70  // registry address in particular, use ParseModuleSourceRegistry instead, which
    71  // can therefore expose more detailed error messages about registry address
    72  // parsing in particular.
    73  func ParseModuleSource(raw string) (ModuleSource, error) {
    74  	if isModuleSourceLocal(raw) {
    75  		localAddr, err := parseModuleSourceLocal(raw)
    76  		if err != nil {
    77  			// This is to make sure we really return a nil ModuleSource in
    78  			// this case, rather than an interface containing the zero
    79  			// value of ModuleSourceLocal.
    80  			return nil, err
    81  		}
    82  		return localAddr, nil
    83  	}
    84  
    85  	// For historical reasons, whether an address is a registry
    86  	// address is defined only by whether it can be successfully
    87  	// parsed as one, and anything else must fall through to be
    88  	// parsed as a direct remote source, where go-getter might
    89  	// then recognize it as a filesystem path. This is odd
    90  	// but matches behavior we've had since OpenTofu v0.10 which
    91  	// existing modules may be relying on.
    92  	// (Notice that this means that there's never any path where
    93  	// the registry source parse error gets returned to the caller,
    94  	// which is annoying but has been true for many releases
    95  	// without it posing a serious problem in practice.)
    96  	if ret, err := ParseModuleSourceRegistry(raw); err == nil {
    97  		return ret, nil
    98  	}
    99  
   100  	// If we get down here then we treat everything else as a
   101  	// remote address. In practice there's very little that
   102  	// go-getter doesn't consider invalid input, so even invalid
   103  	// nonsense will probably interpreted as _something_ here
   104  	// and then fail during installation instead. We can't
   105  	// really improve this situation for historical reasons.
   106  	remoteAddr, err := parseModuleSourceRemote(raw)
   107  	if err != nil {
   108  		// This is to make sure we really return a nil ModuleSource in
   109  		// this case, rather than an interface containing the zero
   110  		// value of ModuleSourceRemote.
   111  		return nil, err
   112  	}
   113  	return remoteAddr, nil
   114  }
   115  
// ModuleSourceLocal is a ModuleSource representing a local path reference
// from the caller's directory to the callee's directory within the same
// module package.
//
// A "module package" here means a set of modules distributed together in
// the same archive, repository, or similar. That's a significant distinction
// because we always download and cache entire module packages at once,
// and then create relative references within the same directory in order
// to ensure all modules in the package are looking at a consistent filesystem
// layout. We also assume that modules within a package are maintained together,
// which means that cross-cutting maintenance across all of them would be
// possible.
//
// The actual value of a ModuleSourceLocal is a normalized relative path using
// forward slashes, even on operating systems that have other conventions,
// because we're representing traversal within the logical filesystem
// represented by the containing package, not actually within the physical
// filesystem we unpacked the package into. We should typically not construct
// ModuleSourceLocal values directly, except in tests where we can ensure
// the value meets our assumptions. Use ParseModuleSource instead if the
// input string is not hard-coded in the program.
type ModuleSourceLocal string
   138  
   139  func parseModuleSourceLocal(raw string) (ModuleSourceLocal, error) {
   140  	// As long as we have a suitable prefix (detected by ParseModuleSource)
   141  	// there is no failure case for local paths: we just use the "path"
   142  	// package's cleaning logic to remove any redundant "./" and "../"
   143  	// sequences and any duplicate slashes and accept whatever that
   144  	// produces.
   145  
   146  	// Although using backslashes (Windows-style) is non-idiomatic, we do
   147  	// allow it and just normalize it away, so the rest of OpenTofu will
   148  	// only see the forward-slash form.
   149  	if strings.Contains(raw, `\`) {
   150  		// Note: We use string replacement rather than filepath.ToSlash
   151  		// here because the filepath package behavior varies by current
   152  		// platform, but we want to interpret configured paths the same
   153  		// across all platforms: these are virtual paths within a module
   154  		// package, not physical filesystem paths.
   155  		raw = strings.ReplaceAll(raw, `\`, "/")
   156  	}
   157  
   158  	// Note that we could've historically blocked using "//" in a path here
   159  	// in order to avoid confusion with the subdir syntax in remote addresses,
   160  	// but we historically just treated that as the same as a single slash
   161  	// and so we continue to do that now for compatibility. Clean strips those
   162  	// out and reduces them to just a single slash.
   163  	clean := path.Clean(raw)
   164  
   165  	// However, we do need to keep a single "./" on the front if it isn't
   166  	// a "../" path, or else it would be ambigous with the registry address
   167  	// syntax.
   168  	if !strings.HasPrefix(clean, "../") {
   169  		clean = "./" + clean
   170  	}
   171  
   172  	return ModuleSourceLocal(clean), nil
   173  }
   174  
   175  func isModuleSourceLocal(raw string) bool {
   176  	for _, prefix := range moduleSourceLocalPrefixes {
   177  		if strings.HasPrefix(raw, prefix) {
   178  			return true
   179  		}
   180  	}
   181  	return false
   182  }
   183  
// moduleSource marks ModuleSourceLocal as an implementation of ModuleSource.
func (s ModuleSourceLocal) moduleSource() {}
   185  
// String returns the underlying path string unchanged.
func (s ModuleSourceLocal) String() string {
	// We assume that our underlying string was already normalized at
	// construction, so we just return it verbatim.
	return string(s)
}
   191  
   192  func (s ModuleSourceLocal) ForDisplay() string {
   193  	return string(s)
   194  }
   195  
// ModuleSourceRegistry is a ModuleSource representing a module listed in an
// OpenTofu module registry.
//
// A registry source isn't a direct source location but rather an indirection
// over a ModuleSourceRemote. The job of a registry is to translate the
// combination of a ModuleSourceRegistry and a module version number into
// a concrete ModuleSourceRemote that OpenTofu will then download and
// install.
type ModuleSourceRegistry tfaddr.Module
   205  
// DefaultModuleRegistryHost is the hostname used for registry-based module
// source addresses that do not have an explicit hostname. Its value is taken
// from the constant of the same name in the registry-address package.
const DefaultModuleRegistryHost = tfaddr.DefaultModuleRegistryHost
   209  
   210  // ParseModuleSourceRegistry is a variant of ParseModuleSource which only
   211  // accepts module registry addresses, and will reject any other address type.
   212  //
   213  // Use this instead of ParseModuleSource if you know from some other surrounding
   214  // context that an address is intended to be a registry address rather than
   215  // some other address type, which will then allow for better error reporting
   216  // due to the additional information about user intent.
   217  func ParseModuleSourceRegistry(raw string) (ModuleSource, error) {
   218  	// Before we delegate to the "real" function we'll just make sure this
   219  	// doesn't look like a local source address, so we can return a better
   220  	// error message for that situation.
   221  	if isModuleSourceLocal(raw) {
   222  		return ModuleSourceRegistry{}, fmt.Errorf("can't use local directory %q as a module registry address", raw)
   223  	}
   224  
   225  	src, err := tfaddr.ParseModuleSource(raw)
   226  	if err != nil {
   227  		return nil, err
   228  	}
   229  	return ModuleSourceRegistry{
   230  		Package: src.Package,
   231  		Subdir:  src.Subdir,
   232  	}, nil
   233  }
   234  
// moduleSource marks ModuleSourceRegistry as an implementation of ModuleSource.
func (s ModuleSourceRegistry) moduleSource() {}
   236  
   237  func (s ModuleSourceRegistry) String() string {
   238  	if s.Subdir != "" {
   239  		return s.Package.String() + "//" + s.Subdir
   240  	}
   241  	return s.Package.String()
   242  }
   243  
   244  func (s ModuleSourceRegistry) ForDisplay() string {
   245  	if s.Subdir != "" {
   246  		return s.Package.ForDisplay() + "//" + s.Subdir
   247  	}
   248  	return s.Package.ForDisplay()
   249  }
   250  
// ModuleSourceRemote is a ModuleSource representing a remote location from
// which we can retrieve a module package.
//
// A ModuleSourceRemote can optionally include a "subdirectory" path, which
// means that it's selecting a sub-directory of the given package to use as
// the entry point into the package. In the string forms of the address the
// subdirectory is written after a "//" separator.
type ModuleSourceRemote struct {
	// Package is the address of the remote package that the requested
	// module belongs to.
	Package ModulePackage

	// If Subdir is non-empty then it represents a sub-directory within the
	// remote package which will serve as the entry-point for the package.
	//
	// Subdir uses a normalized forward-slash-based path syntax within the
	// virtual filesystem represented by the final package. It will never
	// include `../` or `./` sequences.
	Subdir string
}
   270  
   271  func parseModuleSourceRemote(raw string) (ModuleSourceRemote, error) {
   272  	var subDir string
   273  	raw, subDir = getmodules.SplitPackageSubdir(raw)
   274  	if strings.HasPrefix(subDir, "../") {
   275  		return ModuleSourceRemote{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir)
   276  	}
   277  
   278  	// A remote source address is really just a go-getter address resulting
   279  	// from go-getter's "detect" phase, which adds on the prefix specifying
   280  	// which protocol it should use and possibly also adjusts the
   281  	// protocol-specific part into different syntax.
   282  	//
   283  	// Note that for historical reasons this can potentially do network
   284  	// requests in order to disambiguate certain address types, although
   285  	// that's a legacy thing that is only for some specific, less-commonly-used
   286  	// address types. Most just do local string manipulation. We should
   287  	// aim to remove the network requests over time, if possible.
   288  	norm, moreSubDir, err := getmodules.NormalizePackageAddress(raw)
   289  	if err != nil {
   290  		// We must pass through the returned error directly here because
   291  		// the getmodules package has some special error types it uses
   292  		// for certain cases where the UI layer might want to include a
   293  		// more helpful error message.
   294  		return ModuleSourceRemote{}, err
   295  	}
   296  
   297  	if moreSubDir != "" {
   298  		switch {
   299  		case subDir != "":
   300  			// The detector's own subdir goes first, because the
   301  			// subdir we were given is conceptually relative to
   302  			// the subdirectory that we just detected.
   303  			subDir = path.Join(moreSubDir, subDir)
   304  		default:
   305  			subDir = path.Clean(moreSubDir)
   306  		}
   307  		if strings.HasPrefix(subDir, "../") {
   308  			// This would suggest a bug in a go-getter detector, but
   309  			// we'll catch it anyway to avoid doing something confusing
   310  			// downstream.
   311  			return ModuleSourceRemote{}, fmt.Errorf("detected subdirectory path %q of %q leads outside of the module package", subDir, norm)
   312  		}
   313  	}
   314  
   315  	return ModuleSourceRemote{
   316  		Package: ModulePackage(norm),
   317  		Subdir:  subDir,
   318  	}, nil
   319  }
   320  
// moduleSource marks ModuleSourceRemote as an implementation of ModuleSource.
func (s ModuleSourceRemote) moduleSource() {}
   322  
   323  func (s ModuleSourceRemote) String() string {
   324  	base := s.Package.String()
   325  
   326  	if s.Subdir != "" {
   327  		// Address contains query string
   328  		if strings.Contains(base, "?") {
   329  			parts := strings.SplitN(base, "?", 2)
   330  			return parts[0] + "//" + s.Subdir + "?" + parts[1]
   331  		}
   332  		return base + "//" + s.Subdir
   333  	}
   334  	return base
   335  }
   336  
// ForDisplay returns the same text as String.
func (s ModuleSourceRemote) ForDisplay() string {
	// The two string representations are identical for this address type.
	// This isn't really entirely true to the idea of "ForDisplay" since
	// it'll often include some additional components added in by the
	// go-getter detectors, but we don't have any function to turn a
	// "detected" string back into an idiomatic shorthand the user might've
	// entered.
	return s.String()
}
   346  
   347  // FromRegistry can be called on a remote source address that was returned
   348  // from a module registry, passing in the original registry source address
   349  // that the registry was asked about, in order to get the effective final
   350  // remote source address.
   351  //
   352  // Specifically, this method handles the situations where one or both of
   353  // the two addresses contain subdirectory paths, combining both when necessary
   354  // in order to ensure that both the registry's given path and the user's
   355  // given path are both respected.
   356  //
   357  // This will return nonsense if given a registry address other than the one
   358  // that generated the reciever via a registry lookup.
   359  func (s ModuleSourceRemote) FromRegistry(given ModuleSourceRegistry) ModuleSourceRemote {
   360  	ret := s // not a pointer, so this is a shallow copy
   361  
   362  	switch {
   363  	case s.Subdir != "" && given.Subdir != "":
   364  		ret.Subdir = path.Join(s.Subdir, given.Subdir)
   365  	case given.Subdir != "":
   366  		ret.Subdir = given.Subdir
   367  	}
   368  
   369  	return ret
   370  }