github.com/cycloidio/terraform@v1.1.10-0.20220513142504-76d5c768dc63/addrs/module_source.go (about) 1 package addrs 2 3 import ( 4 "fmt" 5 "path" 6 "regexp" 7 "strings" 8 9 svchost "github.com/hashicorp/terraform-svchost" 10 "github.com/cycloidio/terraform/getmodules" 11 ) 12 13 // ModuleSource is the general type for all three of the possible module source 14 // address types. The concrete implementations of this are ModuleSourceLocal, 15 // ModuleSourceRegistry, and ModuleSourceRemote. 16 type ModuleSource interface { 17 // String returns a full representation of the address, including any 18 // additional components that are typically implied by omission in 19 // user-written addresses. 20 // 21 // We typically use this longer representation in error message, in case 22 // the inclusion of normally-omitted components is helpful in debugging 23 // unexpected behavior. 24 String() string 25 26 // ForDisplay is similar to String but instead returns a representation of 27 // the idiomatic way to write the address in configuration, omitting 28 // components that are commonly just implied in addresses written by 29 // users. 30 // 31 // We typically use this shorter representation in informational messages, 32 // such as the note that we're about to start downloading a package. 33 ForDisplay() string 34 35 moduleSource() 36 } 37 38 var _ ModuleSource = ModuleSourceLocal("") 39 var _ ModuleSource = ModuleSourceRegistry{} 40 var _ ModuleSource = ModuleSourceRemote{} 41 42 var moduleSourceLocalPrefixes = []string{ 43 "./", 44 "../", 45 ".\\", 46 "..\\", 47 } 48 49 func ParseModuleSource(raw string) (ModuleSource, error) { 50 for _, prefix := range moduleSourceLocalPrefixes { 51 if strings.HasPrefix(raw, prefix) { 52 localAddr, err := parseModuleSourceLocal(raw) 53 if err != nil { 54 // This is to make sure we really return a nil ModuleSource in 55 // this case, rather than an interface containing the zero 56 // value of ModuleSourceLocal. 57 return nil, err 58 } 59 return localAddr, nil 60 } 61 } 62 63 // For historical reasons, whether an address is a registry 64 // address is defined only by whether it can be successfully 65 // parsed as one, and anything else must fall through to be 66 // parsed as a direct remote source, where go-getter might 67 // then recognize it as a filesystem path. This is odd 68 // but matches behavior we've had since Terraform v0.10 which 69 // existing modules may be relying on. 70 // (Notice that this means that there's never any path where 71 // the registry source parse error gets returned to the caller, 72 // which is annoying but has been true for many releases 73 // without it posing a serious problem in practice.) 74 if ret, err := parseModuleSourceRegistry(raw); err == nil { 75 return ret, nil 76 } 77 78 // If we get down here then we treat everything else as a 79 // remote address. In practice there's very little that 80 // go-getter doesn't consider invalid input, so even invalid 81 // nonsense will probably interpreted as _something_ here 82 // and then fail during installation instead. We can't 83 // really improve this situation for historical reasons. 84 remoteAddr, err := parseModuleSourceRemote(raw) 85 if err != nil { 86 // This is to make sure we really return a nil ModuleSource in 87 // this case, rather than an interface containing the zero 88 // value of ModuleSourceRemote. 89 return nil, err 90 } 91 return remoteAddr, nil 92 } 93 94 // ModuleSourceLocal is a ModuleSource representing a local path reference 95 // from the caller's directory to the callee's directory within the same 96 // module package. 97 // 98 // A "module package" here means a set of modules distributed together in 99 // the same archive, repository, or similar. That's a significant distinction 100 // because we always download and cache entire module packages at once, 101 // and then create relative references within the same directory in order 102 // to ensure all modules in the package are looking at a consistent filesystem 103 // layout. We also assume that modules within a package are maintained together, 104 // which means that cross-cutting maintenence across all of them would be 105 // possible. 106 // 107 // The actual value of a ModuleSourceLocal is a normalized relative path using 108 // forward slashes, even on operating systems that have other conventions, 109 // because we're representing traversal within the logical filesystem 110 // represented by the containing package, not actually within the physical 111 // filesystem we unpacked the package into. We should typically not construct 112 // ModuleSourceLocal values directly, except in tests where we can ensure 113 // the value meets our assumptions. Use ParseModuleSource instead if the 114 // input string is not hard-coded in the program. 115 type ModuleSourceLocal string 116 117 func parseModuleSourceLocal(raw string) (ModuleSourceLocal, error) { 118 // As long as we have a suitable prefix (detected by ParseModuleSource) 119 // there is no failure case for local paths: we just use the "path" 120 // package's cleaning logic to remove any redundant "./" and "../" 121 // sequences and any duplicate slashes and accept whatever that 122 // produces. 123 124 // Although using backslashes (Windows-style) is non-idiomatic, we do 125 // allow it and just normalize it away, so the rest of Terraform will 126 // only see the forward-slash form. 127 if strings.Contains(raw, `\`) { 128 // Note: We use string replacement rather than filepath.ToSlash 129 // here because the filepath package behavior varies by current 130 // platform, but we want to interpret configured paths the same 131 // across all platforms: these are virtual paths within a module 132 // package, not physical filesystem paths. 133 raw = strings.ReplaceAll(raw, `\`, "/") 134 } 135 136 // Note that we could've historically blocked using "//" in a path here 137 // in order to avoid confusion with the subdir syntax in remote addresses, 138 // but we historically just treated that as the same as a single slash 139 // and so we continue to do that now for compatibility. Clean strips those 140 // out and reduces them to just a single slash. 141 clean := path.Clean(raw) 142 143 // However, we do need to keep a single "./" on the front if it isn't 144 // a "../" path, or else it would be ambigous with the registry address 145 // syntax. 146 if !strings.HasPrefix(clean, "../") { 147 clean = "./" + clean 148 } 149 150 return ModuleSourceLocal(clean), nil 151 } 152 153 func (s ModuleSourceLocal) moduleSource() {} 154 155 func (s ModuleSourceLocal) String() string { 156 // We assume that our underlying string was already normalized at 157 // construction, so we just return it verbatim. 158 return string(s) 159 } 160 161 func (s ModuleSourceLocal) ForDisplay() string { 162 return string(s) 163 } 164 165 // ModuleSourceRegistry is a ModuleSource representing a module listed in a 166 // Terraform module registry. 167 // 168 // A registry source isn't a direct source location but rather an indirection 169 // over a ModuleSourceRemote. The job of a registry is to translate the 170 // combination of a ModuleSourceRegistry and a module version number into 171 // a concrete ModuleSourceRemote that Terraform will then download and 172 // install. 173 type ModuleSourceRegistry struct { 174 // PackageAddr is the registry package that the target module belongs to. 175 // The module installer must translate this into a ModuleSourceRemote 176 // using the registry API and then take that underlying address's 177 // PackageAddr in order to find the actual package location. 178 PackageAddr ModuleRegistryPackage 179 180 // If Subdir is non-empty then it represents a sub-directory within the 181 // remote package that the registry address eventually resolves to. 182 // This will ultimately become the suffix of the Subdir of the 183 // ModuleSourceRemote that the registry address translates to. 184 // 185 // Subdir uses a normalized forward-slash-based path syntax within the 186 // virtual filesystem represented by the final package. It will never 187 // include `../` or `./` sequences. 188 Subdir string 189 } 190 191 // DefaultModuleRegistryHost is the hostname used for registry-based module 192 // source addresses that do not have an explicit hostname. 193 const DefaultModuleRegistryHost = svchost.Hostname("registry.terraform.io") 194 195 var moduleRegistryNamePattern = regexp.MustCompile("^[0-9A-Za-z](?:[0-9A-Za-z-_]{0,62}[0-9A-Za-z])?$") 196 var moduleRegistryTargetSystemPattern = regexp.MustCompile("^[0-9a-z]{1,64}$") 197 198 func parseModuleSourceRegistry(raw string) (ModuleSourceRegistry, error) { 199 var err error 200 201 var subDir string 202 raw, subDir = getmodules.SplitPackageSubdir(raw) 203 if strings.HasPrefix(subDir, "../") { 204 return ModuleSourceRegistry{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir) 205 } 206 207 parts := strings.Split(raw, "/") 208 // A valid registry address has either three or four parts, because the 209 // leading hostname part is optional. 210 if len(parts) != 3 && len(parts) != 4 { 211 return ModuleSourceRegistry{}, fmt.Errorf("a module registry source address must have either three or four slash-separated components") 212 } 213 214 host := DefaultModuleRegistryHost 215 if len(parts) == 4 { 216 host, err = svchost.ForComparison(parts[0]) 217 if err != nil { 218 // The svchost library doesn't produce very good error messages to 219 // return to an end-user, so we'll use some custom ones here. 220 switch { 221 case strings.Contains(parts[0], "--"): 222 // Looks like possibly punycode, which we don't allow here 223 // to ensure that source addresses are written readably. 224 return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q; internationalized domain names must be given as direct unicode characters, not in punycode", parts[0]) 225 default: 226 return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname %q", parts[0]) 227 } 228 } 229 if !strings.Contains(host.String(), ".") { 230 return ModuleSourceRegistry{}, fmt.Errorf("invalid module registry hostname: must contain at least one dot") 231 } 232 // Discard the hostname prefix now that we've processed it 233 parts = parts[1:] 234 } 235 236 ret := ModuleSourceRegistry{ 237 PackageAddr: ModuleRegistryPackage{ 238 Host: host, 239 }, 240 241 Subdir: subDir, 242 } 243 244 if host == svchost.Hostname("github.com") || host == svchost.Hostname("bitbucket.org") { 245 return ret, fmt.Errorf("can't use %q as a module registry host, because it's reserved for installing directly from version control repositories", host) 246 } 247 248 if ret.PackageAddr.Namespace, err = parseModuleRegistryName(parts[0]); err != nil { 249 if strings.Contains(parts[0], ".") { 250 // Seems like the user omitted one of the latter components in 251 // an address with an explicit hostname. 252 return ret, fmt.Errorf("source address must have three more components after the hostname: the namespace, the name, and the target system") 253 } 254 return ret, fmt.Errorf("invalid namespace %q: %s", parts[0], err) 255 } 256 if ret.PackageAddr.Name, err = parseModuleRegistryName(parts[1]); err != nil { 257 return ret, fmt.Errorf("invalid module name %q: %s", parts[1], err) 258 } 259 if ret.PackageAddr.TargetSystem, err = parseModuleRegistryTargetSystem(parts[2]); err != nil { 260 if strings.Contains(parts[2], "?") { 261 // The user was trying to include a query string, probably? 262 return ret, fmt.Errorf("module registry addresses may not include a query string portion") 263 } 264 return ret, fmt.Errorf("invalid target system %q: %s", parts[2], err) 265 } 266 267 return ret, nil 268 } 269 270 // parseModuleRegistryName validates and normalizes a string in either the 271 // "namespace" or "name" position of a module registry source address. 272 func parseModuleRegistryName(given string) (string, error) { 273 // Similar to the names in provider source addresses, we defined these 274 // to be compatible with what filesystems and typical remote systems 275 // like GitHub allow in names. Unfortunately we didn't end up defining 276 // these exactly equivalently: provider names can only use dashes as 277 // punctuation, whereas module names can use underscores. So here we're 278 // using some regular expressions from the original module source 279 // implementation, rather than using the IDNA rules as we do in 280 // ParseProviderPart. 281 282 if !moduleRegistryNamePattern.MatchString(given) { 283 return "", fmt.Errorf("must be between one and 64 characters, including ASCII letters, digits, dashes, and underscores, where dashes and underscores may not be the prefix or suffix") 284 } 285 286 // We also skip normalizing the name to lowercase, because we historically 287 // didn't do that and so existing module registries might be doing 288 // case-sensitive matching. 289 return given, nil 290 } 291 292 // parseModuleRegistryTargetSystem validates and normalizes a string in the 293 // "target system" position of a module registry source address. This is 294 // what we historically called "provider" but never actually enforced as 295 // being a provider address, and now _cannot_ be a provider address because 296 // provider addresses have three slash-separated components of their own. 297 func parseModuleRegistryTargetSystem(given string) (string, error) { 298 // Similar to the names in provider source addresses, we defined these 299 // to be compatible with what filesystems and typical remote systems 300 // like GitHub allow in names. Unfortunately we didn't end up defining 301 // these exactly equivalently: provider names can only use dashes as 302 // punctuation, whereas module names can use underscores. So here we're 303 // using some regular expressions from the original module source 304 // implementation, rather than using the IDNA rules as we do in 305 // ParseProviderPart. 306 307 if !moduleRegistryTargetSystemPattern.MatchString(given) { 308 return "", fmt.Errorf("must be between one and 64 ASCII letters or digits") 309 } 310 311 // We also skip normalizing the name to lowercase, because we historically 312 // didn't do that and so existing module registries might be doing 313 // case-sensitive matching. 314 return given, nil 315 } 316 317 func (s ModuleSourceRegistry) moduleSource() {} 318 319 func (s ModuleSourceRegistry) String() string { 320 if s.Subdir != "" { 321 return s.PackageAddr.String() + "//" + s.Subdir 322 } 323 return s.PackageAddr.String() 324 } 325 326 func (s ModuleSourceRegistry) ForDisplay() string { 327 if s.Subdir != "" { 328 return s.PackageAddr.ForDisplay() + "//" + s.Subdir 329 } 330 return s.PackageAddr.ForDisplay() 331 } 332 333 // ModuleSourceRemote is a ModuleSource representing a remote location from 334 // which we can retrieve a module package. 335 // 336 // A ModuleSourceRemote can optionally include a "subdirectory" path, which 337 // means that it's selecting a sub-directory of the given package to use as 338 // the entry point into the package. 339 type ModuleSourceRemote struct { 340 // PackageAddr is the address of the remote package that the requested 341 // module belongs to. 342 PackageAddr ModulePackage 343 344 // If Subdir is non-empty then it represents a sub-directory within the 345 // remote package which will serve as the entry-point for the package. 346 // 347 // Subdir uses a normalized forward-slash-based path syntax within the 348 // virtual filesystem represented by the final package. It will never 349 // include `../` or `./` sequences. 350 Subdir string 351 } 352 353 func parseModuleSourceRemote(raw string) (ModuleSourceRemote, error) { 354 var subDir string 355 raw, subDir = getmodules.SplitPackageSubdir(raw) 356 if strings.HasPrefix(subDir, "../") { 357 return ModuleSourceRemote{}, fmt.Errorf("subdirectory path %q leads outside of the module package", subDir) 358 } 359 360 // A remote source address is really just a go-getter address resulting 361 // from go-getter's "detect" phase, which adds on the prefix specifying 362 // which protocol it should use and possibly also adjusts the 363 // protocol-specific part into different syntax. 364 // 365 // Note that for historical reasons this can potentially do network 366 // requests in order to disambiguate certain address types, although 367 // that's a legacy thing that is only for some specific, less-commonly-used 368 // address types. Most just do local string manipulation. We should 369 // aim to remove the network requests over time, if possible. 370 norm, moreSubDir, err := getmodules.NormalizePackageAddress(raw) 371 if err != nil { 372 // We must pass through the returned error directly here because 373 // the getmodules package has some special error types it uses 374 // for certain cases where the UI layer might want to include a 375 // more helpful error message. 376 return ModuleSourceRemote{}, err 377 } 378 379 if moreSubDir != "" { 380 switch { 381 case subDir != "": 382 // The detector's own subdir goes first, because the 383 // subdir we were given is conceptually relative to 384 // the subdirectory that we just detected. 385 subDir = path.Join(moreSubDir, subDir) 386 default: 387 subDir = path.Clean(moreSubDir) 388 } 389 if strings.HasPrefix(subDir, "../") { 390 // This would suggest a bug in a go-getter detector, but 391 // we'll catch it anyway to avoid doing something confusing 392 // downstream. 393 return ModuleSourceRemote{}, fmt.Errorf("detected subdirectory path %q of %q leads outside of the module package", subDir, norm) 394 } 395 } 396 397 return ModuleSourceRemote{ 398 PackageAddr: ModulePackage(norm), 399 Subdir: subDir, 400 }, nil 401 } 402 403 func (s ModuleSourceRemote) moduleSource() {} 404 405 func (s ModuleSourceRemote) String() string { 406 if s.Subdir != "" { 407 return s.PackageAddr.String() + "//" + s.Subdir 408 } 409 return s.PackageAddr.String() 410 } 411 412 func (s ModuleSourceRemote) ForDisplay() string { 413 // The two string representations are identical for this address type. 414 // This isn't really entirely true to the idea of "ForDisplay" since 415 // it'll often include some additional components added in by the 416 // go-getter detectors, but we don't have any function to turn a 417 // "detected" string back into an idiomatic shorthand the user might've 418 // entered. 419 return s.String() 420 } 421 422 // FromRegistry can be called on a remote source address that was returned 423 // from a module registry, passing in the original registry source address 424 // that the registry was asked about, in order to get the effective final 425 // remote source address. 426 // 427 // Specifically, this method handles the situations where one or both of 428 // the two addresses contain subdirectory paths, combining both when necessary 429 // in order to ensure that both the registry's given path and the user's 430 // given path are both respected. 431 // 432 // This will return nonsense if given a registry address other than the one 433 // that generated the reciever via a registry lookup. 434 func (s ModuleSourceRemote) FromRegistry(given ModuleSourceRegistry) ModuleSourceRemote { 435 ret := s // not a pointer, so this is a shallow copy 436 437 switch { 438 case s.Subdir != "" && given.Subdir != "": 439 ret.Subdir = path.Join(s.Subdir, given.Subdir) 440 case given.Subdir != "": 441 ret.Subdir = given.Subdir 442 } 443 444 return ret 445 }