// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package getproviders

import (
	"crypto/sha256"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"

	"golang.org/x/mod/sumdb/dirhash"
)

// Hash is a specially-formatted string representing a checksum of a package
// or the contents of the package.
//
// A Hash string always starts with a scheme, which is a short series of
// alphanumeric characters followed by a colon, and then the remainder of the
// string has a different meaning depending on the scheme prefix.
//
// The currently-valid schemes are defined as the constants of type HashScheme
// in this package.
//
// Callers outside of this package must not create Hash values via direct
// conversion. Instead, use either the HashScheme.New method on one of the
// HashScheme constants (for a hash of a particular scheme) or the ParseHash
// function (if hashes of any scheme are acceptable).
type Hash string

// NilHash is the zero value of Hash. It isn't a valid hash: it contains no
// scheme, so its Scheme, HasScheme, and Value methods will panic.
const NilHash = Hash("")

// ParseHash parses the string representation of a Hash into a Hash value.
//
// A particular version of Terraform only supports a fixed set of hash schemes,
// but this function intentionally allows unrecognized schemes so that we can
// silently ignore other schemes that may be introduced in the future. For
// that reason, the Scheme method of the returned Hash may return a value that
// isn't in one of the HashScheme constants in this package.
//
// This function doesn't verify that the value portion of the given hash makes
// sense for the given scheme. Invalid values are just considered to not match
// any packages.
48 // 49 // If this function returns an error then the returned Hash is invalid and 50 // must not be used. 51 func ParseHash(s string) (Hash, error) { 52 colon := strings.Index(s, ":") 53 if colon < 1 { // 1 because a zero-length scheme is not allowed 54 return NilHash, fmt.Errorf("hash string must start with a scheme keyword followed by a colon") 55 } 56 return Hash(s), nil 57 } 58 59 // MustParseHash is a wrapper around ParseHash that panics if it returns an 60 // error. 61 func MustParseHash(s string) Hash { 62 hash, err := ParseHash(s) 63 if err != nil { 64 panic(err.Error()) 65 } 66 return hash 67 } 68 69 // Scheme returns the scheme of the recieving hash. If the receiver is not 70 // using valid syntax then this method will panic. 71 func (h Hash) Scheme() HashScheme { 72 colon := strings.Index(string(h), ":") 73 if colon < 0 { 74 panic(fmt.Sprintf("invalid hash string %q", h)) 75 } 76 return HashScheme(h[:colon+1]) 77 } 78 79 // HasScheme returns true if the given scheme matches the receiver's scheme, 80 // or false otherwise. 81 // 82 // If the receiver is not using valid syntax then this method will panic. 83 func (h Hash) HasScheme(want HashScheme) bool { 84 return h.Scheme() == want 85 } 86 87 // Value returns the scheme-specific value from the recieving hash. The 88 // meaning of this value depends on the scheme. 89 // 90 // If the receiver is not using valid syntax then this method will panic. 91 func (h Hash) Value() string { 92 colon := strings.Index(string(h), ":") 93 if colon < 0 { 94 panic(fmt.Sprintf("invalid hash string %q", h)) 95 } 96 return string(h[colon+1:]) 97 } 98 99 // String returns a string representation of the receiving hash. 100 func (h Hash) String() string { 101 return string(h) 102 } 103 104 // GoString returns a Go syntax representation of the receiving hash. 105 // 106 // This is here primarily to help with producing descriptive test failure 107 // output; these results are not particularly useful at runtime. 
108 func (h Hash) GoString() string { 109 if h == NilHash { 110 return "getproviders.NilHash" 111 } 112 switch scheme := h.Scheme(); scheme { 113 case HashScheme1: 114 return fmt.Sprintf("getproviders.HashScheme1.New(%q)", h.Value()) 115 case HashSchemeZip: 116 return fmt.Sprintf("getproviders.HashSchemeZip.New(%q)", h.Value()) 117 default: 118 // This fallback is for when we encounter lock files or API responses 119 // with hash schemes that the current version of Terraform isn't 120 // familiar with. They were presumably introduced in a later version. 121 return fmt.Sprintf("getproviders.HashScheme(%q).New(%q)", scheme, h.Value()) 122 } 123 } 124 125 // HashScheme is an enumeration of schemes that are allowed for values of type 126 // Hash. 127 type HashScheme string 128 129 const ( 130 // HashScheme1 is the scheme identifier for the first hash scheme. 131 // 132 // Use HashV1 (or one of its wrapper functions) to calculate hashes with 133 // this scheme. 134 HashScheme1 HashScheme = HashScheme("h1:") 135 136 // HashSchemeZip is the scheme identifier for the legacy hash scheme that 137 // applies to distribution archives (.zip files) rather than package 138 // contents, and can therefore only be verified against the original 139 // distribution .zip file, not an extracted directory. 140 // 141 // Use PackageHashLegacyZipSHA to calculate hashes with this scheme. 142 HashSchemeZip HashScheme = HashScheme("zh:") 143 ) 144 145 // New creates a new Hash value with the receiver as its scheme and the given 146 // raw string as its value. 147 // 148 // It's the caller's responsibility to make sure that the given value makes 149 // sense for the selected scheme. 150 func (hs HashScheme) New(value string) Hash { 151 return Hash(string(hs) + value) 152 } 153 154 // PackageHash computes a hash of the contents of the package at the given 155 // location, using whichever hash algorithm is the current default. 
156 // 157 // Currently, this method returns version 1 hashes as produced by the 158 // function PackageHashV1, but this function may switch to other versions in 159 // later releases. Call PackageHashV1 directly if you specifically need a V1 160 // hash. 161 // 162 // PackageHash can be used only with the two local package location types 163 // PackageLocalDir and PackageLocalArchive, because it needs to access the 164 // contents of the indicated package in order to compute the hash. If given 165 // a non-local location this function will always return an error. 166 func PackageHash(loc PackageLocation) (Hash, error) { 167 return PackageHashV1(loc) 168 } 169 170 // PackageMatchesHash returns true if the package at the given location matches 171 // the given hash, or false otherwise. 172 // 173 // If it cannot read from the given location, or if the given hash is in an 174 // unsupported format, PackageMatchesHash returns an error. 175 // 176 // There is currently only one hash format, as implemented by HashV1. However, 177 // if others are introduced in future PackageMatchesHash may accept multiple 178 // formats, and may generate errors for any formats that become obsolete. 179 // 180 // PackageMatchesHash can be used only with the two local package location types 181 // PackageLocalDir and PackageLocalArchive, because it needs to access the 182 // contents of the indicated package in order to compute the hash. If given 183 // a non-local location this function will always return an error. 
184 func PackageMatchesHash(loc PackageLocation, want Hash) (bool, error) { 185 switch want.Scheme() { 186 case HashScheme1: 187 got, err := PackageHashV1(loc) 188 if err != nil { 189 return false, err 190 } 191 return got == want, nil 192 case HashSchemeZip: 193 archiveLoc, ok := loc.(PackageLocalArchive) 194 if !ok { 195 return false, fmt.Errorf(`ziphash scheme ("zh:" prefix) is not supported for unpacked provider packages`) 196 } 197 got, err := PackageHashLegacyZipSHA(archiveLoc) 198 if err != nil { 199 return false, err 200 } 201 return got == want, nil 202 default: 203 return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)") 204 } 205 } 206 207 // PackageMatchesAnyHash returns true if the package at the given location 208 // matches at least one of the given hashes, or false otherwise. 209 // 210 // If it cannot read from the given location, PackageMatchesAnyHash returns an 211 // error. Unlike the singular PackageMatchesHash, PackageMatchesAnyHash 212 // considers unsupported hash formats as successfully non-matching, rather 213 // than returning an error. 214 // 215 // PackageMatchesAnyHash can be used only with the two local package location 216 // types PackageLocalDir and PackageLocalArchive, because it needs to access the 217 // contents of the indicated package in order to compute the hash. If given 218 // a non-local location this function will always return an error. 219 func PackageMatchesAnyHash(loc PackageLocation, allowed []Hash) (bool, error) { 220 // It's likely that we'll have multiple hashes of the same scheme in 221 // the "allowed" set, in which case we'll avoid repeatedly re-reading the 222 // given package by caching its result for each of the two 223 // currently-supported hash formats. These will be NilHash until we 224 // encounter the first hash of the corresponding scheme. 
225 var v1Hash, zipHash Hash 226 for _, want := range allowed { 227 switch want.Scheme() { 228 case HashScheme1: 229 if v1Hash == NilHash { 230 got, err := PackageHashV1(loc) 231 if err != nil { 232 return false, err 233 } 234 v1Hash = got 235 } 236 if v1Hash == want { 237 return true, nil 238 } 239 case HashSchemeZip: 240 archiveLoc, ok := loc.(PackageLocalArchive) 241 if !ok { 242 // A zip hash can never match an unpacked directory 243 continue 244 } 245 if zipHash == NilHash { 246 got, err := PackageHashLegacyZipSHA(archiveLoc) 247 if err != nil { 248 return false, err 249 } 250 zipHash = got 251 } 252 if zipHash == want { 253 return true, nil 254 } 255 default: 256 // If it's not a supported format then it can't match. 257 continue 258 } 259 } 260 return false, nil 261 } 262 263 // PreferredHashes examines all of the given hash strings and returns the one 264 // that the current version of Terraform considers to provide the strongest 265 // verification. 266 // 267 // Returns an empty string if none of the given hashes are of a supported 268 // format. If PreferredHash returns a non-empty string then it will be one 269 // of the hash strings in "given", and that hash is the one that must pass 270 // verification in order for a package to be considered valid. 271 func PreferredHashes(given []Hash) []Hash { 272 // For now this is just filtering for the two hash formats we support, 273 // both of which are considered equally "preferred". If we introduce 274 // a new scheme like "h2:" in future then, depending on the characteristics 275 // of that new version, it might make sense to rework this function so 276 // that it only returns "h1:" hashes if the input has no "h2:" hashes, 277 // so that h2: is preferred when possible and h1: is only a fallback for 278 // interacting with older systems that haven't been updated with the new 279 // scheme yet. 
280 281 var ret []Hash 282 for _, hash := range given { 283 switch hash.Scheme() { 284 case HashScheme1, HashSchemeZip: 285 ret = append(ret, hash) 286 } 287 } 288 return ret 289 } 290 291 // PackageHashLegacyZipSHA implements the old provider package hashing scheme 292 // of taking a SHA256 hash of the containing .zip archive itself, rather than 293 // of the contents of the archive. 294 // 295 // The result is a hash string with the "zh:" prefix, which is intended to 296 // represent "zip hash". After the prefix is a lowercase-hex encoded SHA256 297 // checksum, intended to exactly match the formatting used in the registry 298 // API (apart from the prefix) so that checksums can be more conveniently 299 // compared by humans. 300 // 301 // Because this hashing scheme uses the official provider .zip file as its 302 // input, it accepts only PackageLocalArchive locations. 303 func PackageHashLegacyZipSHA(loc PackageLocalArchive) (Hash, error) { 304 archivePath, err := filepath.EvalSymlinks(string(loc)) 305 if err != nil { 306 return "", err 307 } 308 309 f, err := os.Open(archivePath) 310 if err != nil { 311 return "", err 312 } 313 defer f.Close() 314 315 h := sha256.New() 316 _, err = io.Copy(h, f) 317 if err != nil { 318 return "", err 319 } 320 321 gotHash := h.Sum(nil) 322 return HashSchemeZip.New(fmt.Sprintf("%x", gotHash)), nil 323 } 324 325 // HashLegacyZipSHAFromSHA is a convenience method to produce the schemed-string 326 // hash format from an already-calculated hash of a provider .zip archive. 327 // 328 // This just adds the "zh:" prefix and encodes the string in hex, so that the 329 // result is in the same format as PackageHashLegacyZipSHA. 330 func HashLegacyZipSHAFromSHA(sum [sha256.Size]byte) Hash { 331 return HashSchemeZip.New(fmt.Sprintf("%x", sum[:])) 332 } 333 334 // PackageHashV1 computes a hash of the contents of the package at the given 335 // location using hash algorithm 1. 
The resulting Hash is guaranteed to have 336 // the scheme HashScheme1. 337 // 338 // The hash covers the paths to files in the directory and the contents of 339 // those files. It does not cover other metadata about the files, such as 340 // permissions. 341 // 342 // This function is named "PackageHashV1" in anticipation of other hashing 343 // algorithms being added in a backward-compatible way in future. The result 344 // from PackageHashV1 always begins with the prefix "h1:" so that callers can 345 // distinguish the results of potentially multiple different hash algorithms in 346 // future. 347 // 348 // PackageHashV1 can be used only with the two local package location types 349 // PackageLocalDir and PackageLocalArchive, because it needs to access the 350 // contents of the indicated package in order to compute the hash. If given 351 // a non-local location this function will always return an error. 352 func PackageHashV1(loc PackageLocation) (Hash, error) { 353 // Our HashV1 is really just the Go Modules hash version 1, which is 354 // sufficient for our needs and already well-used for identity of 355 // Go Modules distribution packages. It is also blocked from incompatible 356 // changes by being used in a wide array of go.sum files already. 357 // 358 // In particular, it also supports computing an equivalent hash from 359 // an unpacked zip file, which is not important for Terraform workflow 360 // today but is likely to become so in future if we adopt a top-level 361 // lockfile mechanism that is intended to be checked in to version control, 362 // rather than just a transient lock for a particular local cache directory. 363 // (In that case we'd need to check hashes of _packed_ packages, too.) 364 // 365 // Internally, dirhash.Hash1 produces a string containing a sequence of 366 // newline-separated path+filehash pairs for all of the files in the 367 // directory, and then finally produces a hash of that string to return. 
368 // In both cases, the hash algorithm is SHA256. 369 370 switch loc := loc.(type) { 371 372 case PackageLocalDir: 373 // We'll first dereference a possible symlink at our PackageDir location, 374 // as would be created if this package were linked in from another cache. 375 packageDir, err := filepath.EvalSymlinks(string(loc)) 376 if err != nil { 377 return "", err 378 } 379 380 // The dirhash.HashDir result is already in our expected h1:... 381 // format, so we can just convert directly to Hash. 382 s, err := dirhash.HashDir(packageDir, "", dirhash.Hash1) 383 return Hash(s), err 384 385 case PackageLocalArchive: 386 archivePath, err := filepath.EvalSymlinks(string(loc)) 387 if err != nil { 388 return "", err 389 } 390 391 // The dirhash.HashDir result is already in our expected h1:... 392 // format, so we can just convert directly to Hash. 393 s, err := dirhash.HashZip(archivePath, dirhash.Hash1) 394 return Hash(s), err 395 396 default: 397 return "", fmt.Errorf("cannot hash package at %s", loc.String()) 398 } 399 } 400 401 // Hash computes a hash of the contents of the package at the location 402 // associated with the reciever, using whichever hash algorithm is the current 403 // default. 404 // 405 // This method will change to use new hash versions as they are introduced 406 // in future. If you need a specific hash version, call the method for that 407 // version directly instead, such as HashV1. 408 // 409 // Hash can be used only with the two local package location types 410 // PackageLocalDir and PackageLocalArchive, because it needs to access the 411 // contents of the indicated package in order to compute the hash. If given 412 // a non-local location this function will always return an error. 413 func (m PackageMeta) Hash() (Hash, error) { 414 return PackageHash(m.Location) 415 } 416 417 // MatchesHash returns true if the package at the location associated with 418 // the receiver matches the given hash, or false otherwise. 
419 // 420 // If it cannot read from the given location, or if the given hash is in an 421 // unsupported format, MatchesHash returns an error. 422 // 423 // MatchesHash can be used only with the two local package location types 424 // PackageLocalDir and PackageLocalArchive, because it needs to access the 425 // contents of the indicated package in order to compute the hash. If given 426 // a non-local location this function will always return an error. 427 func (m PackageMeta) MatchesHash(want Hash) (bool, error) { 428 return PackageMatchesHash(m.Location, want) 429 } 430 431 // MatchesAnyHash returns true if the package at the location associated with 432 // the receiver matches at least one of the given hashes, or false otherwise. 433 // 434 // If it cannot read from the given location, MatchesHash returns an error. 435 // Unlike the signular MatchesHash, MatchesAnyHash considers an unsupported 436 // hash format to be a successful non-match. 437 func (m PackageMeta) MatchesAnyHash(acceptable []Hash) (bool, error) { 438 return PackageMatchesAnyHash(m.Location, acceptable) 439 } 440 441 // HashV1 computes a hash of the contents of the package at the location 442 // associated with the receiver using hash algorithm 1. 443 // 444 // The hash covers the paths to files in the directory and the contents of 445 // those files. It does not cover other metadata about the files, such as 446 // permissions. 447 // 448 // HashV1 can be used only with the two local package location types 449 // PackageLocalDir and PackageLocalArchive, because it needs to access the 450 // contents of the indicated package in order to compute the hash. If given 451 // a non-local location this function will always return an error. 452 func (m PackageMeta) HashV1() (Hash, error) { 453 return PackageHashV1(m.Location) 454 }