github.com/terramate-io/tf@v0.0.0-20230830114523-fce866b4dfcd/getproviders/hash.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package getproviders
     5  
     6  import (
     7  	"crypto/sha256"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path/filepath"
    12  	"strings"
    13  
    14  	"golang.org/x/mod/sumdb/dirhash"
    15  )
    16  
// Hash is a specially-formatted string representing a checksum of a package
// or the contents of the package.
//
// A Hash string always starts with a scheme, which is a short series of
// alphanumeric characters followed by a colon, and then the remainder of the
// string has a different meaning depending on the scheme prefix.
//
// The currently-valid schemes are defined as the constants of type HashScheme
// in this package.
//
// Callers outside of this package must not create Hash values via direct
// conversion. Instead, use either the HashScheme.New method on one of the
// HashScheme constants (for a hash of a particular scheme) or the ParseHash
// function (if hashes of any scheme are acceptable).
type Hash string
    32  
// NilHash is the zero value of Hash. It isn't a valid hash: it has no scheme
// prefix, so its Scheme, HasScheme, and Value methods will panic. String and
// GoString are still safe to call on it.
const NilHash = Hash("")
    36  
    37  // ParseHash parses the string representation of a Hash into a Hash value.
    38  //
    39  // A particular version of Terraform only supports a fixed set of hash schemes,
    40  // but this function intentionally allows unrecognized schemes so that we can
    41  // silently ignore other schemes that may be introduced in the future. For
    42  // that reason, the Scheme method of the returned Hash may return a value that
    43  // isn't in one of the HashScheme constants in this package.
    44  //
    45  // This function doesn't verify that the value portion of the given hash makes
    46  // sense for the given scheme. Invalid values are just considered to not match
    47  // any packages.
    48  //
    49  // If this function returns an error then the returned Hash is invalid and
    50  // must not be used.
    51  func ParseHash(s string) (Hash, error) {
    52  	colon := strings.Index(s, ":")
    53  	if colon < 1 { // 1 because a zero-length scheme is not allowed
    54  		return NilHash, fmt.Errorf("hash string must start with a scheme keyword followed by a colon")
    55  	}
    56  	return Hash(s), nil
    57  }
    58  
    59  // MustParseHash is a wrapper around ParseHash that panics if it returns an
    60  // error.
    61  func MustParseHash(s string) Hash {
    62  	hash, err := ParseHash(s)
    63  	if err != nil {
    64  		panic(err.Error())
    65  	}
    66  	return hash
    67  }
    68  
    69  // Scheme returns the scheme of the recieving hash. If the receiver is not
    70  // using valid syntax then this method will panic.
    71  func (h Hash) Scheme() HashScheme {
    72  	colon := strings.Index(string(h), ":")
    73  	if colon < 0 {
    74  		panic(fmt.Sprintf("invalid hash string %q", h))
    75  	}
    76  	return HashScheme(h[:colon+1])
    77  }
    78  
// HasScheme returns true if the given scheme matches the receiver's scheme,
// or false otherwise. The comparison is against the result of Scheme, so
// "want" must include the trailing colon, as the HashScheme constants do.
//
// If the receiver is not using valid syntax then this method will panic.
func (h Hash) HasScheme(want HashScheme) bool {
	return h.Scheme() == want
}
    86  
    87  // Value returns the scheme-specific value from the recieving hash. The
    88  // meaning of this value depends on the scheme.
    89  //
    90  // If the receiver is not using valid syntax then this method will panic.
    91  func (h Hash) Value() string {
    92  	colon := strings.Index(string(h), ":")
    93  	if colon < 0 {
    94  		panic(fmt.Sprintf("invalid hash string %q", h))
    95  	}
    96  	return string(h[colon+1:])
    97  }
    98  
// String returns a string representation of the receiving hash, which is
// the underlying hash string unchanged (scheme prefix included). Unlike
// Scheme and Value, it does not panic for a syntactically invalid hash.
func (h Hash) String() string {
	return string(h)
}
   103  
   104  // GoString returns a Go syntax representation of the receiving hash.
   105  //
   106  // This is here primarily to help with producing descriptive test failure
   107  // output; these results are not particularly useful at runtime.
   108  func (h Hash) GoString() string {
   109  	if h == NilHash {
   110  		return "getproviders.NilHash"
   111  	}
   112  	switch scheme := h.Scheme(); scheme {
   113  	case HashScheme1:
   114  		return fmt.Sprintf("getproviders.HashScheme1.New(%q)", h.Value())
   115  	case HashSchemeZip:
   116  		return fmt.Sprintf("getproviders.HashSchemeZip.New(%q)", h.Value())
   117  	default:
   118  		// This fallback is for when we encounter lock files or API responses
   119  		// with hash schemes that the current version of Terraform isn't
   120  		// familiar with. They were presumably introduced in a later version.
   121  		return fmt.Sprintf("getproviders.HashScheme(%q).New(%q)", scheme, h.Value())
   122  	}
   123  }
   124  
// HashScheme is an enumeration of schemes that are allowed for values of type
// Hash.
//
// A HashScheme value includes the trailing colon that separates the scheme
// keyword from the rest of a Hash string, as in "h1:".
type HashScheme string
   128  
const (
	// HashScheme1 is the scheme identifier for the first hash scheme.
	//
	// Use PackageHashV1 (or one of its wrapper functions) to calculate hashes
	// with this scheme.
	HashScheme1 HashScheme = HashScheme("h1:")

	// HashSchemeZip is the scheme identifier for the legacy hash scheme that
	// applies to distribution archives (.zip files) rather than package
	// contents, and can therefore only be verified against the original
	// distribution .zip file, not an extracted directory.
	//
	// Use PackageHashLegacyZipSHA to calculate hashes with this scheme.
	HashSchemeZip HashScheme = HashScheme("zh:")
)
   144  
   145  // New creates a new Hash value with the receiver as its scheme and the given
   146  // raw string as its value.
   147  //
   148  // It's the caller's responsibility to make sure that the given value makes
   149  // sense for the selected scheme.
   150  func (hs HashScheme) New(value string) Hash {
   151  	return Hash(string(hs) + value)
   152  }
   153  
// PackageHash computes a hash of the contents of the package at the given
// location, using whichever hash algorithm is the current default.
//
// Currently, this function returns version 1 hashes as produced by the
// function PackageHashV1, but this function may switch to other versions in
// later releases. Call PackageHashV1 directly if you specifically need a V1
// hash.
//
// PackageHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If given
// a non-local location this function will always return an error.
func PackageHash(loc PackageLocation) (Hash, error) {
	return PackageHashV1(loc)
}
   169  
   170  // PackageMatchesHash returns true if the package at the given location matches
   171  // the given hash, or false otherwise.
   172  //
   173  // If it cannot read from the given location, or if the given hash is in an
   174  // unsupported format, PackageMatchesHash returns an error.
   175  //
   176  // There is currently only one hash format, as implemented by HashV1. However,
   177  // if others are introduced in future PackageMatchesHash may accept multiple
   178  // formats, and may generate errors for any formats that become obsolete.
   179  //
   180  // PackageMatchesHash can be used only with the two local package location types
   181  // PackageLocalDir and PackageLocalArchive, because it needs to access the
   182  // contents of the indicated package in order to compute the hash. If given
   183  // a non-local location this function will always return an error.
   184  func PackageMatchesHash(loc PackageLocation, want Hash) (bool, error) {
   185  	switch want.Scheme() {
   186  	case HashScheme1:
   187  		got, err := PackageHashV1(loc)
   188  		if err != nil {
   189  			return false, err
   190  		}
   191  		return got == want, nil
   192  	case HashSchemeZip:
   193  		archiveLoc, ok := loc.(PackageLocalArchive)
   194  		if !ok {
   195  			return false, fmt.Errorf(`ziphash scheme ("zh:" prefix) is not supported for unpacked provider packages`)
   196  		}
   197  		got, err := PackageHashLegacyZipSHA(archiveLoc)
   198  		if err != nil {
   199  			return false, err
   200  		}
   201  		return got == want, nil
   202  	default:
   203  		return false, fmt.Errorf("unsupported hash format (this may require a newer version of Terraform)")
   204  	}
   205  }
   206  
   207  // PackageMatchesAnyHash returns true if the package at the given location
   208  // matches at least one of the given hashes, or false otherwise.
   209  //
   210  // If it cannot read from the given location, PackageMatchesAnyHash returns an
   211  // error. Unlike the singular PackageMatchesHash, PackageMatchesAnyHash
   212  // considers unsupported hash formats as successfully non-matching, rather
   213  // than returning an error.
   214  //
   215  // PackageMatchesAnyHash can be used only with the two local package location
   216  // types PackageLocalDir and PackageLocalArchive, because it needs to access the
   217  // contents of the indicated package in order to compute the hash. If given
   218  // a non-local location this function will always return an error.
   219  func PackageMatchesAnyHash(loc PackageLocation, allowed []Hash) (bool, error) {
   220  	// It's likely that we'll have multiple hashes of the same scheme in
   221  	// the "allowed" set, in which case we'll avoid repeatedly re-reading the
   222  	// given package by caching its result for each of the two
   223  	// currently-supported hash formats. These will be NilHash until we
   224  	// encounter the first hash of the corresponding scheme.
   225  	var v1Hash, zipHash Hash
   226  	for _, want := range allowed {
   227  		switch want.Scheme() {
   228  		case HashScheme1:
   229  			if v1Hash == NilHash {
   230  				got, err := PackageHashV1(loc)
   231  				if err != nil {
   232  					return false, err
   233  				}
   234  				v1Hash = got
   235  			}
   236  			if v1Hash == want {
   237  				return true, nil
   238  			}
   239  		case HashSchemeZip:
   240  			archiveLoc, ok := loc.(PackageLocalArchive)
   241  			if !ok {
   242  				// A zip hash can never match an unpacked directory
   243  				continue
   244  			}
   245  			if zipHash == NilHash {
   246  				got, err := PackageHashLegacyZipSHA(archiveLoc)
   247  				if err != nil {
   248  					return false, err
   249  				}
   250  				zipHash = got
   251  			}
   252  			if zipHash == want {
   253  				return true, nil
   254  			}
   255  		default:
   256  			// If it's not a supported format then it can't match.
   257  			continue
   258  		}
   259  	}
   260  	return false, nil
   261  }
   262  
   263  // PreferredHashes examines all of the given hash strings and returns the one
   264  // that the current version of Terraform considers to provide the strongest
   265  // verification.
   266  //
   267  // Returns an empty string if none of the given hashes are of a supported
   268  // format. If PreferredHash returns a non-empty string then it will be one
   269  // of the hash strings in "given", and that hash is the one that must pass
   270  // verification in order for a package to be considered valid.
   271  func PreferredHashes(given []Hash) []Hash {
   272  	// For now this is just filtering for the two hash formats we support,
   273  	// both of which are considered equally "preferred". If we introduce
   274  	// a new scheme like "h2:" in future then, depending on the characteristics
   275  	// of that new version, it might make sense to rework this function so
   276  	// that it only returns "h1:" hashes if the input has no "h2:" hashes,
   277  	// so that h2: is preferred when possible and h1: is only a fallback for
   278  	// interacting with older systems that haven't been updated with the new
   279  	// scheme yet.
   280  
   281  	var ret []Hash
   282  	for _, hash := range given {
   283  		switch hash.Scheme() {
   284  		case HashScheme1, HashSchemeZip:
   285  			ret = append(ret, hash)
   286  		}
   287  	}
   288  	return ret
   289  }
   290  
   291  // PackageHashLegacyZipSHA implements the old provider package hashing scheme
   292  // of taking a SHA256 hash of the containing .zip archive itself, rather than
   293  // of the contents of the archive.
   294  //
   295  // The result is a hash string with the "zh:" prefix, which is intended to
   296  // represent "zip hash". After the prefix is a lowercase-hex encoded SHA256
   297  // checksum, intended to exactly match the formatting used in the registry
   298  // API (apart from the prefix) so that checksums can be more conveniently
   299  // compared by humans.
   300  //
   301  // Because this hashing scheme uses the official provider .zip file as its
   302  // input, it accepts only PackageLocalArchive locations.
   303  func PackageHashLegacyZipSHA(loc PackageLocalArchive) (Hash, error) {
   304  	archivePath, err := filepath.EvalSymlinks(string(loc))
   305  	if err != nil {
   306  		return "", err
   307  	}
   308  
   309  	f, err := os.Open(archivePath)
   310  	if err != nil {
   311  		return "", err
   312  	}
   313  	defer f.Close()
   314  
   315  	h := sha256.New()
   316  	_, err = io.Copy(h, f)
   317  	if err != nil {
   318  		return "", err
   319  	}
   320  
   321  	gotHash := h.Sum(nil)
   322  	return HashSchemeZip.New(fmt.Sprintf("%x", gotHash)), nil
   323  }
   324  
   325  // HashLegacyZipSHAFromSHA is a convenience method to produce the schemed-string
   326  // hash format from an already-calculated hash of a provider .zip archive.
   327  //
   328  // This just adds the "zh:" prefix and encodes the string in hex, so that the
   329  // result is in the same format as PackageHashLegacyZipSHA.
   330  func HashLegacyZipSHAFromSHA(sum [sha256.Size]byte) Hash {
   331  	return HashSchemeZip.New(fmt.Sprintf("%x", sum[:]))
   332  }
   333  
   334  // PackageHashV1 computes a hash of the contents of the package at the given
   335  // location using hash algorithm 1. The resulting Hash is guaranteed to have
   336  // the scheme HashScheme1.
   337  //
   338  // The hash covers the paths to files in the directory and the contents of
   339  // those files. It does not cover other metadata about the files, such as
   340  // permissions.
   341  //
   342  // This function is named "PackageHashV1" in anticipation of other hashing
   343  // algorithms being added in a backward-compatible way in future. The result
   344  // from PackageHashV1 always begins with the prefix "h1:" so that callers can
   345  // distinguish the results of potentially multiple different hash algorithms in
   346  // future.
   347  //
   348  // PackageHashV1 can be used only with the two local package location types
   349  // PackageLocalDir and PackageLocalArchive, because it needs to access the
   350  // contents of the indicated package in order to compute the hash. If given
   351  // a non-local location this function will always return an error.
   352  func PackageHashV1(loc PackageLocation) (Hash, error) {
   353  	// Our HashV1 is really just the Go Modules hash version 1, which is
   354  	// sufficient for our needs and already well-used for identity of
   355  	// Go Modules distribution packages. It is also blocked from incompatible
   356  	// changes by being used in a wide array of go.sum files already.
   357  	//
   358  	// In particular, it also supports computing an equivalent hash from
   359  	// an unpacked zip file, which is not important for Terraform workflow
   360  	// today but is likely to become so in future if we adopt a top-level
   361  	// lockfile mechanism that is intended to be checked in to version control,
   362  	// rather than just a transient lock for a particular local cache directory.
   363  	// (In that case we'd need to check hashes of _packed_ packages, too.)
   364  	//
   365  	// Internally, dirhash.Hash1 produces a string containing a sequence of
   366  	// newline-separated path+filehash pairs for all of the files in the
   367  	// directory, and then finally produces a hash of that string to return.
   368  	// In both cases, the hash algorithm is SHA256.
   369  
   370  	switch loc := loc.(type) {
   371  
   372  	case PackageLocalDir:
   373  		// We'll first dereference a possible symlink at our PackageDir location,
   374  		// as would be created if this package were linked in from another cache.
   375  		packageDir, err := filepath.EvalSymlinks(string(loc))
   376  		if err != nil {
   377  			return "", err
   378  		}
   379  
   380  		// The dirhash.HashDir result is already in our expected h1:...
   381  		// format, so we can just convert directly to Hash.
   382  		s, err := dirhash.HashDir(packageDir, "", dirhash.Hash1)
   383  		return Hash(s), err
   384  
   385  	case PackageLocalArchive:
   386  		archivePath, err := filepath.EvalSymlinks(string(loc))
   387  		if err != nil {
   388  			return "", err
   389  		}
   390  
   391  		// The dirhash.HashDir result is already in our expected h1:...
   392  		// format, so we can just convert directly to Hash.
   393  		s, err := dirhash.HashZip(archivePath, dirhash.Hash1)
   394  		return Hash(s), err
   395  
   396  	default:
   397  		return "", fmt.Errorf("cannot hash package at %s", loc.String())
   398  	}
   399  }
   400  
// Hash computes a hash of the contents of the package at the location
// associated with the receiver, using whichever hash algorithm is the current
// default. It delegates to PackageHash with the receiver's Location.
//
// This method will change to use new hash versions as they are introduced
// in future. If you need a specific hash version, call the method for that
// version directly instead, such as HashV1.
//
// Hash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If the
// receiver has a non-local location this method will always return an error.
func (m PackageMeta) Hash() (Hash, error) {
	return PackageHash(m.Location)
}
   416  
// MatchesHash returns true if the package at the location associated with
// the receiver matches the given hash, or false otherwise. It delegates to
// PackageMatchesHash with the receiver's Location.
//
// If it cannot read from the receiver's location, or if the given hash is in
// an unsupported format, MatchesHash returns an error.
//
// MatchesHash can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If the
// receiver has a non-local location this method will always return an error.
func (m PackageMeta) MatchesHash(want Hash) (bool, error) {
	return PackageMatchesHash(m.Location, want)
}
   430  
// MatchesAnyHash returns true if the package at the location associated with
// the receiver matches at least one of the given hashes, or false otherwise.
// It delegates to PackageMatchesAnyHash with the receiver's Location.
//
// If it cannot read from the receiver's location, MatchesAnyHash returns an
// error. Unlike the singular MatchesHash, MatchesAnyHash considers an
// unsupported hash format to be a successful non-match.
func (m PackageMeta) MatchesAnyHash(acceptable []Hash) (bool, error) {
	return PackageMatchesAnyHash(m.Location, acceptable)
}
   440  
// HashV1 computes a hash of the contents of the package at the location
// associated with the receiver using hash algorithm 1. It delegates to
// PackageHashV1 with the receiver's Location.
//
// The hash covers the paths to files in the directory and the contents of
// those files. It does not cover other metadata about the files, such as
// permissions.
//
// HashV1 can be used only with the two local package location types
// PackageLocalDir and PackageLocalArchive, because it needs to access the
// contents of the indicated package in order to compute the hash. If the
// receiver has a non-local location this method will always return an error.
func (m PackageMeta) HashV1() (Hash, error) {
	return PackageHashV1(m.Location)
}