github.com/opentofu/opentofu@v1.7.1/internal/getproviders/http_mirror_source.go (about)

     1  // Copyright (c) The OpenTofu Authors
     2  // SPDX-License-Identifier: MPL-2.0
     3  // Copyright (c) 2023 HashiCorp, Inc.
     4  // SPDX-License-Identifier: MPL-2.0
     5  
     6  package getproviders
     7  
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"mime"
	"net/http"
	"net/url"
	"path"
	"strings"

	"github.com/hashicorp/go-retryablehttp"
	svchost "github.com/hashicorp/terraform-svchost"
	svcauth "github.com/hashicorp/terraform-svchost/auth"
	"golang.org/x/net/idna"

	"github.com/opentofu/opentofu/internal/addrs"
	"github.com/opentofu/opentofu/internal/httpclient"
	"github.com/opentofu/opentofu/internal/logging"
	"github.com/opentofu/opentofu/version"
)
    30  
// HTTPMirrorSource is a source that reads provider metadata from a provider
// mirror that is accessible over the HTTP provider mirror protocol.
type HTTPMirrorSource struct {
	// baseURL is the mirror's root URL. Request paths are resolved relative
	// to it, and the constructor panics unless its scheme is "https".
	baseURL    *url.URL
	// creds supplies per-host credentials for requests to the mirror.
	// It may be nil, in which case requests are sent without credentials.
	creds      svcauth.CredentialsSource
	// httpClient is the retrying HTTP client used for all mirror requests.
	httpClient *retryablehttp.Client
}

// Compile-time assertion that *HTTPMirrorSource implements Source.
var _ Source = (*HTTPMirrorSource)(nil)
    40  
    41  // NewHTTPMirrorSource constructs and returns a new network mirror source with
    42  // the given base URL. The relative URL offsets defined by the HTTP mirror
    43  // protocol will be resolve relative to the given URL.
    44  //
    45  // The given URL must use the "https" scheme, or this function will panic.
    46  // (When the URL comes from user input, such as in the CLI config, it's the
    47  // UI/config layer's responsibility to validate this and return a suitable
    48  // error message for the end-user audience.)
    49  func NewHTTPMirrorSource(baseURL *url.URL, creds svcauth.CredentialsSource) *HTTPMirrorSource {
    50  	httpClient := httpclient.New()
    51  	httpClient.Timeout = requestTimeout
    52  	httpClient.CheckRedirect = func(req *http.Request, via []*http.Request) error {
    53  		// If we get redirected more than five times we'll assume we're
    54  		// in a redirect loop and bail out, rather than hanging forever.
    55  		if len(via) > 5 {
    56  			return fmt.Errorf("too many redirects")
    57  		}
    58  		return nil
    59  	}
    60  	return newHTTPMirrorSourceWithHTTPClient(baseURL, creds, httpClient)
    61  }
    62  
    63  func newHTTPMirrorSourceWithHTTPClient(baseURL *url.URL, creds svcauth.CredentialsSource, httpClient *http.Client) *HTTPMirrorSource {
    64  	if baseURL.Scheme != "https" {
    65  		panic("non-https URL for HTTP mirror")
    66  	}
    67  
    68  	// We borrow the retry settings and behaviors from the registry client,
    69  	// because our needs here are very similar to those of the registry client.
    70  	retryableClient := retryablehttp.NewClient()
    71  	retryableClient.HTTPClient = httpClient
    72  	retryableClient.RetryMax = discoveryRetry
    73  	retryableClient.RequestLogHook = requestLogHook
    74  	retryableClient.ErrorHandler = maxRetryErrorHandler
    75  
    76  	retryableClient.Logger = log.New(logging.LogOutput(), "", log.Flags())
    77  
    78  	return &HTTPMirrorSource{
    79  		baseURL:    baseURL,
    80  		creds:      creds,
    81  		httpClient: retryableClient,
    82  	}
    83  }
    84  
    85  // AvailableVersions retrieves the available versions for the given provider
    86  // from the object's underlying HTTP mirror service.
    87  func (s *HTTPMirrorSource) AvailableVersions(ctx context.Context, provider addrs.Provider) (VersionList, Warnings, error) {
    88  	log.Printf("[DEBUG] Querying available versions of provider %s at network mirror %s", provider.String(), s.baseURL.String())
    89  
    90  	endpointPath := path.Join(
    91  		provider.Hostname.String(),
    92  		provider.Namespace,
    93  		provider.Type,
    94  		"index.json",
    95  	)
    96  
    97  	statusCode, body, finalURL, err := s.get(ctx, endpointPath)
    98  	defer func() {
    99  		if body != nil {
   100  			body.Close()
   101  		}
   102  	}()
   103  	if err != nil {
   104  		return nil, nil, s.errQueryFailed(provider, err)
   105  	}
   106  
   107  	switch statusCode {
   108  	case http.StatusOK:
   109  		// Great!
   110  	case http.StatusNotFound:
   111  		return nil, nil, ErrProviderNotFound{
   112  			Provider: provider,
   113  		}
   114  	case http.StatusUnauthorized, http.StatusForbidden:
   115  		return nil, nil, s.errUnauthorized(finalURL)
   116  	default:
   117  		return nil, nil, s.errQueryFailed(provider, fmt.Errorf("server returned unsuccessful status %d", statusCode))
   118  	}
   119  
   120  	// If we got here then the response had status OK and so our body
   121  	// will be non-nil and should contain some JSON for us to parse.
   122  	type ResponseBody struct {
   123  		Versions map[string]struct{} `json:"versions"`
   124  	}
   125  	var bodyContent ResponseBody
   126  
   127  	dec := json.NewDecoder(body)
   128  	if err := dec.Decode(&bodyContent); err != nil {
   129  		return nil, nil, s.errQueryFailed(provider, fmt.Errorf("invalid response content from mirror server: %w", err))
   130  	}
   131  
   132  	if len(bodyContent.Versions) == 0 {
   133  		return nil, nil, nil
   134  	}
   135  	ret := make(VersionList, 0, len(bodyContent.Versions))
   136  	for versionStr := range bodyContent.Versions {
   137  		version, err := ParseVersion(versionStr)
   138  		if err != nil {
   139  			log.Printf("[WARN] Ignoring invalid %s version string %q in provider mirror response", provider, versionStr)
   140  			continue
   141  		}
   142  		ret = append(ret, version)
   143  	}
   144  
   145  	ret.Sort()
   146  	return ret, nil, nil
   147  }
   148  
   149  // PackageMeta retrieves metadata for the requested provider package
   150  // from the object's underlying HTTP mirror service.
   151  func (s *HTTPMirrorSource) PackageMeta(ctx context.Context, provider addrs.Provider, version Version, target Platform) (PackageMeta, error) {
   152  	log.Printf("[DEBUG] Finding package URL for %s v%s on %s via network mirror %s", provider.String(), version.String(), target.String(), s.baseURL.String())
   153  
   154  	endpointPath := path.Join(
   155  		provider.Hostname.String(),
   156  		provider.Namespace,
   157  		provider.Type,
   158  		version.String()+".json",
   159  	)
   160  
   161  	statusCode, body, finalURL, err := s.get(ctx, endpointPath)
   162  	defer func() {
   163  		if body != nil {
   164  			body.Close()
   165  		}
   166  	}()
   167  	if err != nil {
   168  		return PackageMeta{}, s.errQueryFailed(provider, err)
   169  	}
   170  
   171  	switch statusCode {
   172  	case http.StatusOK:
   173  		// Great!
   174  	case http.StatusNotFound:
   175  		// A 404 Not Found for a version we previously saw in index.json is
   176  		// a protocol error, so we'll report this as "query failed.
   177  		return PackageMeta{}, s.errQueryFailed(provider, fmt.Errorf("provider mirror does not have archive index for previously-reported %s version %s", provider, version))
   178  	case http.StatusUnauthorized, http.StatusForbidden:
   179  		return PackageMeta{}, s.errUnauthorized(finalURL)
   180  	default:
   181  		return PackageMeta{}, s.errQueryFailed(provider, fmt.Errorf("server returned unsuccessful status %d", statusCode))
   182  	}
   183  
   184  	// If we got here then the response had status OK and so our body
   185  	// will be non-nil and should contain some JSON for us to parse.
   186  	type ResponseArchiveMeta struct {
   187  		RelativeURL string `json:"url"`
   188  		Hashes      []string
   189  	}
   190  	type ResponseBody struct {
   191  		Archives map[string]*ResponseArchiveMeta `json:"archives"`
   192  	}
   193  	var bodyContent ResponseBody
   194  
   195  	dec := json.NewDecoder(body)
   196  	if err := dec.Decode(&bodyContent); err != nil {
   197  		return PackageMeta{}, s.errQueryFailed(provider, fmt.Errorf("invalid response content from mirror server: %w", err))
   198  	}
   199  
   200  	archiveMeta, ok := bodyContent.Archives[target.String()]
   201  	if !ok {
   202  		return PackageMeta{}, ErrPlatformNotSupported{
   203  			Provider:  provider,
   204  			Version:   version,
   205  			Platform:  target,
   206  			MirrorURL: s.baseURL,
   207  		}
   208  	}
   209  
   210  	relURL, err := url.Parse(archiveMeta.RelativeURL)
   211  	if err != nil {
   212  		return PackageMeta{}, s.errQueryFailed(
   213  			provider,
   214  			fmt.Errorf("provider mirror returned invalid URL %q: %w", archiveMeta.RelativeURL, err),
   215  		)
   216  	}
   217  	absURL := finalURL.ResolveReference(relURL)
   218  
   219  	ret := PackageMeta{
   220  		Provider:       provider,
   221  		Version:        version,
   222  		TargetPlatform: target,
   223  
   224  		Location: PackageHTTPURL(absURL.String()),
   225  		Filename: path.Base(absURL.Path),
   226  	}
   227  	// A network mirror might not provide any hashes at all, in which case
   228  	// the package has no source-defined authentication whatsoever.
   229  	if len(archiveMeta.Hashes) > 0 {
   230  		hashes := make([]Hash, 0, len(archiveMeta.Hashes))
   231  		for _, hashStr := range archiveMeta.Hashes {
   232  			hash, err := ParseHash(hashStr)
   233  			if err != nil {
   234  				return PackageMeta{}, s.errQueryFailed(
   235  					provider,
   236  					fmt.Errorf("provider mirror returned invalid provider hash %q: %w", hashStr, err),
   237  				)
   238  			}
   239  			hashes = append(hashes, hash)
   240  		}
   241  		ret.Authentication = NewPackageHashAuthentication(target, hashes)
   242  	}
   243  
   244  	return ret, nil
   245  }
   246  
   247  // ForDisplay returns a string description of the source for user-facing output.
   248  func (s *HTTPMirrorSource) ForDisplay(provider addrs.Provider) string {
   249  	return "provider mirror at " + s.baseURL.String()
   250  }
   251  
   252  // mirrorHost extracts the hostname portion of the configured base URL and
   253  // returns it as a svchost.Hostname, normalized in the usual ways.
   254  //
   255  // If the returned error is non-nil then the given hostname doesn't comply
   256  // with the IETF RFC 5891 section 5.3 and 5.4 validation rules, and thus cannot
   257  // be interpreted as a valid OpenTofu service host. The IDNA validation errors
   258  // are unfortunately usually not very user-friendly, but they are also
   259  // relatively rare because the IDNA normalization rules are quite tolerant.
   260  func (s *HTTPMirrorSource) mirrorHost() (svchost.Hostname, error) {
   261  	return svchostFromURL(s.baseURL)
   262  }
   263  
   264  // mirrorHostCredentials returns the HostCredentials, if any, for the hostname
   265  // included in the mirror base URL.
   266  //
   267  // It might return an error if the mirror base URL is invalid, or if the
   268  // credentials lookup itself fails.
   269  func (s *HTTPMirrorSource) mirrorHostCredentials() (svcauth.HostCredentials, error) {
   270  	hostname, err := s.mirrorHost()
   271  	if err != nil {
   272  		return nil, fmt.Errorf("invalid provider mirror base URL %s: %w", s.baseURL.String(), err)
   273  	}
   274  
   275  	if s.creds == nil {
   276  		// No host-specific credentials, then.
   277  		return nil, nil
   278  	}
   279  
   280  	return s.creds.ForHost(hostname)
   281  }
   282  
   283  // get is the shared functionality for querying a JSON index from a mirror.
   284  //
   285  // It only handles the raw HTTP request. The "body" return value is the
   286  // reader from the response if and only if the response status code is 200 OK
   287  // and the Content-Type is application/json. In all other cases it's nil.
   288  // If body is non-nil then the caller must close it after reading it.
   289  //
   290  // If the "finalURL" return value is not empty then it's the URL that actually
   291  // produced the returned response, possibly after following some redirects.
   292  func (s *HTTPMirrorSource) get(ctx context.Context, relativePath string) (statusCode int, body io.ReadCloser, finalURL *url.URL, error error) {
   293  	endpointPath, err := url.Parse(relativePath)
   294  	if err != nil {
   295  		// Should never happen because the caller should validate all of the
   296  		// components it's including in the path.
   297  		return 0, nil, nil, err
   298  	}
   299  	endpointURL := s.baseURL.ResolveReference(endpointPath)
   300  
   301  	req, err := retryablehttp.NewRequest("GET", endpointURL.String(), nil)
   302  	if err != nil {
   303  		return 0, nil, endpointURL, err
   304  	}
   305  	req = req.WithContext(ctx)
   306  	req.Request.Header.Set(terraformVersionHeader, version.String())
   307  	creds, err := s.mirrorHostCredentials()
   308  	if err != nil {
   309  		return 0, nil, endpointURL, fmt.Errorf("failed to determine request credentials: %w", err)
   310  	}
   311  	if creds != nil {
   312  		// Note that if the initial requests gets redirected elsewhere
   313  		// then the credentials will still be included in the new request,
   314  		// even if they are on a different hostname. This is intentional
   315  		// and consistent with how we handle credentials for other
   316  		// OpenTofu-native services, because the user model is to configure
   317  		// credentials for the "friendly hostname" they configured, not for
   318  		// whatever hostname ends up ultimately serving the request as an
   319  		// implementation detail.
   320  		creds.PrepareRequest(req.Request)
   321  	}
   322  
   323  	resp, err := s.httpClient.Do(req)
   324  	if err != nil {
   325  		return 0, nil, endpointURL, err
   326  	}
   327  	defer func() {
   328  		// If we're not returning the body then we'll close it
   329  		// before we return.
   330  		if body == nil {
   331  			resp.Body.Close()
   332  		}
   333  	}()
   334  	// After this point, our final URL return value should always be the
   335  	// one from resp.Request, because that takes into account any redirects
   336  	// we followed along the way.
   337  	finalURL = resp.Request.URL
   338  
   339  	if resp.StatusCode == http.StatusOK {
   340  		// If and only if we get an OK response, we'll check that the response
   341  		// type is JSON and return the body reader.
   342  		ct := resp.Header.Get("Content-Type")
   343  		mt, params, err := mime.ParseMediaType(ct)
   344  		if err != nil {
   345  			return 0, nil, finalURL, fmt.Errorf("response has invalid Content-Type: %w", err)
   346  		}
   347  		if mt != "application/json" {
   348  			return 0, nil, finalURL, fmt.Errorf("response has invalid Content-Type: must be application/json")
   349  		}
   350  		for name := range params {
   351  			// The application/json content-type has no defined parameters,
   352  			// but some servers are configured to include a redundant "charset"
   353  			// parameter anyway, presumably out of a sense of completeness.
   354  			// We'll ignore them but warn that we're ignoring them in case the
   355  			// subsequent parsing fails due to the server trying to use an
   356  			// unsupported character encoding. (RFC 7159 defines its own
   357  			// JSON-specific character encoding rules.)
   358  			log.Printf("[WARN] Network mirror returned %q as part of its JSON content type, which is not defined. Ignoring.", name)
   359  		}
   360  		body = resp.Body
   361  	}
   362  
   363  	return resp.StatusCode, body, finalURL, nil
   364  }
   365  
   366  func (s *HTTPMirrorSource) errQueryFailed(provider addrs.Provider, err error) error {
   367  	if err == context.Canceled {
   368  		// This one has a special error type so that callers can
   369  		// handle it in a different way.
   370  		return ErrRequestCanceled{}
   371  	}
   372  	return ErrQueryFailed{
   373  		Provider:  provider,
   374  		Wrapped:   err,
   375  		MirrorURL: s.baseURL,
   376  	}
   377  }
   378  
   379  func (s *HTTPMirrorSource) errUnauthorized(finalURL *url.URL) error {
   380  	hostname, err := svchostFromURL(finalURL)
   381  	if err != nil {
   382  		// Again, weird but we'll tolerate it.
   383  		return fmt.Errorf("invalid credentials for %s", finalURL)
   384  	}
   385  
   386  	return ErrUnauthorized{
   387  		Hostname: hostname,
   388  
   389  		// We can't easily tell from here whether we had credentials or
   390  		// not, so for now we'll just assume we did because "host rejected
   391  		// the given credentials" is, hopefully, still understandable in
   392  		// the event that there were none. (If this ends up being confusing
   393  		// in practice then we'll need to do some refactoring of how
   394  		// we handle credentials in this source.)
   395  		HaveCredentials: true,
   396  	}
   397  }
   398  
   399  func svchostFromURL(u *url.URL) (svchost.Hostname, error) {
   400  	raw := u.Host
   401  
   402  	// When "friendly hostnames" appear in OpenTofu-specific identifiers we
   403  	// typically constrain their syntax more strictly than the
   404  	// Internationalized Domain Name specifications call for, such as
   405  	// forbidding direct use of punycode, but in this case we're just
   406  	// working with a standard http: or https: URL and so we'll first use the
   407  	// IDNA "lookup" rules directly, with no additional notational constraints,
   408  	// to effectively normalize away the differences that would normally
   409  	// produce an error.
   410  	var portPortion string
   411  	if colonPos := strings.Index(raw, ":"); colonPos != -1 {
   412  		raw, portPortion = raw[:colonPos], raw[colonPos:]
   413  	}
   414  	// HTTPMirrorSource requires all URLs to be https URLs, because running
   415  	// a network mirror over HTTP would potentially transmit any configured
   416  	// credentials in cleartext. Therefore we don't need to do any special
   417  	// handling of default ports here, because svchost.Hostname already
   418  	// considers the absense of a port to represent the standard HTTPS port
   419  	// 443, and will normalize away an explicit specification of port 443
   420  	// in svchost.ForComparison below.
   421  
   422  	normalized, err := idna.Display.ToUnicode(raw)
   423  	if err != nil {
   424  		return svchost.Hostname(""), err
   425  	}
   426  
   427  	// If ToUnicode succeeded above then "normalized" is now a hostname in the
   428  	// normalized IDNA form, with any direct punycode already interpreted and
   429  	// the case folding and other normalization rules applied. It should
   430  	// therefore now be accepted by svchost.ForComparison with no additional
   431  	// errors, but the port portion can still potentially be invalid.
   432  	return svchost.ForComparison(normalized + portPortion)
   433  }