github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/environs/simplestreams/datasource.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package simplestreams
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"net/url"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/juju/clock"
    16  	"github.com/juju/errors"
    17  	jujuhttp "github.com/juju/http/v2"
    18  	"github.com/juju/retry"
    19  	"github.com/juju/utils/v3"
    20  
    21  	corelogger "github.com/juju/juju/core/logger"
    22  )
    23  
// A DataSource retrieves simplestreams metadata.
type DataSource interface {
	// Description describes the origin of this datasource,
	// e.g. agent-metadata-url, cloud storage, keystone catalog etc.
	Description() string

	// Fetch loads the data at the specified relative path. It returns a reader from which
	// the data can be retrieved as well as the full URL of the file. The full URL is typically
	// used in log messages to help diagnose issues accessing the data.
	// On success the caller should close the returned reader.
	Fetch(path string) (io.ReadCloser, string, error)

	// URL returns the full URL of the path, as applicable to this datasource.
	// This method is used primarily for logging purposes.
	URL(path string) (string, error)

	// PublicSigningKey returns the public key used to validate signed metadata.
	PublicSigningKey() string

	// Priority is an importance factor for the data source. A higher number
	// means a higher priority. This allows data sources to be sorted in
	// order of importance.
	Priority() int

	// RequireSigned indicates whether this data source requires signed data.
	RequireSigned() bool
}
    49  
const (
	// These values are used as priority factors for sorting data source data.

	// EXISTING_CLOUD_DATA is the lowest in priority.
	// It is mostly used in merge functions
	// where existing data does not need to be ranked.
	EXISTING_CLOUD_DATA = 0

	// DEFAULT_CLOUD_DATA is used for common cloud data that
	// is shared and is publicly available.
	DEFAULT_CLOUD_DATA = 10

	// SPECIFIC_CLOUD_DATA is used to rank cloud specific data
	// above commonly available data,
	// e.g. openstack's "keystone catalogue".
	SPECIFIC_CLOUD_DATA = 20

	// CUSTOM_CLOUD_DATA is the highest available ranking and
	// is given to custom data.
	CUSTOM_CLOUD_DATA = 50
)
    71  
// A urlDataSource retrieves data from a URL built by joining a base URL
// with a relative path. Fetch notes that the resulting URL can be http://
// or file://.
type urlDataSource struct {
	// description is the value reported by Description.
	description string
	// baseURL is the prefix that Fetch and URL join with the requested path.
	baseURL string
	// publicSigningKey is the value reported by PublicSigningKey.
	publicSigningKey string
	// priority is the value reported by Priority.
	priority int
	// requireSigned is the value reported by RequireSigned.
	requireSigned bool
	// httpClient issues the GET requests made by fetch.
	httpClient *jujuhttp.Client
	// clock is handed to retry.Call by Fetch to time the retry delays.
	clock clock.Clock
}
    82  
// Config has values to be used in constructing a datasource.
type Config struct {
	// Description of the datasource. Validate rejects an empty description.
	Description string

	// BaseURL is the URL for this datasource.
	BaseURL string

	// HostnameVerification controls whether the hostname on TLS/SSL
	// certificates is verified. When false, NewDataSource configures its
	// HTTP client to skip hostname verification (for example when the
	// endpoint uses self-signed credentials).
	HostnameVerification bool

	// PublicSigningKey is the public key used to validate signed metadata.
	PublicSigningKey string

	// Priority is an importance factor for the datasource. A higher number
	// means a higher priority. This facilitates sorting data sources in
	// order of importance.
	Priority int

	// RequireSigned indicates whether this datasource requires signed data.
	RequireSigned bool

	// CACertificates contains an optional list of Certificate
	// Authority certificates to be used to validate certificates
	// of cloud infrastructure components.
	// The contents are Base64 encoded x.509 certs.
	CACertificates []string

	// Clock is used for retry. Will use clock.WallClock if nil.
	Clock clock.Clock
}
   115  
   116  // Validate checks that the baseURL is valid and the description is set.
   117  func (c *Config) Validate() error {
   118  	if c.Description == "" {
   119  		return errors.New("no description specified")
   120  	}
   121  	if _, err := url.Parse(c.BaseURL); err != nil {
   122  		return errors.Annotate(err, "base URL is not valid")
   123  	}
   124  	// TODO (hml) 2020-01-08
   125  	// Add validation for PublicSigningKey
   126  	return nil
   127  }
   128  
   129  // NewDataSource returns a new DataSource as defined
   130  // by the given config.
   131  func NewDataSource(cfg Config) DataSource {
   132  	// TODO (hml) 2020-01-08
   133  	// Move call to cfg.Validate() here and add return of error.
   134  	client := jujuhttp.NewClient(
   135  		jujuhttp.WithSkipHostnameVerification(!cfg.HostnameVerification),
   136  		jujuhttp.WithCACertificates(cfg.CACertificates...),
   137  		jujuhttp.WithLogger(logger.ChildWithLabels("http", corelogger.HTTP)),
   138  	)
   139  	return NewDataSourceWithClient(cfg, client)
   140  }
   141  
   142  // NewDataSourceWithClient returns a new DataSource as defines by the given
   143  // Config, but with the addition of a http.Client.
   144  func NewDataSourceWithClient(cfg Config, client *jujuhttp.Client) DataSource {
   145  	clk := cfg.Clock
   146  	if clk == nil {
   147  		clk = clock.WallClock
   148  	}
   149  	return &urlDataSource{
   150  		description:      cfg.Description,
   151  		baseURL:          cfg.BaseURL,
   152  		publicSigningKey: cfg.PublicSigningKey,
   153  		priority:         cfg.Priority,
   154  		requireSigned:    cfg.RequireSigned,
   155  		httpClient:       client,
   156  		clock:            clk,
   157  	}
   158  }
   159  
// Description is defined in simplestreams.DataSource.
// It returns the description this datasource was constructed with.
func (u *urlDataSource) Description() string {
	return u.description
}
   164  
// GoString returns a short representation of the datasource made up of its
// description and its quoted base URL. The method has the signature of
// fmt.GoStringer, which the fmt package consults for the %#v verb.
func (u *urlDataSource) GoString() string {
	return fmt.Sprintf("%v: urlDataSource(%q)", u.description, u.baseURL)
}
   168  
   169  // urlJoin returns baseURL + relpath making sure to have a '/' between them
   170  // This doesn't try to do anything fancy with URL query or parameter bits
   171  // It also doesn't use path.Join because that normalizes slashes, and you need
   172  // to keep both slashes in 'http://'.
   173  func urlJoin(baseURL, relpath string) string {
   174  	if strings.HasSuffix(baseURL, "/") {
   175  		return baseURL + relpath
   176  	}
   177  	return baseURL + "/" + relpath
   178  }
   179  
   180  // Fetch is defined in simplestreams.DataSource.
   181  func (h *urlDataSource) Fetch(path string) (io.ReadCloser, string, error) {
   182  	var readCloser io.ReadCloser
   183  	dataURL := urlJoin(h.baseURL, path)
   184  	// dataURL can be http:// or file://
   185  	// MakeFileURL will only modify the URL if it's a file URL
   186  	dataURL = utils.MakeFileURL(dataURL)
   187  
   188  	err := retry.Call(retry.CallArgs{
   189  		Func: func() error {
   190  			var err error
   191  			readCloser, err = h.fetch(dataURL)
   192  			return err
   193  		},
   194  		IsFatalError: func(err error) bool {
   195  			return errors.Is(err, errors.NotFound) || errors.Is(err, errors.Unauthorized)
   196  		},
   197  		Attempts:    3,
   198  		Delay:       time.Second,
   199  		MaxDelay:    time.Second * 5,
   200  		BackoffFunc: retry.DoubleDelay,
   201  		Clock:       h.clock,
   202  	})
   203  	return readCloser, dataURL, err
   204  }
   205  
   206  func (h *urlDataSource) fetch(path string) (io.ReadCloser, error) {
   207  	resp, err := h.httpClient.Get(context.TODO(), path)
   208  	if err != nil {
   209  		// Callers of this mask the actual error.  Therefore warn here.
   210  		// This is called multiple times when a machine is created, we
   211  		// only need one success for images and one for tools.
   212  		logger.Warningf("Got error requesting %q: %v", path, err)
   213  		return nil, fmt.Errorf("cannot access URL %q: %w", path, err)
   214  	}
   215  	if resp.StatusCode != http.StatusOK {
   216  		_ = resp.Body.Close()
   217  		switch resp.StatusCode {
   218  		case http.StatusNotFound:
   219  			return nil, errors.NotFoundf("%q", path)
   220  		case http.StatusUnauthorized:
   221  			return nil, errors.Unauthorizedf("unauthorised access to URL %q", path)
   222  		}
   223  		return nil, fmt.Errorf("cannot access URL %q, %q", path, resp.Status)
   224  	}
   225  	return resp.Body, nil
   226  }
   227  
   228  // URL is defined in simplestreams.DataSource.
   229  func (h *urlDataSource) URL(path string) (string, error) {
   230  	return utils.MakeFileURL(urlJoin(h.baseURL, path)), nil
   231  }
   232  
// PublicSigningKey is defined in simplestreams.DataSource.
// It returns the public signing key this datasource was constructed with.
func (u *urlDataSource) PublicSigningKey() string {
	return u.publicSigningKey
}
   237  
   238  // Priority is defined in simplestreams.DataSource.
   239  func (h *urlDataSource) Priority() int {
   240  	return h.priority
   241  }
   242  
   243  // RequireSigned is defined in simplestreams.DataSource.
   244  func (h *urlDataSource) RequireSigned() bool {
   245  	return h.requireSigned
   246  }