github.com/mvdan/u-root-coreutils@v0.0.0-20230122170626-c2eef2898555/pkg/curl/schemes.go (about)

     1  // Copyright 2017-2020 the u-root Authors. All rights reserved
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package curl implements routines to fetch files given a URL.
     6  //
     7  // curl currently supports HTTP, TFTP, and local files.
     8  package curl
     9  
    10  import (
    11  	"context"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"log"
    16  	"net"
    17  	"net/http"
    18  	"net/url"
    19  	"os"
    20  	"path/filepath"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/cenkalti/backoff/v4"
    25  	"github.com/mvdan/u-root-coreutils/pkg/uio"
    26  	"pack.ag/tftp"
    27  )
    28  
    29  // ErrNoSuchScheme is returned by Schemes.Fetch and
    30  // Schemes.LazyFetch if there is no registered FileScheme
    31  // implementation for the given URL scheme.
    32  var ErrNoSuchScheme = errors.New("no such scheme")
    33  var ErrStatusNotOk = errors.New("not status 200")
    34  
    35  // File is a reference to a file fetched through this library.
    36  type File interface {
    37  	fmt.Stringer
    38  	// URL is the file's original URL.
    39  	URL() *url.URL
    40  }
    41  
    42  // FileWithCache is a io.ReaderAt with a nice stringer for file's original URL.
    43  type FileWithCache interface {
    44  	io.ReaderAt
    45  	File
    46  }
    47  
    48  // FileWithoutCache is a io.Reader with a nice stringer for file's original URL.
    49  type FileWithoutCache interface {
    50  	io.Reader
    51  	File
    52  }
    53  
    54  // FileScheme represents the implementation of a URL scheme and gives access to
    55  // fetching files of that scheme.
    56  //
    57  // For example, an http FileScheme implementation would fetch files using
    58  // the HTTP protocol.
    59  type FileScheme interface {
    60  	// Fetch returns a reader that gives the contents of `u`.
    61  	//
    62  	// It may do so by fetching `u` and placing it in a buffer, or by
    63  	// returning an io.ReaderAt that fetchs the file.
    64  	Fetch(ctx context.Context, u *url.URL) (io.ReaderAt, error)
    65  	FetchWithoutCache(ctx context.Context, u *url.URL) (io.Reader, error)
    66  }
    67  
    68  var (
    69  	// DefaultHTTPClient is the default HTTP FileScheme.
    70  	//
    71  	// It is not recommended to use this for HTTPS. We recommend creating an
    72  	// http.Client that accepts only a private pool of certificates.
    73  	DefaultHTTPClient = NewHTTPClient(http.DefaultClient)
    74  
    75  	// DefaultTFTPClient is the default TFTP FileScheme.
    76  	DefaultTFTPClient = NewTFTPClient(tftp.ClientMode(tftp.ModeOctet), tftp.ClientBlocksize(1450), tftp.ClientWindowsize(64))
    77  
    78  	// DefaultSchemes are the schemes supported by default.
    79  	DefaultSchemes = Schemes{
    80  		"tftp": DefaultTFTPClient,
    81  		"http": DefaultHTTPClient,
    82  		"file": &LocalFileClient{},
    83  	}
    84  )
    85  
    86  // URLError is an error involving URLs.
    87  type URLError struct {
    88  	URL *url.URL
    89  	Err error
    90  }
    91  
    92  // Error implements error.Error.
    93  func (s *URLError) Error() string {
    94  	return fmt.Sprintf("encountered error %v with %q", s.Err, s.URL)
    95  }
    96  
    97  // Unwrap unwraps the underlying error.
    98  func (s *URLError) Unwrap() error {
    99  	return s.Err
   100  }
   101  
   102  // IsURLError returns true iff err is a URLError.
   103  func IsURLError(err error) bool {
   104  	_, ok := err.(*URLError)
   105  	return ok
   106  }
   107  
   108  // Schemes is a map of URL scheme identifier -> implementation that can
   109  // fetch a file for that scheme.
   110  type Schemes map[string]FileScheme
   111  
   112  // RegisterScheme calls DefaultSchemes.Register.
   113  func RegisterScheme(scheme string, fs FileScheme) {
   114  	DefaultSchemes.Register(scheme, fs)
   115  }
   116  
   117  // Register registers a scheme identified by `scheme` to be `fs`.
   118  func (s Schemes) Register(scheme string, fs FileScheme) {
   119  	s[scheme] = fs
   120  }
   121  
   122  // Fetch fetchs a file via DefaultSchemes.
   123  func Fetch(ctx context.Context, u *url.URL) (FileWithCache, error) {
   124  	return DefaultSchemes.Fetch(ctx, u)
   125  }
   126  
   127  func FetchWithoutCache(ctx context.Context, u *url.URL) (FileWithoutCache, error) {
   128  	return DefaultSchemes.FetchWithoutCache(ctx, u)
   129  }
   130  
   131  // cacheFile is an io.ReaderAt with a nice Stringer.
   132  type cacheFile struct {
   133  	io.ReaderAt
   134  
   135  	url *url.URL
   136  }
   137  
   138  // URL returns the cacheFile URL.
   139  func (f cacheFile) URL() *url.URL {
   140  	return f.url
   141  }
   142  
   143  // String implements fmt.Stringer.
   144  func (f cacheFile) String() string {
   145  	return f.url.String()
   146  }
   147  
   148  // file is an io.Reader with a nice Stringer.
   149  type file struct {
   150  	io.Reader
   151  
   152  	url *url.URL
   153  }
   154  
   155  // URL returns the file URL.
   156  func (f file) URL() *url.URL {
   157  	return f.url
   158  }
   159  
   160  // String implements fmt.Stringer.
   161  func (f file) String() string {
   162  	return f.url.String()
   163  }
   164  
   165  // Fetch fetchs the file with the given `u`. `u.Scheme` is used to
   166  // select the FileScheme via `s`.
   167  //
   168  // If `s` does not contain a FileScheme for `u.Scheme`, ErrNoSuchScheme is
   169  // returned.
   170  //
   171  // Content is cached in memory as it reads.
   172  func (s Schemes) Fetch(ctx context.Context, u *url.URL) (FileWithCache, error) {
   173  	fg, ok := s[u.Scheme]
   174  	if !ok {
   175  		return nil, &URLError{URL: u, Err: ErrNoSuchScheme}
   176  	}
   177  	r, err := fg.Fetch(ctx, u)
   178  	if err != nil {
   179  		return nil, &URLError{URL: u, Err: err}
   180  	}
   181  	return &cacheFile{ReaderAt: r, url: u}, nil
   182  }
   183  
   184  // FetchWithoutCache is same as Fetch, but returns a io.Reader of File that
   185  // does not cache read content.
   186  func (s Schemes) FetchWithoutCache(ctx context.Context, u *url.URL) (FileWithoutCache, error) {
   187  	fg, ok := s[u.Scheme]
   188  	if !ok {
   189  		return nil, &URLError{URL: u, Err: ErrNoSuchScheme}
   190  	}
   191  	r, err := fg.FetchWithoutCache(ctx, u)
   192  	if err != nil {
   193  		return nil, &URLError{URL: u, Err: err}
   194  	}
   195  	return &file{Reader: r, url: u}, nil
   196  }
   197  
   198  // LazyFetch calls LazyFetch on DefaultSchemes.
   199  func LazyFetch(u *url.URL) (FileWithCache, error) {
   200  	return DefaultSchemes.LazyFetch(u)
   201  }
   202  
   203  // LazyFetch returns a reader that will Fetch the file given by `u` when
   204  // Read is called, based on `u`s scheme. See Schemes.Fetch for more
   205  // details.
   206  func (s Schemes) LazyFetch(u *url.URL) (FileWithCache, error) {
   207  	fg, ok := s[u.Scheme]
   208  	if !ok {
   209  		return nil, &URLError{URL: u, Err: ErrNoSuchScheme}
   210  	}
   211  
   212  	return &cacheFile{
   213  		url: u,
   214  		ReaderAt: uio.NewLazyOpenerAt(u.String(), func() (io.ReaderAt, error) {
   215  			// TODO
   216  			r, err := fg.Fetch(context.TODO(), u)
   217  			if err != nil {
   218  				return nil, &URLError{URL: u, Err: err}
   219  			}
   220  			return r, nil
   221  		}),
   222  	}, nil
   223  }
   224  
   225  // TFTPClient implements FileScheme for TFTP files.
   226  type TFTPClient struct {
   227  	opts []tftp.ClientOpt
   228  }
   229  
   230  // NewTFTPClient returns a new TFTP client based on the given tftp.ClientOpt.
   231  func NewTFTPClient(opts ...tftp.ClientOpt) FileScheme {
   232  	return &TFTPClient{
   233  		opts: opts,
   234  	}
   235  }
   236  
   237  func tftpFetch(_ context.Context, t *TFTPClient, u *url.URL) (io.Reader, error) {
   238  	// TODO(hugelgupf): These clients are basically stateless, except for
   239  	// the options. Figure out whether you actually have to re-establish
   240  	// this connection every time. Audit the TFTP library.
   241  	c, err := tftp.NewClient(t.opts...)
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  
   246  	r, err := c.Get(u.String())
   247  	if err != nil {
   248  		return nil, err
   249  	}
   250  
   251  	return r, nil
   252  }
   253  
   254  // Fetch implements FileScheme.Fetch for TFTP.
   255  func (t *TFTPClient) Fetch(ctx context.Context, u *url.URL) (io.ReaderAt, error) {
   256  	r, err := tftpFetch(ctx, t, u)
   257  	if err != nil {
   258  		return nil, err
   259  	}
   260  	return uio.NewCachingReader(r), nil
   261  }
   262  
   263  // FetchWithoutCache implements FileScheme.FetchWithoutCache for TFTP.
   264  func (t *TFTPClient) FetchWithoutCache(ctx context.Context, u *url.URL) (io.Reader, error) {
   265  	return tftpFetch(ctx, t, u)
   266  }
   267  
   268  // RetryTFTP retries downloads if the error does not contain FILE_NOT_FOUND.
   269  //
   270  // pack.ag/tftp does not export the necessary structs to get the
   271  // code out of the error message cleanly, but it does embed FILE_NOT_FOUND in
   272  // the error string.
   273  func RetryTFTP(u *url.URL, err error) bool {
   274  	return !strings.Contains(err.Error(), "FILE_NOT_FOUND")
   275  }
   276  
   277  // DoRetry returns true if the Fetch request for the URL should be
   278  // retried. err is the error that Fetch previously returned.
   279  //
   280  // DoRetry lets a FileScheme filter for errors returned by Fetch
   281  // which are worth retrying. If this interface is not implemented, the
   282  // default for SchemeWithRetries is to always retry. DoRetry
   283  // returns true to indicate a request should be retried.
   284  type DoRetry func(u *url.URL, err error) bool
   285  
   286  // SchemeWithRetries wraps a FileScheme and automatically retries (with
   287  // backoff) when Fetch returns a non-nil err.
   288  type SchemeWithRetries struct {
   289  	Scheme FileScheme
   290  
   291  	// DoRetry should return true to indicate the Fetch shall be retried.
   292  	// Even if DoRetry returns true, BackOff can still determine whether to
   293  	// stop.
   294  	//
   295  	// If DoRetry is nil, it will be retried if the BackOff agrees.
   296  	DoRetry DoRetry
   297  
   298  	// BackOff determines how often to retry and how long to wait between
   299  	// each retry.
   300  	BackOff backoff.BackOff
   301  }
   302  
   303  // Fetch implements FileScheme.Fetch for retry wrapper.
   304  func (s *SchemeWithRetries) Fetch(ctx context.Context, u *url.URL) (io.ReaderAt, error) {
   305  	var err error
   306  	s.BackOff.Reset()
   307  	back := backoff.WithContext(s.BackOff, ctx)
   308  	for d := time.Duration(0); d != backoff.Stop; d = back.NextBackOff() {
   309  		if d > 0 {
   310  			time.Sleep(d)
   311  		}
   312  
   313  		var r io.ReaderAt
   314  		// Note: err uses the scope outside the for loop.
   315  		r, err = s.Scheme.Fetch(ctx, u)
   316  		if err == nil {
   317  			return r, nil
   318  		}
   319  
   320  		log.Printf("Error: Getting %v: %v", u, err)
   321  		if s.DoRetry != nil && !s.DoRetry(u, err) {
   322  			return r, err
   323  		}
   324  		log.Printf("Retrying %v", u)
   325  	}
   326  
   327  	log.Printf("Error: Too many retries to get file %v", u)
   328  	return nil, err
   329  }
   330  
   331  // FetchWithoutCache implements FileScheme.FetchWithoutCache for retry wrapper.
   332  func (s *SchemeWithRetries) FetchWithoutCache(ctx context.Context, u *url.URL) (io.Reader, error) {
   333  	var err error
   334  	s.BackOff.Reset()
   335  	back := backoff.WithContext(s.BackOff, ctx)
   336  	for d := time.Duration(0); d != backoff.Stop; d = back.NextBackOff() {
   337  		if d > 0 {
   338  			time.Sleep(d)
   339  		}
   340  
   341  		var r io.Reader
   342  		// Note: err uses the scope outside the for loop.
   343  		r, err = s.Scheme.FetchWithoutCache(ctx, u)
   344  		if err == nil {
   345  			return r, nil
   346  		}
   347  
   348  		log.Printf("Error: Getting %v: %v", u, err)
   349  		if s.DoRetry != nil && !s.DoRetry(u, err) {
   350  			return r, err
   351  		}
   352  		log.Printf("Retrying %v", u)
   353  	}
   354  
   355  	log.Printf("Error: Too many retries to get file %v", u)
   356  	return nil, err
   357  }
   358  
   359  // HTTPClientCodeError is returned by HTTPClient.Fetch when the server replies
   360  // with a non-200 code.
   361  type HTTPClientCodeError struct {
   362  	Err      error
   363  	HTTPCode int
   364  }
   365  
   366  // Error implements error for HTTPClientCodeError.
   367  func (h *HTTPClientCodeError) Error() string {
   368  	return fmt.Sprintf("HTTP server responded with error code %d, want 200: response %v", h.HTTPCode, h.Err)
   369  }
   370  
   371  // Unwrap implements errors.Unwrap.
   372  func (h *HTTPClientCodeError) Unwrap() error {
   373  	return h.Err
   374  }
   375  
   376  // HTTPClient implements FileScheme for HTTP files.
   377  type HTTPClient struct {
   378  	c *http.Client
   379  }
   380  
   381  // NewHTTPClient returns a new HTTP FileScheme based on the given http.Client.
   382  func NewHTTPClient(c *http.Client) *HTTPClient {
   383  	return &HTTPClient{
   384  		c: c,
   385  	}
   386  }
   387  
   388  func httpFetch(ctx context.Context, c *http.Client, u *url.URL) (io.Reader, error) {
   389  	req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil)
   390  	if err != nil {
   391  		return nil, err
   392  	}
   393  	resp, err := c.Do(req)
   394  	if err != nil {
   395  		return nil, err
   396  	}
   397  
   398  	if resp.StatusCode != 200 {
   399  		return nil, &HTTPClientCodeError{ErrStatusNotOk, resp.StatusCode}
   400  	}
   401  	return resp.Body, nil
   402  }
   403  
   404  // Fetch implements FileScheme.Fetch for HTTP.
   405  func (h HTTPClient) Fetch(ctx context.Context, u *url.URL) (io.ReaderAt, error) {
   406  	r, err := httpFetch(ctx, h.c, u)
   407  	if err != nil {
   408  		return nil, err
   409  	}
   410  	return uio.NewCachingReader(r), nil
   411  }
   412  
   413  // FetchWithoutCache implements FileScheme.FetchWithoutCache for HTTP.
   414  func (h HTTPClient) FetchWithoutCache(ctx context.Context, u *url.URL) (io.Reader, error) {
   415  	return httpFetch(ctx, h.c, u)
   416  }
   417  
   418  // RetryOr returns a DoRetry function that returns true if any one of fn return
   419  // true.
   420  func RetryOr(fn ...DoRetry) DoRetry {
   421  	return func(u *url.URL, err error) bool {
   422  		for _, f := range fn {
   423  			if f(u, err) {
   424  				return true
   425  			}
   426  		}
   427  		return false
   428  	}
   429  }
   430  
   431  // RetryConnectErrors retries only connect(2) errors.
   432  func RetryConnectErrors(u *url.URL, err error) bool {
   433  	var serr *os.SyscallError
   434  	if errors.As(err, &serr) && serr.Syscall == "connect" {
   435  		return true
   436  	}
   437  	return false
   438  }
   439  
   440  // RetryTemporaryNetworkErrors only retries temporary network errors.
   441  //
   442  // This relies on Go's net.Error.Temporary definition of temporary network
   443  // errors, which does not include network configuration errors. The latter are
   444  // relevant for users of DHCP, for example.
   445  func RetryTemporaryNetworkErrors(u *url.URL, err error) bool {
   446  	var nerr net.Error
   447  	if errors.As(err, &nerr) {
   448  		return nerr.Temporary()
   449  	}
   450  	return false
   451  }
   452  
   453  // RetryHTTP implements DoRetry for HTTP error codes where it makes sense.
   454  func RetryHTTP(u *url.URL, err error) bool {
   455  	var e *HTTPClientCodeError
   456  	if !errors.As(err, &e) {
   457  		return false
   458  	}
   459  	switch c := e.HTTPCode; {
   460  	case c == 200:
   461  		return false
   462  
   463  	case c == 408, c == 409, c == 425, c == 429:
   464  		// Retry for codes "Request Timeout(408), Conflict(409), Too Early(425), and Too Many Requests(429)"
   465  		return true
   466  
   467  	case c >= 400 && c < 500:
   468  		// We don't retry all other 400 codes, since the situation won't be improved with a retry.
   469  		return false
   470  
   471  	default:
   472  		return true
   473  	}
   474  }
   475  
   476  // LocalFileClient implements FileScheme for files on disk.
   477  type LocalFileClient struct{}
   478  
   479  // Fetch implements FileScheme.Fetch for LocalFile.
   480  func (lfs LocalFileClient) Fetch(_ context.Context, u *url.URL) (io.ReaderAt, error) {
   481  	return os.Open(filepath.Clean(u.Path))
   482  }
   483  
   484  // FetchWithoutCache implements FileScheme.FetchWithoutCache for LocalFile.
   485  func (lfs LocalFileClient) FetchWithoutCache(_ context.Context, u *url.URL) (io.Reader, error) {
   486  	return os.Open(filepath.Clean(u.Path))
   487  }