github.com/anchore/syft@v1.38.2/internal/file/getter.go (about)

     1  package file
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net/http"
     7  
     8  	"github.com/hashicorp/go-getter"
     9  	"github.com/hashicorp/go-getter/helper/url"
    10  	"github.com/wagoodman/go-progress"
    11  
    12  	"github.com/anchore/clio"
    13  	"github.com/anchore/stereoscope/pkg/file"
    14  	"github.com/anchore/syft/internal"
    15  )
    16  
    17  var (
    18  	archiveExtensions   = getterDecompressorNames()
    19  	ErrNonArchiveSource = fmt.Errorf("non-archive sources are not supported for directory destinations")
    20  )
    21  
// Getter abstracts downloading remote resources to the local filesystem.
type Getter interface {
	// GetFile downloads the given URL into the given path. The URL must reference a single file.
	// The progress monitor is optional (the HashiGoGetter implementation rejects more than one).
	GetFile(dst, src string, monitor ...*progress.Manual) error

	// GetToDir downloads the resource found at the `src` URL into the given `dst` directory.
	// The directory must already exist, and the remote resource MUST BE AN ARCHIVE (e.g. `.tar.gz`).
	// The progress monitor is optional (the HashiGoGetter implementation rejects more than one).
	GetToDir(dst, src string, monitor ...*progress.Manual) error
}
    30  
// HashiGoGetter is a Getter backed by hashicorp/go-getter.
type HashiGoGetter struct {
	// httpGetter is the getter registered for both the "http" and "https" schemes on every request.
	httpGetter getter.HttpGetter
}
    34  
    35  // NewGetter creates and returns a new Getter. Providing an http.Client is optional. If one is provided,
    36  // it will be used for all HTTP(S) getting; otherwise, go-getter's default getters will be used.
    37  func NewGetter(id clio.Identification, httpClient *http.Client) *HashiGoGetter {
    38  	return &HashiGoGetter{
    39  		httpGetter: getter.HttpGetter{
    40  			Client: httpClient,
    41  			Header: http.Header{
    42  				"User-Agent": []string{fmt.Sprintf("%v %v", id.Name, id.Version)},
    43  			},
    44  		},
    45  	}
    46  }
    47  
    48  func (g HashiGoGetter) GetFile(dst, src string, monitors ...*progress.Manual) error {
    49  	if len(monitors) > 1 {
    50  		return fmt.Errorf("multiple monitors provided, which is not allowed")
    51  	}
    52  
    53  	return getterClient(dst, src, false, g.httpGetter, monitors).Get()
    54  }
    55  
    56  func (g HashiGoGetter) GetToDir(dst, src string, monitors ...*progress.Manual) error {
    57  	// though there are multiple getters, only the http/https getter requires extra validation
    58  	if err := validateHTTPSource(src); err != nil {
    59  		return err
    60  	}
    61  	if len(monitors) > 1 {
    62  		return fmt.Errorf("multiple monitors provided, which is not allowed")
    63  	}
    64  
    65  	return getterClient(dst, src, true, g.httpGetter, monitors).Get()
    66  }
    67  
    68  func validateHTTPSource(src string) error {
    69  	// we are ignoring any sources that are not destined to use the http getter object
    70  	if !internal.HasAnyOfPrefixes(src, "http://", "https://") {
    71  		return nil
    72  	}
    73  
    74  	u, err := url.Parse(src)
    75  	if err != nil {
    76  		return fmt.Errorf("bad URL provided %q: %w", src, err)
    77  	}
    78  	// only allow for sources with archive extensions
    79  	if !internal.HasAnyOfSuffixes(u.Path, archiveExtensions...) {
    80  		return ErrNonArchiveSource
    81  	}
    82  	return nil
    83  }
    84  
    85  func getterClient(dst, src string, dir bool, httpGetter getter.HttpGetter, monitors []*progress.Manual) *getter.Client {
    86  	client := &getter.Client{
    87  		Src: src,
    88  		Dst: dst,
    89  		Dir: dir,
    90  		Getters: map[string]getter.Getter{
    91  			"http":  &httpGetter,
    92  			"https": &httpGetter,
    93  			// note: these are the default getters from https://github.com/hashicorp/go-getter/blob/v1.5.9/get.go#L68-L74
    94  			// it is possible that other implementations need to account for custom httpclient injection, however,
    95  			// that has not been accounted for at this time.
    96  			"file": new(getter.FileGetter),
    97  			"git":  new(getter.GitGetter),
    98  			"gcs":  new(getter.GCSGetter),
    99  			"hg":   new(getter.HgGetter),
   100  			"s3":   new(getter.S3Getter),
   101  		},
   102  		Options: mapToGetterClientOptions(monitors),
   103  	}
   104  
   105  	return client
   106  }
   107  
   108  func withProgress(monitor *progress.Manual) func(client *getter.Client) error {
   109  	return getter.WithProgress(
   110  		&progressAdapter{monitor: monitor},
   111  	)
   112  }
   113  
   114  func mapToGetterClientOptions(monitors []*progress.Manual) []getter.ClientOption {
   115  	var result []getter.ClientOption
   116  
   117  	for _, monitor := range monitors {
   118  		result = append(result, withProgress(monitor))
   119  	}
   120  
   121  	// derived from https://github.com/hashicorp/go-getter/blob/v2.2.3/decompress.go#L23-L63
   122  	fileSizeLimit := int64(5 * file.GB)
   123  
   124  	dec := getter.LimitedDecompressors(0, fileSizeLimit)
   125  
   126  	result = append(result, getter.WithDecompressors(dec))
   127  
   128  	return result
   129  }
   130  
   131  type readCloser struct {
   132  	progress.Reader
   133  }
   134  
   135  func (c *readCloser) Close() error { return nil }
   136  
   137  type progressAdapter struct {
   138  	monitor *progress.Manual
   139  }
   140  
   141  func (a *progressAdapter) TrackProgress(_ string, currentSize, totalSize int64, stream io.ReadCloser) io.ReadCloser {
   142  	a.monitor.Set(currentSize)
   143  	a.monitor.SetTotal(totalSize)
   144  	return &readCloser{
   145  		Reader: *progress.NewProxyReader(stream, a.monitor),
   146  	}
   147  }
   148  
   149  func getterDecompressorNames() (names []string) {
   150  	for name := range getter.Decompressors {
   151  		names = append(names, name)
   152  	}
   153  	return names
   154  }