sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/io/opener.go

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package io

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	utilerrors "k8s.io/apimachinery/pkg/util/errors"

	"cloud.google.com/go/storage"
	"github.com/sirupsen/logrus"
	"gocloud.dev/blob"
	"gocloud.dev/gcerrors"
	"google.golang.org/api/googleapi"
	"google.golang.org/api/option"

	"github.com/GoogleCloudPlatform/testgrid/util/gcs" // TODO(fejta): move this logic here

	"sigs.k8s.io/prow/pkg/io/providers"
)

const (
	httpsScheme = "https"
)

type storageClient interface {
	Bucket(name string) *storage.BucketHandle
}

// Aliases to types in the standard library
type (
	ReadCloser  = io.ReadCloser
	WriteCloser = io.WriteCloser
	Writer      = io.Writer
	Closer      = io.Closer
)

type Attributes struct {
	// ContentEncoding specifies the encoding used for the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
	ContentEncoding string
	// ContentType is the MIME type of the blob, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type
	ContentType string
	// ContentDisposition specifies whether the blob content is expected to be displayed inline or as an attachment.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
	ContentDisposition string
	// ContentLanguage specifies the language used in the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language
	ContentLanguage string
	// Size is the size of the blob's content in bytes.
	Size int64
	// Metadata includes user-metadata associated with the file
	Metadata map[string]string
}

type ObjectAttrsToUpdate struct {
	ContentEncoding *string
	Metadata        map[string]string
}

// Opener has methods to read and write paths
type Opener interface {
	Reader(ctx context.Context, path string) (ReadCloser, error)
	RangeReader(ctx context.Context, path string, offset, length int64) (io.ReadCloser, error)
	Writer(ctx context.Context, path string, opts ...WriterOptions) (WriteCloser, error)
	Attributes(ctx context.Context, path string) (Attributes, error)
	SignedURL(ctx context.Context, path string, opts SignedURLOptions) (string, error)
	Iterator(ctx context.Context, prefix, delimiter string) (ObjectIterator, error)
	UpdateAtributes(context.Context, string, ObjectAttrsToUpdate) (*Attributes, error)
}
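
// The following is an illustrative sketch and not part of the original API:
// it shows how a caller might read an object through the Opener interface.
// The function name and the example path are hypothetical.
func exampleReadObject(ctx context.Context, o Opener) ([]byte, error) {
	// The same call works for "gs://", "s3://" and local ("/..." or "file://") paths.
	r, err := o.Reader(ctx, "gs://some-bucket/some/object.json")
	if err != nil {
		return nil, err
	}
	defer LogClose(r)
	return io.ReadAll(r)
}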

type opener struct {
	gcsCredentialsFile string
	gcsClient          storageClient
	s3Credentials      []byte
	cachedBuckets      map[string]*blob.Bucket
	cachedBucketsMutex sync.Mutex
}

// NewOpener returns an opener that can read GCS, S3 and local paths.
// The credentials files may be empty; for local paths they must be empty.
// If a credentials file is empty, gocloud auto-discovery is used to detect credentials.
// For more details about the possible content of the credentials files see prow/io/providers.GetBucket.
func NewOpener(ctx context.Context, gcsCredentialsFile, s3CredentialsFile string) (Opener, error) {
	gcsClient, err := createGCSClient(ctx, gcsCredentialsFile)
	if err != nil {
		return nil, err
	}
	var s3Credentials []byte
	if s3CredentialsFile != "" {
		s3Credentials, err = os.ReadFile(s3CredentialsFile)
		if err != nil {
			return nil, err
		}
	}
	return &opener{
		gcsClient:          gcsClient,
		gcsCredentialsFile: gcsCredentialsFile,
		s3Credentials:      s3Credentials,
		cachedBuckets:      map[string]*blob.Bucket{},
	}, nil
}
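
// Illustrative sketch, not part of the original code: constructing an Opener.
// Passing empty credentials files relies on the default/anonymous clients
// described above; the credential paths mentioned in the comment are hypothetical.
func exampleNewOpener(ctx context.Context) (Opener, error) {
	// For explicit credentials, pass file paths instead, e.g.
	// NewOpener(ctx, "/path/to/gcs-sa.json", "/path/to/s3-creds.json").
	return NewOpener(ctx, "", "")
}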

// NewGCSOpener can be used for testing against a fakeGCSClient
func NewGCSOpener(gcsClient *storage.Client) Opener {
	return &opener{
		gcsClient:     gcsClient,
		cachedBuckets: map[string]*blob.Bucket{},
	}
}

func createGCSClient(ctx context.Context, gcsCredentialsFile string) (storageClient, error) {
	// If gcsCredentialsFile is set, we must be able to create a storage.Client with those credentials.
	if gcsCredentialsFile != "" {
		return storage.NewClient(ctx, option.WithCredentialsFile(gcsCredentialsFile))
	}

	// If gcsCredentialsFile is unset, first try the application default credentials.
	gcsClient, err := storage.NewClient(ctx)
	if err == nil {
		return gcsClient, nil
	}
	logrus.WithError(err).Debug("Cannot load application default gcp credentials, falling back to anonymous client")

	// If the default credentials don't work, use an anonymous client; this should always succeed.
	return storage.NewClient(ctx, option.WithoutAuthentication())
}

// ErrNotFoundTest can be used for unit tests to simulate NotFound errors.
// This is required because gocloud doesn't expose its errors.
var ErrNotFoundTest = fmt.Errorf("not found error which should only be used in tests")

// IsNotExist returns true if the error indicates that the object does not exist.
func IsNotExist(err error) bool {
	if os.IsNotExist(err) {
		return true
	}
	if errors.Is(err, ErrNotFoundTest) {
		return true
	}
	if errors.Is(err, os.ErrNotExist) {
		return true
	}
	if errors.Is(err, storage.ErrObjectNotExist) {
		return true
	}
	return gcerrors.Code(err) == gcerrors.NotFound
}
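
// Illustrative sketch, not part of the original code: treating a missing
// object as "absent" rather than as an error, using IsNotExist. The helper
// name is hypothetical.
func exampleExists(ctx context.Context, o Opener, path string) (bool, error) {
	if _, err := o.Attributes(ctx, path); err != nil {
		if IsNotExist(err) {
			return false, nil
		}
		return false, err
	}
	return true, nil
}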

// LogClose attempts to close c and logs any error.
func LogClose(c io.Closer) {
	if err := c.Close(); err != nil {
		logrus.WithError(err).Error("Failed to close")
	}
}

func (o *opener) openGCS(path string) (*storage.ObjectHandle, error) {
	if !strings.HasPrefix(path, providers.GS+"://") {
		return nil, nil
	}
	if o.gcsClient == nil {
		return nil, errors.New("no gcs client configured")
	}
	var p gcs.Path
	if err := p.Set(path); err != nil {
		return nil, err
	}
	if p.Object() == "" {
		return nil, errors.New("object name is empty")
	}
	return o.gcsClient.Bucket(p.Bucket()).Object(p.Object()), nil
}

// getBucket opens a bucket.
// The storage provider is discovered based on the given path.
// Buckets are cached by bucket name, so the same bucket is not opened multiple times in one process.
func (o *opener) getBucket(ctx context.Context, path string) (*blob.Bucket, string, error) {
	_, bucketName, relativePath, err := providers.ParseStoragePath(path)
	if err != nil {
		return nil, "", fmt.Errorf("could not get bucket: %w", err)
	}

	o.cachedBucketsMutex.Lock()
	defer o.cachedBucketsMutex.Unlock()
	if bucket, ok := o.cachedBuckets[bucketName]; ok {
		return bucket, relativePath, nil
	}

	bucket, err := providers.GetBucket(ctx, o.s3Credentials, path)
	if err != nil {
		return nil, "", err
	}
	o.cachedBuckets[bucketName] = bucket
	return bucket, relativePath, nil
}

// Reader opens the path for reading, returning an error that satisfies IsNotExist when the object is missing.
func (o *opener) Reader(ctx context.Context, path string) (io.ReadCloser, error) {
	if strings.HasPrefix(path, providers.GS+"://") {
		g, err := o.openGCS(path)
		if err != nil {
			return nil, fmt.Errorf("bad gcs path: %w", err)
		}
		return g.NewReader(ctx)
	}
	if strings.HasPrefix(path, "/") {
		return os.Open(path)
	}

	bucket, relativePath, err := o.getBucket(ctx, path)
	if err != nil {
		return nil, err
	}
	reader, err := bucket.NewReader(ctx, relativePath, nil)
	if err != nil {
		return nil, err
	}
	return reader, nil
}

func (o *opener) RangeReader(ctx context.Context, path string, offset, length int64) (io.ReadCloser, error) {
	if strings.HasPrefix(path, providers.GS+"://") {
		g, err := o.openGCS(path)
		if err != nil {
			return nil, fmt.Errorf("bad gcs path: %w", err)
		}
		return g.NewRangeReader(ctx, offset, length)
	}

	bucket, relativePath, err := o.getBucket(ctx, path)
	if err != nil {
		return nil, err
	}
	reader, err := bucket.NewRangeReader(ctx, relativePath, offset, length, nil)
	if err != nil {
		return nil, err
	}
	return reader, nil
}
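
// Illustrative sketch, not part of the original code: reading only the first
// n bytes of an object via RangeReader (subject to the underlying provider's
// range semantics). The helper name is hypothetical.
func exampleReadPrefix(ctx context.Context, o Opener, path string, n int64) ([]byte, error) {
	r, err := o.RangeReader(ctx, path, 0, n)
	if err != nil {
		return nil, err
	}
	defer LogClose(r)
	return io.ReadAll(r)
}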

// PreconditionFailedObjectAlreadyExists is returned when a write with the
// PreconditionDoesNotExist option fails because the object already exists.
var PreconditionFailedObjectAlreadyExists = fmt.Errorf("object already exists")

// Writer returns a writer that overwrites the path.
func (o *opener) Writer(ctx context.Context, p string, opts ...WriterOptions) (io.WriteCloser, error) {
	options := &WriterOptions{}
	for _, opt := range opts {
		opt.Apply(options)
	}
	if strings.HasPrefix(p, providers.GS+"://") {
		g, err := o.openGCS(p)
		if err != nil {
			return nil, fmt.Errorf("bad gcs path: %w", err)
		}
		if options.PreconditionDoesNotExist != nil && *options.PreconditionDoesNotExist {
			g = g.If(storage.Conditions{DoesNotExist: true})
		}

		writer := g.NewWriter(ctx)
		options.apply(writer, nil)
		return writer, nil
	}
	if strings.HasPrefix(p, "/") || strings.HasPrefix(p, providers.File+"://") {
		p := strings.TrimPrefix(p, providers.File+"://")
		// Create the parent directory if it doesn't exist.
		dir := path.Dir(p)
		if err := os.MkdirAll(dir, 0755); err != nil {
			return nil, fmt.Errorf("create directory %q: %w", dir, err)
		}
		return os.OpenFile(p, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	}

	bucket, relativePath, err := o.getBucket(ctx, p)
	if err != nil {
		return nil, err
	}
	var wOpts blob.WriterOptions
	options.apply(nil, &wOpts)

	if options.PreconditionDoesNotExist != nil && *options.PreconditionDoesNotExist {
		wOpts.BeforeWrite = func(asFunc func(interface{}) bool) error {
			_, err := o.Reader(ctx, p)
			if err != nil {
				// We got an error other than "object does not exist"; surface it.
				if !IsNotExist(err) {
					return err
				}
				// Precondition fulfilled: the object does not exist yet.
				return nil
			}
			// Precondition failed: the read succeeded, so the object already exists.
			return PreconditionFailedObjectAlreadyExists
		}
	}

	writer, err := bucket.NewWriter(ctx, relativePath, &wOpts)
	if err != nil {
		return nil, err
	}
	return writer, nil
}
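
// Illustrative sketch, not part of the original code: writing an object only
// if it does not exist yet, using the PreconditionDoesNotExist writer option.
// A failed precondition typically surfaces as PreconditionFailedObjectAlreadyExists
// (non-GCS providers) or as an HTTP 412 googleapi error (GCS); see isErrUnexpected.
// The helper name is hypothetical.
func exampleWriteIfAbsent(ctx context.Context, o Opener, path string, content []byte) error {
	doesNotExist := true
	w, err := o.Writer(ctx, path, WriterOptions{PreconditionDoesNotExist: &doesNotExist})
	if err != nil {
		return err
	}
	if _, err := w.Write(content); err != nil {
		_ = w.Close() // best-effort close on write failure
		return err
	}
	return w.Close()
}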

func (o *opener) Attributes(ctx context.Context, path string) (Attributes, error) {
	if strings.HasPrefix(path, providers.GS+"://") {
		g, err := o.openGCS(path)
		if err != nil {
			return Attributes{}, fmt.Errorf("bad gcs path: %w", err)
		}
		attr, err := g.Attrs(ctx)
		if err != nil {
			return Attributes{}, err
		}
		return Attributes{
			ContentEncoding:    attr.ContentEncoding,
			ContentType:        attr.ContentType,
			ContentDisposition: attr.ContentDisposition,
			ContentLanguage:    attr.ContentLanguage,
			Size:               attr.Size,
			Metadata:           attr.Metadata,
		}, nil
	}

	bucket, relativePath, err := o.getBucket(ctx, path)
	if err != nil {
		return Attributes{}, err
	}

	attr, err := bucket.Attributes(ctx, relativePath)
	if err != nil {
		return Attributes{}, err
	}
	return Attributes{
		ContentEncoding:    attr.ContentEncoding,
		ContentType:        attr.ContentType,
		ContentDisposition: attr.ContentDisposition,
		ContentLanguage:    attr.ContentLanguage,
		Size:               attr.Size,
		Metadata:           attr.Metadata,
	}, nil
}

func (o *opener) UpdateAtributes(ctx context.Context, path string, attrs ObjectAttrsToUpdate) (*Attributes, error) {
	if !strings.HasPrefix(path, providers.GS+"://") {
		return nil, fmt.Errorf("unsupported provider: %q", path)
	}

	g, err := o.openGCS(path)
	if err != nil {
		return nil, fmt.Errorf("open: %w", err)
	}
	up := storage.ObjectAttrsToUpdate{
		Metadata: attrs.Metadata,
	}
	if attrs.ContentEncoding != nil {
		up.ContentEncoding = *attrs.ContentEncoding
	}
	oa, err := g.Update(ctx, up)
	if err != nil {
		return nil, fmt.Errorf("update: %w", err)
	}
	return &Attributes{
		ContentEncoding: oa.ContentEncoding,
		Size:            oa.Size,
		Metadata:        oa.Metadata,
	}, nil
}
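
// Illustrative sketch, not part of the original code: updating the content
// encoding and user metadata of an existing GCS object. Only gs:// paths are
// supported by UpdateAtributes; the helper name and values shown are hypothetical.
func exampleMarkGzipped(ctx context.Context, o Opener, gcsPath string) error {
	gzipEncoding := "gzip"
	_, err := o.UpdateAtributes(ctx, gcsPath, ObjectAttrsToUpdate{
		ContentEncoding: &gzipEncoding,
		Metadata:        map[string]string{"compressed": "true"},
	})
	return err
}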

// Hosts used to build browser-facing GCS URLs: GSAnonHost serves public
// objects anonymously, GSCookieHost uses cookie-based authentication.
const (
	GSAnonHost   = "storage.googleapis.com"
	GSCookieHost = "storage.cloud.google.com"
)

func (o *opener) SignedURL(ctx context.Context, p string, opts SignedURLOptions) (string, error) {
	_, bucketName, relativePath, err := providers.ParseStoragePath(p)
	if err != nil {
		return "", fmt.Errorf("could not get bucket: %w", err)
	}
	if strings.HasPrefix(p, providers.GS+"://") {
		// We specifically want to use cookie auth, see:
		// https://cloud.google.com/storage/docs/access-control/cookie-based-authentication
		if opts.UseGSCookieAuth {
			artifactLink := &url.URL{
				Scheme: httpsScheme,
				Host:   GSCookieHost,
				Path:   path.Join(bucketName, relativePath),
			}
			return artifactLink.String(), nil
		}

		// If we're anonymous we can just return a plain URL.
		if o.gcsCredentialsFile == "" {
			artifactLink := &url.URL{
				Scheme: httpsScheme,
				Host:   GSAnonHost,
				Path:   path.Join(bucketName, relativePath),
			}
			return artifactLink.String(), nil
		}

		// TODO(fejta): do not require the json file https://github.com/kubernetes/test-infra/issues/16489
		// As far as I can tell, there is no sane way to get these values other than just
		// reading them out of the JSON file ourselves.
		f, err := os.Open(o.gcsCredentialsFile)
		if err != nil {
			return "", err
		}
		defer f.Close()
		auth := struct {
			Type        string `json:"type"`
			PrivateKey  string `json:"private_key"`
			ClientEmail string `json:"client_email"`
		}{}
		if err := json.NewDecoder(f).Decode(&auth); err != nil {
			return "", err
		}
		if auth.Type != "service_account" {
			return "", fmt.Errorf("only service_account GCS auth is supported, got %q", auth.Type)
		}
		return storage.SignedURL(bucketName, relativePath, &storage.SignedURLOptions{
			Method:         "GET",
			Expires:        time.Now().Add(10 * time.Minute),
			GoogleAccessID: auth.ClientEmail,
			PrivateKey:     []byte(auth.PrivateKey),
		})
	}

	bucket, relativePath, err := o.getBucket(ctx, p)
	if err != nil {
		return "", err
	}
	return bucket.SignedURL(ctx, relativePath, &blob.SignedURLOptions{
		Method: "GET",
		Expiry: 10 * time.Minute,
	})
}
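
// Illustrative sketch, not part of the original code: producing a browser
// link for an artifact. With UseGSCookieAuth set, gs:// paths resolve to
// storage.cloud.google.com and rely on cookie-based authentication. The
// helper name is hypothetical.
func exampleArtifactLink(ctx context.Context, o Opener, path string) (string, error) {
	return o.SignedURL(ctx, path, SignedURLOptions{UseGSCookieAuth: true})
}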

func (o *opener) Iterator(ctx context.Context, prefix, delimiter string) (ObjectIterator, error) {
	storageProvider, bucketName, relativePath, err := providers.ParseStoragePath(prefix)
	if err != nil {
		return nil, fmt.Errorf("could not get bucket: %w", err)
	}

	if storageProvider == providers.GS {
		if o.gcsClient == nil {
			return nil, errors.New("no gcs client configured")
		}
		bkt := o.gcsClient.Bucket(bucketName)
		query := &storage.Query{
			Prefix:    relativePath,
			Delimiter: delimiter,
			Versions:  false,
		}
		if delimiter == "" {
			// query.SetAttrSelection cannot be used in directory-like mode (when delimiter != "").
			if err := query.SetAttrSelection([]string{"Name"}); err != nil {
				return nil, err
			}
		}
		return gcsObjectIterator{
			Iterator: bkt.Objects(ctx, query),
		}, nil
	}

	bucket, relativePath, err := o.getBucket(ctx, prefix)
	if err != nil {
		return nil, err
	}
	// Listing a directory requires a trailing "/", except when listing the bucket's root directory.
	if relativePath != "" && !strings.HasSuffix(relativePath, "/") {
		relativePath += "/"
	}
	return openerObjectIterator{
		Iterator: bucket.List(&blob.ListOptions{
			Prefix:    relativePath,
			Delimiter: delimiter,
		}),
	}, nil
}
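
// Illustrative sketch, not part of the original code: listing object names
// under a prefix. It assumes ObjectIterator (defined elsewhere in this
// package) exposes a Next(ctx) method returning an attribute struct with a
// Name field and io.EOF once the listing is exhausted; adjust to the actual
// iterator contract if it differs. The helper name is hypothetical.
func exampleListNames(ctx context.Context, o Opener, prefix string) ([]string, error) {
	it, err := o.Iterator(ctx, prefix, "/")
	if err != nil {
		return nil, err
	}
	var names []string
	for {
		attr, err := it.Next(ctx)
		if errors.Is(err, io.EOF) {
			return names, nil
		}
		if err != nil {
			return nil, err
		}
		names = append(names, attr.Name)
	}
}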

// ReadContent reads the entire object at path using the given opener.
func ReadContent(ctx context.Context, logger *logrus.Entry, opener Opener, path string) ([]byte, error) {
	log := logger.WithFields(logrus.Fields{"path": path})
	log.Debug("Reading")
	r, err := opener.Reader(ctx, path)
	if err != nil {
		return nil, err
	}
	defer r.Close()
	return io.ReadAll(r)
}

// WriteContent writes content to path using the given opener.
// Expected precondition failures (the object already exists) are not treated as errors.
func WriteContent(ctx context.Context, logger *logrus.Entry, opener Opener, path string, content []byte, opts ...WriterOptions) error {
	log := logger.WithFields(logrus.Fields{"path": path, "write-options": opts})
	log.Debug("Uploading")
	w, err := opener.Writer(ctx, path, opts...)
	if err != nil {
		return err
	}
	_, err = w.Write(content)
	var writeErr error
	if isErrUnexpected(err) {
		writeErr = err
		log.WithError(err).Warn("Uploading info to storage failed (write)")
	}
	err = w.Close()
	var closeErr error
	if isErrUnexpected(err) {
		closeErr = err
		log.WithError(err).Warn("Uploading info to storage failed (close)")
	}
	return utilerrors.NewAggregate([]error{writeErr, closeErr})
}
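
// Illustrative sketch, not part of the original code: a simple copy between
// two storage paths using ReadContent and WriteContent. The helper name and
// logger field are hypothetical.
func exampleCopy(ctx context.Context, o Opener, src, dst string) error {
	log := logrus.WithField("component", "example")
	content, err := ReadContent(ctx, log, o, src)
	if err != nil {
		return err
	}
	return WriteContent(ctx, log, o, dst, content)
}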

func isErrUnexpected(err error) bool {
	if err == nil {
		return false
	}
	// A precondition failure from GCS (HTTP 412) is expected and can be silently ignored.
	if e, ok := err.(*googleapi.Error); ok {
		if e.Code == http.StatusPreconditionFailed {
			return false
		}
	}
	// An "object already exists" precondition failure is also expected.
	if errors.Is(err, PreconditionFailedObjectAlreadyExists) {
		return false
	}

	return true
}