github.com/adrianjagielak/goofys@v0.24.1-0.20230810095418-94919a5d2254/internal/backend_gcs.go

package internal

import (
	"github.com/kahing/goofys/api/common"

	"bytes"
	"context"
	"fmt"
	"io"
	"net"
	"path"
	"strings"
	"syscall"

	"cloud.google.com/go/storage"
	"github.com/jacobsa/fuse"

	"golang.org/x/sync/errgroup"
	syncsem "golang.org/x/sync/semaphore"

	"google.golang.org/api/googleapi"
	"google.golang.org/api/iterator"
	"google.golang.org/api/option"
)

type GCSBackend struct {
	bucketName string
	config     *common.GCSConfig // stores user and bucket configuration
	cap        Capabilities
	bucket     *storage.BucketHandle // provides a set of methods to operate on a bucket
	logger     *common.LogHandle     // logger for the GCS backend
}

const (
	maxListKeys int = 1000 // the maximum number of elements returned by a single list call
)

type GCSMultipartBlobCommitInput struct {
	cancel context.CancelFunc // used to abort a multipart upload in GCS
	writer *storage.Writer    // used to emulate multipart uploads under GCS, which currently go through a single storage.Writer
}

// NewGCS initializes a GCS backend.
// It creates an authenticated or unauthenticated client depending on whether credentials exist in the environment.
func NewGCS(bucket string, config *common.GCSConfig) (*GCSBackend, error) {
	var client *storage.Client
	var err error

	// TODO: storage.NewClient has automated mechanisms to set up credentials together with HTTP settings.
	// Currently, we use config.Credentials only to decide between creating an authenticated or
	// unauthenticated client, not to initialize the client itself.

	// If config.Credentials is set, we'll get an authenticated client.
	if config.Credentials != nil {
		client, err = storage.NewClient(context.Background())
	} else {
		// Otherwise we'll get an unauthenticated client. option.WithoutAuthentication() is necessary
		// because the API returns an error if it cannot find credentials and this option is not set.
		client, err = storage.NewClient(context.Background(), option.WithoutAuthentication())
	}

	if err != nil {
		return nil, err
	}

	return &GCSBackend{
		config:     config,
		bucketName: bucket,
		bucket:     client.Bucket(bucket),
		cap: Capabilities{
			MaxMultipartSize: 5 * 1024 * 1024 * 1024,
			Name:             "gcs",
			// parallel multipart upload is not supported in GCS
			NoParallelMultipart: true,
		},
		logger: common.GetLogger("gcs"),
	}, nil
}
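
// Usage sketch (hedged: the bucket name is hypothetical, and the config is
// assumed to already carry any credentials discovered from the environment):
//
//	gcs, err := NewGCS("my-bucket", config)
//	if err != nil {
//		return err
//	}
//	if err := gcs.Init(""); err != nil {
//		return err // e.g. the bucket does not exist or is not listable
//	}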

// Init checks the user's access to the bucket.
func (g *GCSBackend) Init(key string) error {
	// The mount succeeds if the user can list the bucket.
	// This is different from other backends because GCS does not differentiate between object not found and
	// bucket not found.
	prefix, _ := path.Split(key)
	_, err := g.ListBlobs(&ListBlobsInput{
		MaxKeys: PUInt32(1),
		Prefix:  PString(prefix),
	})
	g.logger.Debugf("INIT GCS: ListStatus = %s", getDebugResponseStatus(err))
	if err == syscall.ENXIO {
		return fmt.Errorf("bucket %v does not exist", g.bucketName)
	}
	// Errors can be returned directly since ListBlobs converts them to syscall errors.
	return err
}

func (g *GCSBackend) Capabilities() *Capabilities {
	return &g.cap
}

// Bucket returns the GCSBackend's bucket name.
func (g *GCSBackend) Bucket() string {
	return g.bucketName
}

func getDebugResponseStatus(err error) string {
	if err != nil {
		return fmt.Sprintf("ERROR: %v", err)
	}
	return "SUCCESS"
}

// HeadBlob gets the file object metadata.
func (g *GCSBackend) HeadBlob(param *HeadBlobInput) (*HeadBlobOutput, error) {
	attrs, err := g.bucket.Object(param.Key).Attrs(context.Background())
	g.logger.Debugf("HEAD %v = %v", param.Key, getDebugResponseStatus(err))
	if err != nil {
		return nil, mapGCSError(err)
	}

	return &HeadBlobOutput{
		BlobItemOutput: BlobItemOutput{
			Key:          &attrs.Name,
			ETag:         &attrs.Etag,
			LastModified: &attrs.Updated,
			Size:         uint64(attrs.Size),
			StorageClass: &attrs.StorageClass,
		},
		ContentType: &attrs.ContentType,
		IsDirBlob:   strings.HasSuffix(param.Key, "/"),
		Metadata:    PMetadata(attrs.Metadata),
	}, nil
}

func (g *GCSBackend) ListBlobs(param *ListBlobsInput) (*ListBlobsOutput, error) {
	query := storage.Query{
		Prefix:      NilStr(param.Prefix),
		Delimiter:   NilStr(param.Delimiter),
		StartOffset: NilStr(param.StartAfter),
	}
	objectIterator := g.bucket.Objects(context.Background(), &query)

	// Set max keys; a number > 0 is required by the SDK.
	maxKeys := int(NilUint32(param.MaxKeys))
	if maxKeys == 0 {
		maxKeys = maxListKeys // follow the default JSON API behavior of returning 1000 items when maxKeys is not set
	}

	pager := iterator.NewPager(objectIterator, maxKeys, NilStr(param.ContinuationToken))

	var entries []*storage.ObjectAttrs
	nextToken, err := pager.NextPage(&entries)
	g.logger.Debugf("LIST %s : %s", param, getDebugResponseStatus(err))
	if err != nil {
		return nil, mapGCSError(err)
	}

	var nextContToken *string
	if nextToken != "" {
		nextContToken = &nextToken
	}

	var prefixes []BlobPrefixOutput
	var items []BlobItemOutput
	for _, entry := range entries {
		// if the entry is a prefix, its Prefix field will be set
		if entry.Prefix != "" {
			prefixes = append(prefixes, BlobPrefixOutput{&entry.Prefix})
		} else if entry.Name != "" { // otherwise, for an actual blob, the Name field will be set
			items = append(items, BlobItemOutput{
				Key:          &entry.Name,
				ETag:         &entry.Etag,
				LastModified: &entry.Updated,
				Size:         uint64(entry.Size),
				StorageClass: &entry.StorageClass,
			})
		} else {
			log.Errorf("LIST Unknown object: %v", entry)
		}
	}

	return &ListBlobsOutput{
		Prefixes:              prefixes,
		Items:                 items,
		NextContinuationToken: nextContToken,
		IsTruncated:           nextContToken != nil,
	}, nil
}
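
// Pagination sketch (hypothetical prefix; shows how NextContinuationToken
// feeds the next call until IsTruncated reports false):
//
//	var token *string
//	for {
//		out, err := g.ListBlobs(&ListBlobsInput{
//			Prefix:            PString("photos/"),
//			ContinuationToken: token,
//		})
//		if err != nil {
//			return err
//		}
//		// ... consume out.Items and out.Prefixes ...
//		if !out.IsTruncated {
//			break
//		}
//		token = out.NextContinuationToken
//	}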

func (g *GCSBackend) DeleteBlob(param *DeleteBlobInput) (*DeleteBlobOutput, error) {
	err := g.bucket.Object(param.Key).Delete(context.Background())

	g.logger.Debugf("DELETE Object %v = %s ", param.Key, getDebugResponseStatus(err))
	if err != nil {
		return nil, mapGCSError(err)
	}

	return &DeleteBlobOutput{}, nil
}

// DeleteBlobs deletes multiple GCS blobs.
func (g *GCSBackend) DeleteBlobs(param *DeleteBlobsInput) (*DeleteBlobsOutput, error) {
	// The Go SDK does not support batch requests: https://issuetracker.google.com/issues/142641783
	// So we use goroutines and an errgroup to delete multiple objects, with a
	// weighted semaphore capping concurrency at 100 in-flight deletes.
	eg, rootCtx := errgroup.WithContext(context.Background())
	sem := syncsem.NewWeighted(100)

	for _, item := range param.Items {
		if err := sem.Acquire(rootCtx, 1); err != nil {
			return nil, err
		}
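		// Copy the loop variable: before Go 1.22, the closure below would
		// otherwise capture the single shared iteration variable.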
		curItem := item
		eg.Go(func() error {
			defer sem.Release(1)
			return g.bucket.Object(curItem).Delete(rootCtx)
		})
	}

	if err := eg.Wait(); err != nil {
		return nil, mapGCSError(err)
	}

	return &DeleteBlobsOutput{}, nil
}

// RenameBlob is not supported by the GCS backend, so goofys will do a CopyBlob followed by a DeleteBlob for renames.
func (g *GCSBackend) RenameBlob(param *RenameBlobInput) (*RenameBlobOutput, error) {
	return nil, syscall.ENOTSUP
}

// CopyBlob copies a source object to another destination object under the same bucket.
func (g *GCSBackend) CopyBlob(param *CopyBlobInput) (*CopyBlobOutput, error) {
	src := g.bucket.Object(param.Source)
	dest := g.bucket.Object(param.Destination)

	copier := dest.CopierFrom(src)
	copier.StorageClass = NilStr(param.StorageClass)
	copier.Etag = NilStr(param.ETag)
	copier.Metadata = NilMetadata(param.Metadata)

	_, err := copier.Run(context.Background())
	g.logger.Debugf("Copy object %s = %s ", param, getDebugResponseStatus(err))
	if err != nil {
		return nil, mapGCSError(err)
	}

	return &CopyBlobOutput{}, nil
}

// GetBlob returns a file reader for a GCS object.
func (g *GCSBackend) GetBlob(param *GetBlobInput) (*GetBlobOutput, error) {
	obj := g.bucket.Object(param.Key).ReadCompressed(true)

	var reader *storage.Reader
	var err error
	if param.Count != 0 {
		reader, err = obj.NewRangeReader(context.Background(), int64(param.Start), int64(param.Count))
	} else if param.Start != 0 {
		reader, err = obj.NewRangeReader(context.Background(), int64(param.Start), -1)
	} else {
		// If we don't limit the range, the full object will be read
		reader, err = obj.NewReader(context.Background())
	}

	g.logger.Debugf("GET Blob %s = %v", param, getDebugResponseStatus(err))
	if err != nil {
		return nil, mapGCSError(err)
	}

	// Caveat: the SDK's reader object doesn't provide the ETag, StorageClass, and Metadata attributes within a single
	// API call, hence we do not return this information in the output.
	// Relevant GitHub issue: https://github.com/googleapis/google-cloud-go/issues/2740
	return &GetBlobOutput{
		HeadBlobOutput: HeadBlobOutput{
			BlobItemOutput: BlobItemOutput{
				Key:          PString(param.Key),
				LastModified: &reader.Attrs.LastModified,
				Size:         uint64(reader.Attrs.Size),
			},
			ContentType: &reader.Attrs.ContentType,
		},
		Body: reader,
	}, nil
}
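
// Range-read sketch (hypothetical key and offsets): reading 100 bytes starting
// at offset 4096 takes the NewRangeReader(ctx, 4096, 100) branch above.
//
//	out, err := g.GetBlob(&GetBlobInput{
//		Key:   "logs/app.log",
//		Start: 4096,
//		Count: 100,
//	})
//	if err != nil {
//		return err
//	}
//	defer out.Body.Close()
//	data, err := io.ReadAll(out.Body)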

// PutBlob writes a file to GCS.
func (g *GCSBackend) PutBlob(param *PutBlobInput) (*PutBlobOutput, error) {
	// Guard against a nil pointer dereference when param.Body is nil
	body := param.Body
	if body == nil {
		body = bytes.NewReader([]byte(""))
	}

	writer := g.bucket.Object(param.Key).NewWriter(context.Background())
	writer.ContentType = NilStr(param.ContentType)
	writer.Metadata = NilMetadata(param.Metadata)
	// setting ChunkSize equal to the file size makes this a single-request upload
	writer.ChunkSize = int(NilUint64(param.Size))

	_, err := io.Copy(writer, body)
	g.logger.Debugf("PUT Blob (to writer) %s = %s ", param, getDebugResponseStatus(err))
	if err != nil {
		return nil, mapGCSError(err)
	}

	err = writer.Close()
	g.logger.Debugf("PUT Blob (Flush) %v = %s ", param.Key, getDebugResponseStatus(err))
	if err != nil {
		return nil, mapGCSError(err)
	}

	attrs := writer.Attrs()

	return &PutBlobOutput{
		ETag: &attrs.Etag,
		//LastModified: &attrs.Updated, // this field exists in the upstream open-source goofys repo
		StorageClass: &attrs.StorageClass,
	}, nil
}
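
// Upload sketch (hypothetical key and content; the Size hint drives ChunkSize
// above, so a known size yields a single-request upload):
//
//	data := []byte("hello")
//	size := uint64(len(data))
//	_, err := g.PutBlob(&PutBlobInput{
//		Key:         "greeting.txt",
//		ContentType: PString("text/plain"),
//		Size:        &size,
//		Body:        bytes.NewReader(data),
//	})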

// MultipartBlobBegin begins a multipart blob request.
// Under the GCS backend, we initialize the storage.Writer and the context for the multipart blob request here.
func (g *GCSBackend) MultipartBlobBegin(param *MultipartBlobBeginInput) (*MultipartBlobCommitInput, error) {
	ctx, cancel := context.WithCancel(context.Background())
	writer := g.bucket.Object(param.Key).NewWriter(ctx)
	writer.ChunkSize = g.config.ChunkSize
	writer.ContentType = NilStr(param.ContentType)
	writer.Metadata = NilMetadata(param.Metadata)

	g.logger.Debugf("Multipart Blob BEGIN: %s", param)

	return &MultipartBlobCommitInput{
		Key:      &param.Key,
		Metadata: param.Metadata,
		backendData: &GCSMultipartBlobCommitInput{
			writer: writer,
			cancel: cancel,
		},
	}, nil
}
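
// Emulated multipart flow sketch (hypothetical key and parts; since everything
// funnels through a single storage.Writer, parts must be added in order):
//
//	commit, _ := g.MultipartBlobBegin(&MultipartBlobBeginInput{Key: "big.bin"})
//	for _, part := range parts { // parts: assumed []io.ReadSeeker, in order
//		if _, err := g.MultipartBlobAdd(&MultipartBlobAddInput{Commit: commit, Body: part}); err != nil {
//			return err // the writer's context is already canceled on failure
//		}
//	}
//	_, err := g.MultipartBlobCommit(commit)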

// MultipartBlobAdd adds a part of the blob to the upload request.
// Under the GCS backend, we write that blob part into the storage.Writer.
// TODO(deka): This is a temporary implementation to allow most tests to run.
// We might change this implementation in the future.
func (g *GCSBackend) MultipartBlobAdd(param *MultipartBlobAddInput) (*MultipartBlobAddOutput, error) {
	commitData, ok := param.Commit.backendData.(*GCSMultipartBlobCommitInput)
	if !ok {
		panic("Incorrect commit data type")
	}

	// Guard against a nil pointer dereference when param.Body is nil
	body := param.Body
	if body == nil {
		body = bytes.NewReader([]byte(""))
	}

	n, err := io.Copy(commitData.writer, body)
	g.logger.Debugf("Multipart Blob ADD %s bytesWritten: %v = %s", param, n, getDebugResponseStatus(err))
	if err != nil {
		commitData.cancel()
		return nil, err
	}

	return &MultipartBlobAddOutput{}, nil
}

func (g *GCSBackend) MultipartBlobAbort(param *MultipartBlobCommitInput) (*MultipartBlobAbortOutput, error) {
	commitData, ok := param.backendData.(*GCSMultipartBlobCommitInput)
	if !ok {
		panic("Incorrect commit data type")
	}
	g.logger.Debugf("Multipart Blob ABORT %v", param.Key)
	commitData.cancel()

	return &MultipartBlobAbortOutput{}, nil
}

func (g *GCSBackend) MultipartBlobCommit(param *MultipartBlobCommitInput) (*MultipartBlobCommitOutput, error) {
	commitData, ok := param.backendData.(*GCSMultipartBlobCommitInput)
	if !ok {
		panic("Incorrect commit data type")
	}

	// Closing the writer makes GCS fully upload the remaining buffer
	err := commitData.writer.Close()
	g.logger.Debugf("Multipart Blob COMMIT %v = %s ", param.Key, getDebugResponseStatus(err))
	if err != nil {
		commitData.cancel()
		return nil, mapGCSError(err)
	}
	attrs := commitData.writer.Attrs()

	return &MultipartBlobCommitOutput{
		ETag: &attrs.Etag,
	}, nil
}

func (g *GCSBackend) MultipartExpire(param *MultipartExpireInput) (*MultipartExpireOutput, error) {
	// No-op: GCS expires a resumable session after 7 days automatically
	return &MultipartExpireOutput{}, nil
}

func (g *GCSBackend) RemoveBucket(param *RemoveBucketInput) (*RemoveBucketOutput, error) {
	err := g.bucket.Delete(context.Background())
	if err != nil {
		return nil, mapGCSError(err)
	}
	return &RemoveBucketOutput{}, nil
}

func (g *GCSBackend) MakeBucket(param *MakeBucketInput) (*MakeBucketOutput, error) {
	// Requires authenticated credentials
	err := g.bucket.Create(context.Background(), g.config.Credentials.ProjectID, nil)
	if err != nil {
		return nil, mapGCSError(err)
	}

	return &MakeBucketOutput{}, nil
}

func (g *GCSBackend) Delegate() interface{} {
	return g
}

// mapGCSError maps an error to syscall / fuse errors.
func mapGCSError(err error) error {
	if err == nil {
		return nil
	}

	if err == storage.ErrObjectNotExist {
		return fuse.ENOENT
	}

	// this error can be returned during a list operation if the bucket does not exist
	if err == storage.ErrBucketNotExist {
		return syscall.ENXIO
	}

	if e, ok := err.(*googleapi.Error); ok {
		switch e.Code {
		case 409:
			return fuse.EEXIST
		case 404:
			return fuse.ENOENT
		// Retryable errors:
		// https://cloud.google.com/storage/docs/json_api/v1/status-codes#429_Too_Many_Requests
		// https://cloud.google.com/storage/docs/json_api/v1/status-codes#500_Internal_Server_Error
		case 429, 500, 502, 503, 504:
			return syscall.EAGAIN
		default:
			// return the mapped syscall error if it's not nil
			fuseErr := mapHttpError(e.Code)
			if fuseErr != nil {
				return fuseErr
			}
		}
	}

	if e, ok := err.(net.Error); ok {
		if e.Timeout() {
			return syscall.ETIMEDOUT
		}
	}

	return err
}
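
// Mapping sketch: a googleapi.Error with HTTP code 404 becomes fuse.ENOENT,
// so callers can compare the result against syscall/fuse errors directly.
//
//	err := mapGCSError(&googleapi.Error{Code: 404})
//	if err == fuse.ENOENT {
//		// object is missing
//	}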