github.com/adrianjagielak/goofys@v0.24.1-0.20230810095418-94919a5d2254/internal/backend_gcs.go (about) 1 package internal 2 3 import ( 4 "github.com/kahing/goofys/api/common" 5 6 "bytes" 7 "context" 8 "fmt" 9 "io" 10 "net" 11 "path" 12 "strings" 13 "syscall" 14 15 "cloud.google.com/go/storage" 16 "github.com/jacobsa/fuse" 17 18 "golang.org/x/sync/errgroup" 19 syncsem "golang.org/x/sync/semaphore" 20 21 "google.golang.org/api/googleapi" 22 "google.golang.org/api/iterator" 23 "google.golang.org/api/option" 24 ) 25 26 type GCSBackend struct { 27 bucketName string 28 config *common.GCSConfig // stores user and bucket configuration 29 cap Capabilities 30 bucket *storage.BucketHandle // provides set of methods to operate on a bucket 31 logger *common.LogHandle // logger for GCS backend 32 } 33 34 const ( 35 maxListKeys int = 1000 // the max limit for number of elements during listObjects 36 ) 37 38 type GCSMultipartBlobCommitInput struct { 39 cancel context.CancelFunc // useful to abort a multipart upload in GCS 40 writer *storage.Writer // used to emulate mpu under GCS, which currently used a single gcsWriter 41 } 42 43 // NewGCS initializes a GCS Backend. 44 // It creates an authenticated client or unauthenticated client based on existing credentials in the environment. 45 func NewGCS(bucket string, config *common.GCSConfig) (*GCSBackend, error) { 46 var client *storage.Client 47 var err error 48 49 // TODO: storage.NewClient has automated mechanisms to set up credentials together with HTTP settings. 50 // Currently, we are using config.Credentials only to differentiate between creating an authenticated or 51 // unauthenticated client not using it to initialize a client. 52 53 // If config.Credentials are configured, we'll get an authenticated client. 54 if config.Credentials != nil { 55 client, err = storage.NewClient(context.Background()) 56 } else { 57 // otherwise we will get an unauthenticated client. option.WithoutAuthentication() is necessary 58 // because the API will generate an error if it could not find credentials and this option is unset. 59 client, err = storage.NewClient(context.Background(), option.WithoutAuthentication()) 60 } 61 62 if err != nil { 63 return nil, err 64 } 65 66 return &GCSBackend{ 67 config: config, 68 bucketName: bucket, 69 bucket: client.Bucket(bucket), 70 cap: Capabilities{ 71 MaxMultipartSize: 5 * 1024 * 1024 * 1024, 72 Name: "gcs", 73 // parallel multipart upload is not supported in GCS 74 NoParallelMultipart: true, 75 }, 76 logger: common.GetLogger("gcs"), 77 }, nil 78 } 79 80 // Init checks user's access to bucket. 81 func (g *GCSBackend) Init(key string) error { 82 // We will do a successful mount if the user can list on the bucket. 83 // This is different other backends because GCS does not differentiate between object not found and 84 // bucket not found. 85 prefix, _ := path.Split(key) 86 _, err := g.ListBlobs(&ListBlobsInput{ 87 MaxKeys: PUInt32(1), 88 Prefix: PString(prefix), 89 }) 90 g.logger.Debugf("INIT GCS: ListStatus = %s", getDebugResponseStatus(err)) 91 if err == syscall.ENXIO { 92 return fmt.Errorf("bucket %v does not exist", g.bucketName) 93 } 94 // Errors can be returned directly since ListBlobs converts them to syscall errors. 95 return err 96 } 97 98 func (g *GCSBackend) Capabilities() *Capabilities { 99 return &g.cap 100 } 101 102 // Bucket returns the GCSBackend's bucket name. 103 func (g *GCSBackend) Bucket() string { 104 return g.bucketName 105 } 106 107 func getDebugResponseStatus(err error) string { 108 if err != nil { 109 return fmt.Sprintf("ERROR: %v", err) 110 } 111 return "SUCCESS" 112 } 113 114 // HeadBlob gets the file object metadata. 115 func (g *GCSBackend) HeadBlob(param *HeadBlobInput) (*HeadBlobOutput, error) { 116 attrs, err := g.bucket.Object(param.Key).Attrs(context.Background()) 117 g.logger.Debugf("HEAD %v = %v", param.Key, getDebugResponseStatus(err)) 118 if err != nil { 119 return nil, mapGCSError(err) 120 } 121 122 return &HeadBlobOutput{ 123 BlobItemOutput: BlobItemOutput{ 124 Key: &attrs.Name, 125 ETag: &attrs.Etag, 126 LastModified: &attrs.Updated, 127 Size: uint64(attrs.Size), 128 StorageClass: &attrs.StorageClass, 129 }, 130 ContentType: &attrs.ContentType, 131 IsDirBlob: strings.HasSuffix(param.Key, "/"), 132 Metadata: PMetadata(attrs.Metadata), 133 }, nil 134 } 135 136 func (g *GCSBackend) ListBlobs(param *ListBlobsInput) (*ListBlobsOutput, error) { 137 query := storage.Query{ 138 Prefix: NilStr(param.Prefix), 139 Delimiter: NilStr(param.Delimiter), 140 StartOffset: NilStr(param.StartAfter), 141 } 142 objectIterator := g.bucket.Objects(context.Background(), &query) 143 144 // Set max keys, a number > 0 is required by the SDK. 145 maxKeys := int(NilUint32(param.MaxKeys)) 146 if maxKeys == 0 { 147 maxKeys = maxListKeys // follow the default JSON API mechanism to return 1000 items if maxKeys is not set. 148 } 149 150 pager := iterator.NewPager(objectIterator, maxKeys, NilStr(param.ContinuationToken)) 151 152 var entries []*storage.ObjectAttrs 153 nextToken, err := pager.NextPage(&entries) 154 g.logger.Debugf("LIST %s : %s", param, getDebugResponseStatus(err)) 155 if err != nil { 156 return nil, mapGCSError(err) 157 } 158 159 var nextContToken *string 160 if nextToken != "" { 161 nextContToken = &nextToken 162 } 163 164 var prefixes []BlobPrefixOutput 165 var items []BlobItemOutput 166 for _, entry := range entries { 167 // if blob is a prefix, then Prefix field will be set 168 if entry.Prefix != "" { 169 prefixes = append(prefixes, BlobPrefixOutput{&entry.Prefix}) 170 } else if entry.Name != "" { // otherwise for actual blob, Name field will set 171 items = append(items, BlobItemOutput{ 172 Key: &entry.Name, 173 ETag: &entry.Etag, 174 LastModified: &entry.Updated, 175 Size: uint64(entry.Size), 176 StorageClass: &entry.StorageClass, 177 }) 178 } else { 179 log.Errorf("LIST Unknown object: %v", entry) 180 } 181 } 182 183 return &ListBlobsOutput{ 184 Prefixes: prefixes, 185 Items: items, 186 NextContinuationToken: nextContToken, 187 IsTruncated: nextContToken != nil, 188 }, nil 189 } 190 191 func (g *GCSBackend) DeleteBlob(param *DeleteBlobInput) (*DeleteBlobOutput, error) { 192 err := g.bucket.Object(param.Key).Delete(context.Background()) 193 194 g.logger.Debugf("DELETE Object %v = %s ", param.Key, getDebugResponseStatus(err)) 195 if err != nil { 196 return nil, mapGCSError(err) 197 } 198 199 return &DeleteBlobOutput{}, nil 200 } 201 202 // DeleteBlobs deletes multiple GCS blobs. 203 func (g *GCSBackend) DeleteBlobs(param *DeleteBlobsInput) (*DeleteBlobsOutput, error) { 204 // The go sdk does not support batch requests: https://issuetracker.google.com/issues/142641783 205 // So we're using goroutines and errorgroup to delete multiple objects 206 eg, rootCtx := errgroup.WithContext(context.Background()) 207 sem := syncsem.NewWeighted(100) 208 209 for _, item := range param.Items { 210 if err := sem.Acquire(rootCtx, 1); err != nil { 211 return nil, err 212 } 213 curItem := item 214 eg.Go(func() error { 215 defer sem.Release(1) 216 return g.bucket.Object(curItem).Delete(rootCtx) 217 }) 218 } 219 220 if err := eg.Wait(); err != nil { 221 return nil, mapGCSError(err) 222 } 223 224 return &DeleteBlobsOutput{}, nil 225 } 226 227 // RenameBlob is not supported for GCS backend. So Goofys will do a CopyBlob followed by DeleteBlob for renames. 228 func (g *GCSBackend) RenameBlob(param *RenameBlobInput) (*RenameBlobOutput, error) { 229 return nil, syscall.ENOTSUP 230 } 231 232 // CopyBlob copies a source object to another destination object under the same bucket. 233 func (g *GCSBackend) CopyBlob(param *CopyBlobInput) (*CopyBlobOutput, error) { 234 src := g.bucket.Object(param.Source) 235 dest := g.bucket.Object(param.Destination) 236 237 copier := dest.CopierFrom(src) 238 copier.StorageClass = NilStr(param.StorageClass) 239 copier.Etag = NilStr(param.ETag) 240 copier.Metadata = NilMetadata(param.Metadata) 241 242 _, err := copier.Run(context.Background()) 243 g.logger.Debugf("Copy object %s = %s ", param, getDebugResponseStatus(err)) 244 if err != nil { 245 return nil, mapGCSError(err) 246 } 247 248 return &CopyBlobOutput{}, nil 249 } 250 251 // GetBlob returns a file reader for a GCS object. 252 func (g *GCSBackend) GetBlob(param *GetBlobInput) (*GetBlobOutput, error) { 253 obj := g.bucket.Object(param.Key).ReadCompressed(true) 254 255 var reader *storage.Reader 256 var err error 257 if param.Count != 0 { 258 reader, err = obj.NewRangeReader(context.Background(), int64(param.Start), int64(param.Count)) 259 } else if param.Start != 0 { 260 reader, err = obj.NewRangeReader(context.Background(), int64(param.Start), -1) 261 } else { 262 // If we don't limit the range, the full object will be read 263 reader, err = obj.NewReader(context.Background()) 264 } 265 266 g.logger.Debugf("GET Blob %s = %v", param, getDebugResponseStatus(err)) 267 if err != nil { 268 return nil, mapGCSError(err) 269 } 270 271 // Caveats: the SDK's reader object doesn't provide ETag, StorageClass, and Metadata attributes within a single 272 // API call, hence we're not returning these information in the output. 273 // Relevant GitHub issue: https://github.com/googleapis/google-cloud-go/issues/2740 274 return &GetBlobOutput{ 275 HeadBlobOutput: HeadBlobOutput{ 276 BlobItemOutput: BlobItemOutput{ 277 Key: PString(param.Key), 278 LastModified: &reader.Attrs.LastModified, 279 Size: uint64(reader.Attrs.Size), 280 }, 281 ContentType: &reader.Attrs.ContentType, 282 }, 283 Body: reader, 284 }, nil 285 } 286 287 // PutBlob writes a file to GCS. 288 func (g *GCSBackend) PutBlob(param *PutBlobInput) (*PutBlobOutput, error) { 289 // Handle nil pointer error when param.Body is nil 290 body := param.Body 291 if body == nil { 292 body = bytes.NewReader([]byte("")) 293 } 294 295 writer := g.bucket.Object(param.Key).NewWriter(context.Background()) 296 writer.ContentType = NilStr(param.ContentType) 297 writer.Metadata = NilMetadata(param.Metadata) 298 // setting chunkSize to be equal to the file size will make this a single request upload 299 writer.ChunkSize = int(NilUint64(param.Size)) 300 301 _, err := io.Copy(writer, body) 302 g.logger.Debugf("PUT Blob (to writer) %s = %s ", param, getDebugResponseStatus(err)) 303 if err != nil { 304 return nil, mapGCSError(err) 305 } 306 307 err = writer.Close() 308 g.logger.Debugf("PUT Blob (Flush) %v = %s ", param.Key, getDebugResponseStatus(err)) 309 if err != nil { 310 return nil, mapGCSError(err) 311 } 312 313 attrs := writer.Attrs() 314 315 return &PutBlobOutput{ 316 ETag: &attrs.Etag, 317 //LastModified: &attrs.Updated, // this field exist in the upstream open source goofys repo 318 StorageClass: &attrs.StorageClass, 319 }, nil 320 } 321 322 // MultipartBlobBegin begins a multi part blob request. 323 // Under GCS backend, we'll initialize the gcsWriter object and the context for the multipart blob request here. 324 func (g *GCSBackend) MultipartBlobBegin(param *MultipartBlobBeginInput) (*MultipartBlobCommitInput, error) { 325 ctx, cancel := context.WithCancel(context.Background()) 326 writer := g.bucket.Object(param.Key).NewWriter(ctx) 327 writer.ChunkSize = g.config.ChunkSize 328 writer.ContentType = NilStr(param.ContentType) 329 writer.Metadata = NilMetadata(param.Metadata) 330 331 g.logger.Debugf("Multipart Blob BEGIN: %s", param) 332 333 return &MultipartBlobCommitInput{ 334 Key: ¶m.Key, 335 Metadata: param.Metadata, 336 backendData: &GCSMultipartBlobCommitInput{ 337 writer: writer, 338 cancel: cancel, 339 }, 340 }, nil 341 } 342 343 // MultipartBlobAdd adds part of blob to the upload request. 344 // Under GCS backend, we'll write that blob part into the gcsWriter. 345 // TODO(deka): This is a temporary implementation to allow most tests to run. 346 // We might change this implementation in the future. 347 func (g *GCSBackend) MultipartBlobAdd(param *MultipartBlobAddInput) (*MultipartBlobAddOutput, error) { 348 commitData, ok := param.Commit.backendData.(*GCSMultipartBlobCommitInput) 349 if !ok { 350 panic("Incorrect commit data type") 351 } 352 353 // Handle nil pointer error when param.Body is nil 354 body := param.Body 355 if body == nil { 356 body = bytes.NewReader([]byte("")) 357 } 358 359 n, err := io.Copy(commitData.writer, body) 360 g.logger.Debugf("Multipart Blob ADD %s bytesWritten: %v = %s", param, n, getDebugResponseStatus(err)) 361 if err != nil { 362 commitData.cancel() 363 return nil, err 364 } 365 366 return &MultipartBlobAddOutput{}, nil 367 } 368 369 func (g *GCSBackend) MultipartBlobAbort(param *MultipartBlobCommitInput) (*MultipartBlobAbortOutput, error) { 370 commitData, ok := param.backendData.(*GCSMultipartBlobCommitInput) 371 if !ok { 372 panic("Incorrect commit data type") 373 } 374 g.logger.Debugf("Multipart Blob ABORT %v", param.Key) 375 commitData.cancel() 376 377 return &MultipartBlobAbortOutput{}, nil 378 } 379 380 func (g *GCSBackend) MultipartBlobCommit(param *MultipartBlobCommitInput) (*MultipartBlobCommitOutput, error) { 381 commitData, ok := param.backendData.(*GCSMultipartBlobCommitInput) 382 if !ok { 383 panic("Incorrect commit data type") 384 } 385 386 // Flushing a writer will make GCS to fully upload the buffer 387 err := commitData.writer.Close() 388 g.logger.Debugf("Multipart Blob COMMIT %v = %s ", param.Key, getDebugResponseStatus(err)) 389 if err != nil { 390 commitData.cancel() 391 return nil, mapGCSError(err) 392 } 393 attrs := commitData.writer.Attrs() 394 395 return &MultipartBlobCommitOutput{ 396 ETag: &attrs.Etag, 397 }, nil 398 } 399 400 func (g *GCSBackend) MultipartExpire(param *MultipartExpireInput) (*MultipartExpireOutput, error) { 401 // No-op: GCS expires a resumable session after 7 days automatically 402 return &MultipartExpireOutput{}, nil 403 } 404 405 func (g *GCSBackend) RemoveBucket(param *RemoveBucketInput) (*RemoveBucketOutput, error) { 406 err := g.bucket.Delete(context.Background()) 407 if err != nil { 408 return nil, mapGCSError(err) 409 } 410 return &RemoveBucketOutput{}, nil 411 } 412 413 func (g *GCSBackend) MakeBucket(param *MakeBucketInput) (*MakeBucketOutput, error) { 414 // Requires an authenticated credentials 415 err := g.bucket.Create(context.Background(), g.config.Credentials.ProjectID, nil) 416 if err != nil { 417 return nil, mapGCSError(err) 418 } 419 420 return &MakeBucketOutput{}, nil 421 } 422 423 func (g *GCSBackend) Delegate() interface{} { 424 return g 425 } 426 427 // mapGCSError maps an error to syscall / fuse errors. 428 func mapGCSError(err error) error { 429 if err == nil { 430 return nil 431 } 432 433 if err == storage.ErrObjectNotExist { 434 return fuse.ENOENT 435 } 436 437 // this error can be returned during list operation if the bucket does not exist 438 if err == storage.ErrBucketNotExist { 439 return syscall.ENXIO 440 } 441 442 if e, ok := err.(*googleapi.Error); ok { 443 switch e.Code { 444 case 409: 445 return fuse.EEXIST 446 case 404: 447 return fuse.ENOENT 448 // Retryable errors: 449 // https://cloud.google.com/storage/docs/json_api/v1/status-codes#429_Too_Many_Requests 450 // https://cloud.google.com/storage/docs/json_api/v1/status-codes#500_Internal_Server_Error 451 case 429, 500, 502, 503, 504: 452 return syscall.EAGAIN 453 default: 454 // return syscall error if it's not nil 455 fuseErr := mapHttpError(e.Code) 456 if fuseErr != nil { 457 return fuseErr 458 } 459 } 460 } 461 462 if e, ok := err.(net.Error); ok { 463 if e.Timeout() { 464 return syscall.ETIMEDOUT 465 } 466 } 467 468 return err 469 }