github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/gs/adapter.go (about) 1 package gs 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "net/http" 9 "net/url" 10 "sort" 11 "strings" 12 "time" 13 14 "cloud.google.com/go/storage" 15 "github.com/treeverse/lakefs/pkg/block" 16 "github.com/treeverse/lakefs/pkg/logging" 17 "google.golang.org/api/iterator" 18 ) 19 20 const ( 21 MaxMultipartObjects = 10000 22 23 delimiter = "/" 24 partSuffix = ".part_" 25 markerSuffix = ".multipart" 26 ) 27 28 var ( 29 ErrMismatchPartETag = errors.New("mismatch part ETag") 30 ErrMismatchPartName = errors.New("mismatch part name") 31 ErrMaxMultipartObjects = errors.New("maximum multipart object reached") 32 ErrPartListMismatch = errors.New("multipart part list mismatch") 33 ErrMissingTargetAttrs = errors.New("missing target attributes") 34 ) 35 36 type Adapter struct { 37 client *storage.Client 38 preSignedExpiry time.Duration 39 disablePreSigned bool 40 disablePreSignedUI bool 41 } 42 43 func WithPreSignedExpiry(v time.Duration) func(a *Adapter) { 44 return func(a *Adapter) { 45 if v == 0 { 46 a.preSignedExpiry = block.DefaultPreSignExpiryDuration 47 } else { 48 a.preSignedExpiry = v 49 } 50 } 51 } 52 53 func WithDisablePreSigned(b bool) func(a *Adapter) { 54 return func(a *Adapter) { 55 if b { 56 a.disablePreSigned = true 57 } 58 } 59 } 60 61 func WithDisablePreSignedUI(b bool) func(a *Adapter) { 62 return func(a *Adapter) { 63 if b { 64 a.disablePreSignedUI = true 65 } 66 } 67 } 68 69 func NewAdapter(client *storage.Client, opts ...func(adapter *Adapter)) *Adapter { 70 a := &Adapter{ 71 client: client, 72 preSignedExpiry: block.DefaultPreSignExpiryDuration, 73 } 74 for _, opt := range opts { 75 opt(a) 76 } 77 return a 78 } 79 80 func (a *Adapter) log(ctx context.Context) logging.Logger { 81 return logging.FromContext(ctx) 82 } 83 84 func (a *Adapter) newPreSignedTime() time.Time { 85 return time.Now().UTC().Add(a.preSignedExpiry) 86 } 87 88 func (a *Adapter) Put(ctx context.Context, obj block.ObjectPointer, sizeBytes int64, reader io.Reader, _ block.PutOpts) error { 89 var err error 90 defer reportMetrics("Put", time.Now(), &sizeBytes, &err) 91 bucket, key, err := a.extractParamsFromObj(obj) 92 if err != nil { 93 return err 94 } 95 w := a.client.Bucket(bucket).Object(key).NewWriter(ctx) 96 _, err = io.Copy(w, reader) 97 if err != nil { 98 return fmt.Errorf("io.Copy: %w", err) 99 } 100 err = w.Close() 101 if err != nil { 102 return fmt.Errorf("writer.Close: %w", err) 103 } 104 return nil 105 } 106 107 func (a *Adapter) Get(ctx context.Context, obj block.ObjectPointer, _ int64) (io.ReadCloser, error) { 108 var err error 109 defer reportMetrics("Get", time.Now(), nil, &err) 110 bucket, key, err := a.extractParamsFromObj(obj) 111 if err != nil { 112 return nil, err 113 } 114 r, err := a.client.Bucket(bucket).Object(key).NewReader(ctx) 115 if isErrNotFound(err) { 116 return nil, block.ErrDataNotFound 117 } 118 if err != nil { 119 a.log(ctx).WithError(err).Errorf("failed to get object bucket %s key %s", bucket, key) 120 return nil, err 121 } 122 return r, nil 123 } 124 125 func (a *Adapter) GetWalker(uri *url.URL) (block.Walker, error) { 126 if err := block.ValidateStorageType(uri, block.StorageTypeGS); err != nil { 127 return nil, err 128 } 129 return NewGCSWalker(a.client), nil 130 } 131 132 func (a *Adapter) GetPreSignedURL(ctx context.Context, obj block.ObjectPointer, mode block.PreSignMode) (string, time.Time, error) { 133 if a.disablePreSigned { 134 return "", time.Time{}, block.ErrOperationNotSupported 135 } 136 137 var err error 138 defer reportMetrics("GetPreSignedURL", time.Now(), nil, &err) 139 140 bucket, key, err := a.extractParamsFromObj(obj) 141 if err != nil { 142 return "", time.Time{}, err 143 } 144 method := http.MethodGet 145 if mode == block.PreSignModeWrite { 146 method = http.MethodPut 147 } 148 opts := &storage.SignedURLOptions{ 149 Scheme: storage.SigningSchemeV4, 150 Method: method, 151 Expires: a.newPreSignedTime(), 152 } 153 k, err := a.client.Bucket(bucket).SignedURL(key, opts) 154 if err != nil { 155 a.log(ctx).WithError(err).Error("error generating pre-signed URL") 156 return "", time.Time{}, err 157 } 158 // TODO(#6347): Report expiry. 159 return k, time.Time{}, nil 160 } 161 162 func isErrNotFound(err error) bool { 163 return errors.Is(err, storage.ErrObjectNotExist) 164 } 165 166 func (a *Adapter) Exists(ctx context.Context, obj block.ObjectPointer) (bool, error) { 167 var err error 168 defer reportMetrics("Exists", time.Now(), nil, &err) 169 bucket, key, err := a.extractParamsFromObj(obj) 170 if err != nil { 171 return false, err 172 } 173 _, err = a.client.Bucket(bucket).Object(key).Attrs(ctx) 174 if isErrNotFound(err) { 175 return false, nil 176 } 177 if err != nil { 178 return false, err 179 } 180 return true, nil 181 } 182 183 func (a *Adapter) GetRange(ctx context.Context, obj block.ObjectPointer, startPosition int64, endPosition int64) (io.ReadCloser, error) { 184 var err error 185 defer reportMetrics("GetRange", time.Now(), nil, &err) 186 bucket, key, err := a.extractParamsFromObj(obj) 187 if err != nil { 188 return nil, err 189 } 190 r, err := a.client.Bucket(bucket).Object(key).NewRangeReader(ctx, startPosition, endPosition-startPosition+1) 191 if isErrNotFound(err) { 192 return nil, block.ErrDataNotFound 193 } 194 if err != nil { 195 a.log(ctx).WithError(err).Errorf("failed to get object bucket %s key %s", bucket, key) 196 return nil, err 197 } 198 return r, nil 199 } 200 201 func (a *Adapter) GetProperties(ctx context.Context, obj block.ObjectPointer) (block.Properties, error) { 202 var err error 203 defer reportMetrics("GetProperties", time.Now(), nil, &err) 204 var props block.Properties 205 bucket, key, err := a.extractParamsFromObj(obj) 206 if err != nil { 207 return props, err 208 } 209 _, err = a.client.Bucket(bucket).Object(key).Attrs(ctx) 210 if err != nil { 211 return props, err 212 } 213 return props, nil 214 } 215 216 func (a *Adapter) Remove(ctx context.Context, obj block.ObjectPointer) error { 217 var err error 218 defer reportMetrics("Remove", time.Now(), nil, &err) 219 bucket, key, err := a.extractParamsFromObj(obj) 220 if err != nil { 221 return err 222 } 223 err = a.client.Bucket(bucket).Object(key).Delete(ctx) 224 if err != nil { 225 return fmt.Errorf("Object(%q).Delete: %w", key, err) 226 } 227 return nil 228 } 229 230 func (a *Adapter) Copy(ctx context.Context, sourceObj, destinationObj block.ObjectPointer) error { 231 var err error 232 defer reportMetrics("Copy", time.Now(), nil, &err) 233 dstBucket, dstKey, err := a.extractParamsFromObj(destinationObj) 234 if err != nil { 235 return fmt.Errorf("resolve destination: %w", err) 236 } 237 srcBucket, srcKey, err := a.extractParamsFromObj(sourceObj) 238 if err != nil { 239 return fmt.Errorf("resolve source: %w", err) 240 } 241 destinationObjectHandle := a.client.Bucket(dstBucket).Object(dstKey) 242 sourceObjectHandle := a.client.Bucket(srcBucket).Object(srcKey) 243 _, err = destinationObjectHandle.CopierFrom(sourceObjectHandle).Run(ctx) 244 if err != nil { 245 return fmt.Errorf("copy: %w", err) 246 } 247 return nil 248 } 249 250 func (a *Adapter) CreateMultiPartUpload(ctx context.Context, obj block.ObjectPointer, _ *http.Request, _ block.CreateMultiPartUploadOpts) (*block.CreateMultiPartUploadResponse, error) { 251 var err error 252 defer reportMetrics("CreateMultiPartUpload", time.Now(), nil, &err) 253 bucket, uploadID, err := a.extractParamsFromObj(obj) 254 if err != nil { 255 return nil, err 256 } 257 // we keep a marker file to identify multipart in progress 258 objName := formatMultipartMarkerFilename(uploadID) 259 o := a.client.Bucket(bucket).Object(objName) 260 w := o.NewWriter(ctx) 261 _, err = io.WriteString(w, uploadID) 262 if err != nil { 263 return nil, fmt.Errorf("io.WriteString: %w", err) 264 } 265 err = w.Close() 266 if err != nil { 267 return nil, fmt.Errorf("writer.Close: %w", err) 268 } 269 // log information 270 a.log(ctx).WithFields(logging.Fields{ 271 "upload_id": uploadID, 272 "qualified_ns": bucket, 273 "qualified_key": uploadID, 274 "key": obj.Identifier, 275 }).Debug("created multipart upload") 276 return &block.CreateMultiPartUploadResponse{ 277 UploadID: uploadID, 278 }, nil 279 } 280 281 func (a *Adapter) UploadPart(ctx context.Context, obj block.ObjectPointer, sizeBytes int64, reader io.Reader, uploadID string, partNumber int) (*block.UploadPartResponse, error) { 282 var err error 283 defer reportMetrics("UploadPart", time.Now(), &sizeBytes, &err) 284 bucket, _, err := a.extractParamsFromObj(obj) 285 if err != nil { 286 return nil, err 287 } 288 objName := formatMultipartFilename(uploadID, partNumber) 289 o := a.client.Bucket(bucket).Object(objName) 290 w := o.NewWriter(ctx) 291 _, err = io.Copy(w, reader) 292 if err != nil { 293 return nil, fmt.Errorf("io.Copy: %w", err) 294 } 295 err = w.Close() 296 if err != nil { 297 return nil, fmt.Errorf("writer.Close: %w", err) 298 } 299 attrs, err := o.Attrs(ctx) 300 if err != nil { 301 return nil, fmt.Errorf("object.Attrs: %w", err) 302 } 303 return &block.UploadPartResponse{ 304 ETag: attrs.Etag, 305 }, nil 306 } 307 308 func (a *Adapter) UploadCopyPart(ctx context.Context, sourceObj, destinationObj block.ObjectPointer, uploadID string, partNumber int) (*block.UploadPartResponse, error) { 309 var err error 310 defer reportMetrics("UploadCopyPart", time.Now(), nil, &err) 311 bucket, _, err := a.extractParamsFromObj(destinationObj) 312 if err != nil { 313 return nil, err 314 } 315 objName := formatMultipartFilename(uploadID, partNumber) 316 o := a.client.Bucket(bucket).Object(objName) 317 318 srcBucket, srcKey, err := a.extractParamsFromObj(sourceObj) 319 if err != nil { 320 return nil, fmt.Errorf("resolve source: %w", err) 321 } 322 sourceObjectHandle := a.client.Bucket(srcBucket).Object(srcKey) 323 324 attrs, err := o.CopierFrom(sourceObjectHandle).Run(ctx) 325 if err != nil { 326 return nil, fmt.Errorf("CopierFrom: %w", err) 327 } 328 return &block.UploadPartResponse{ 329 ETag: attrs.Etag, 330 }, nil 331 } 332 333 func (a *Adapter) UploadCopyPartRange(ctx context.Context, sourceObj, destinationObj block.ObjectPointer, uploadID string, partNumber int, startPosition, endPosition int64) (*block.UploadPartResponse, error) { 334 var err error 335 defer reportMetrics("UploadCopyPartRange", time.Now(), nil, &err) 336 bucket, _, err := a.extractParamsFromObj(destinationObj) 337 if err != nil { 338 return nil, err 339 } 340 objName := formatMultipartFilename(uploadID, partNumber) 341 o := a.client.Bucket(bucket).Object(objName) 342 343 reader, err := a.GetRange(ctx, sourceObj, startPosition, endPosition) 344 if err != nil { 345 return nil, fmt.Errorf("GetRange: %w", err) 346 } 347 w := o.NewWriter(ctx) 348 _, err = io.Copy(w, reader) 349 if err != nil { 350 return nil, fmt.Errorf("copy: %w", err) 351 } 352 err = w.Close() 353 if err != nil { 354 _ = reader.Close() 355 return nil, fmt.Errorf("WriterClose: %w", err) 356 } 357 err = reader.Close() 358 if err != nil { 359 return nil, fmt.Errorf("ReaderClose: %w", err) 360 } 361 362 attrs, err := o.Attrs(ctx) 363 if err != nil { 364 return nil, fmt.Errorf("object.Attrs: %w", err) 365 } 366 return &block.UploadPartResponse{ 367 ETag: attrs.Etag, 368 }, nil 369 } 370 371 func (a *Adapter) AbortMultiPartUpload(ctx context.Context, obj block.ObjectPointer, uploadID string) error { 372 var err error 373 defer reportMetrics("AbortMultiPartUpload", time.Now(), nil, &err) 374 bucketName, _, err := a.extractParamsFromObj(obj) 375 if err != nil { 376 return err 377 } 378 bucket := a.client.Bucket(bucketName) 379 380 // delete all related files by listing the prefix 381 it := bucket.Objects(ctx, &storage.Query{ 382 Prefix: uploadID, 383 Delimiter: delimiter, 384 }) 385 for { 386 attrs, err := it.Next() 387 if errors.Is(err, iterator.Done) { 388 break 389 } 390 if err != nil { 391 return fmt.Errorf("bucket(%s).Objects(): %w", bucketName, err) 392 } 393 if err := bucket.Object(attrs.Name).Delete(ctx); err != nil { 394 return fmt.Errorf("bucket(%s).object(%s).Delete(): %w", bucketName, attrs.Name, err) 395 } 396 } 397 return nil 398 } 399 400 func (a *Adapter) CompleteMultiPartUpload(ctx context.Context, obj block.ObjectPointer, uploadID string, multipartList *block.MultipartUploadCompletion) (*block.CompleteMultiPartUploadResponse, error) { 401 var err error 402 defer reportMetrics("CompleteMultiPartUpload", time.Now(), nil, &err) 403 bucketName, key, err := a.extractParamsFromObj(obj) 404 if err != nil { 405 return nil, err 406 } 407 lg := a.log(ctx).WithFields(logging.Fields{ 408 "upload_id": uploadID, 409 "qualified_ns": bucketName, 410 "qualified_key": key, 411 "key": obj.Identifier, 412 }) 413 414 // list bucket parts and validate request match 415 bucketParts, err := a.listMultipartUploadParts(ctx, bucketName, uploadID) 416 if err != nil { 417 return nil, err 418 } 419 // validate bucketParts match the request multipartList 420 err = a.validateMultipartUploadParts(uploadID, multipartList, bucketParts) 421 if err != nil { 422 return nil, err 423 } 424 425 // prepare names 426 parts := make([]string, len(bucketParts)) 427 for i, part := range bucketParts { 428 parts[i] = part.Name 429 } 430 431 // compose target object 432 targetAttrs, err := a.composeMultipartUploadParts(ctx, bucketName, uploadID, parts) 433 if err != nil { 434 lg.WithError(err).Error("CompleteMultipartUpload failed") 435 return nil, err 436 } 437 438 // delete marker 439 bucket := a.client.Bucket(bucketName) 440 objMarker := bucket.Object(formatMultipartMarkerFilename(uploadID)) 441 if err := objMarker.Delete(ctx); err != nil { 442 a.log(ctx).WithError(err).Warn("Failed to delete multipart upload marker") 443 } 444 lg.Debug("completed multipart upload") 445 return &block.CompleteMultiPartUploadResponse{ 446 ETag: targetAttrs.Etag, 447 ContentLength: targetAttrs.Size, 448 }, nil 449 } 450 451 func (a *Adapter) validateMultipartUploadParts(uploadID string, multipartList *block.MultipartUploadCompletion, bucketParts []*storage.ObjectAttrs) error { 452 if len(multipartList.Part) != len(bucketParts) { 453 return ErrPartListMismatch 454 } 455 for i, p := range multipartList.Part { 456 objName := formatMultipartFilename(uploadID, p.PartNumber) 457 if objName != bucketParts[i].Name { 458 return fmt.Errorf("invalid part at position %d: %w", i, ErrMismatchPartName) 459 } 460 if p.ETag != bucketParts[i].Etag { 461 return fmt.Errorf("invalid part at position %d: %w", i, ErrMismatchPartETag) 462 } 463 } 464 return nil 465 } 466 467 func (a *Adapter) listMultipartUploadParts(ctx context.Context, bucketName string, uploadID string) ([]*storage.ObjectAttrs, error) { 468 bucket := a.client.Bucket(bucketName) 469 var bucketParts []*storage.ObjectAttrs 470 it := bucket.Objects(ctx, &storage.Query{ 471 Delimiter: delimiter, 472 Prefix: uploadID + partSuffix, 473 }) 474 for { 475 attrs, err := it.Next() 476 if errors.Is(err, iterator.Done) { 477 break 478 } 479 if err != nil { 480 return nil, fmt.Errorf("listing bucket '%s' upload '%s': %w", bucketName, uploadID, err) 481 } 482 bucketParts = append(bucketParts, attrs) 483 if len(bucketParts) > MaxMultipartObjects { 484 return nil, fmt.Errorf("listing bucket '%s' upload '%s': %w", bucketName, uploadID, ErrMaxMultipartObjects) 485 } 486 } 487 // sort by name - assume natual sort order 488 sort.Slice(bucketParts, func(i, j int) bool { 489 return bucketParts[i].Name < bucketParts[j].Name 490 }) 491 return bucketParts, nil 492 } 493 494 func (a *Adapter) composeMultipartUploadParts(ctx context.Context, bucketName string, uploadID string, parts []string) (*storage.ObjectAttrs, error) { 495 // compose target from all parts 496 bucket := a.client.Bucket(bucketName) 497 var targetAttrs *storage.ObjectAttrs 498 err := ComposeAll(uploadID, parts, func(target string, parts []string) error { 499 objs := make([]*storage.ObjectHandle, len(parts)) 500 for i := range parts { 501 objs[i] = bucket.Object(parts[i]) 502 } 503 // compose target from parts 504 attrs, err := bucket.Object(target).ComposerFrom(objs...).Run(ctx) 505 if err != nil { 506 return err 507 } 508 if target == uploadID { 509 targetAttrs = attrs 510 } 511 // delete parts 512 for _, o := range objs { 513 if err := o.Delete(ctx); err != nil { 514 a.log(ctx).WithError(err).WithFields(logging.Fields{ 515 "bucket": bucketName, 516 "parts": parts, 517 }).Warn("Failed to delete multipart upload part while compose") 518 } 519 } 520 return nil 521 }) 522 if err == nil && targetAttrs == nil { 523 return nil, ErrMissingTargetAttrs 524 } 525 if err != nil { 526 return nil, err 527 } 528 return targetAttrs, nil 529 } 530 531 func (a *Adapter) Close() error { 532 return a.client.Close() 533 } 534 535 func (a *Adapter) BlockstoreType() string { 536 return block.BlockstoreTypeGS 537 } 538 539 func (a *Adapter) GetStorageNamespaceInfo() block.StorageNamespaceInfo { 540 info := block.DefaultStorageNamespaceInfo(block.BlockstoreTypeGS) 541 if a.disablePreSigned { 542 info.PreSignSupport = false 543 } 544 if !(a.disablePreSignedUI || a.disablePreSigned) { 545 info.PreSignSupportUI = true 546 } 547 return info 548 } 549 550 func (a *Adapter) extractParamsFromObj(obj block.ObjectPointer) (string, string, error) { 551 qk, err := a.ResolveNamespace(obj.StorageNamespace, obj.Identifier, obj.IdentifierType) 552 if err != nil { 553 return "", "", err 554 } 555 bucket, prefix, _ := strings.Cut(qk.GetStorageNamespace(), "/") 556 key := qk.GetKey() 557 if len(prefix) > 0 { // Avoid situations where prefix is empty or "/" 558 key = prefix + "/" + key 559 } 560 return bucket, key, nil 561 } 562 563 func (a *Adapter) ResolveNamespace(storageNamespace, key string, identifierType block.IdentifierType) (block.QualifiedKey, error) { 564 qualifiedKey, err := block.DefaultResolveNamespace(storageNamespace, key, identifierType) 565 if err != nil { 566 return qualifiedKey, err 567 } 568 if qualifiedKey.GetStorageType() != block.StorageTypeGS { 569 return qualifiedKey, fmt.Errorf("expected storage type gs: %w", block.ErrInvalidAddress) 570 } 571 return qualifiedKey, nil 572 } 573 574 func (a *Adapter) RuntimeStats() map[string]string { 575 return nil 576 } 577 578 func formatMultipartFilename(uploadID string, partNumber int) string { 579 // keep natural sort order with zero padding 580 return fmt.Sprintf("%s"+partSuffix+"%05d", uploadID, partNumber) 581 } 582 583 func formatMultipartMarkerFilename(uploadID string) string { 584 return uploadID + markerSuffix 585 } 586 587 func (a *Adapter) GetPresignUploadPartURL(_ context.Context, _ block.ObjectPointer, _ string, _ int) (string, error) { 588 return "", block.ErrOperationNotSupported 589 } 590 591 func (a *Adapter) ListParts(_ context.Context, _ block.ObjectPointer, _ string, _ block.ListPartsOpts) (*block.ListPartsResponse, error) { 592 return nil, block.ErrOperationNotSupported 593 }