storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/bucket-replication.go

/*
 * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"context"
	"fmt"
	"net/http"
	"reflect"
	"strings"
	"sync"
	"time"

	minio "github.com/minio/minio-go/v7"
	miniogo "github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/encrypt"
	"github.com/minio/minio-go/v7/pkg/tags"

	"storj.io/minio/cmd/crypto"
	xhttp "storj.io/minio/cmd/http"
	"storj.io/minio/cmd/logger"
	"storj.io/minio/pkg/bucket/bandwidth"
	"storj.io/minio/pkg/bucket/replication"
	"storj.io/minio/pkg/event"
	iampolicy "storj.io/minio/pkg/iam/policy"
	"storj.io/minio/pkg/madmin"
)

// gets the replication config associated with a given bucket name.
func getReplicationConfig(ctx context.Context, bucketName string) (rc *replication.Config, err error) {
	if GlobalIsGateway {
		objAPI := newObjectLayerFn()
		if objAPI == nil {
			return nil, errServerNotInitialized
		}

		return nil, BucketReplicationConfigNotFound{Bucket: bucketName}
	}

	return globalBucketMetadataSys.GetReplicationConfig(ctx, bucketName)
}

// validateReplicationDestination returns an error if the replication destination bucket
// is missing or not configured. It also returns true if the replication destination is
// the same as this server.
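// The boolean is determined by checking whether the target endpoint's host and port
// resolve to this process (via isLocalHost), which lets callers detect a configuration
// that points back at the same deployment.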
func validateReplicationDestination(ctx context.Context, bucket string, rCfg *replication.Config) (bool, error) {
	arn, err := madmin.ParseARN(rCfg.RoleArn)
	if err != nil {
		return false, BucketRemoteArnInvalid{}
	}
	if arn.Type != madmin.ReplicationService {
		return false, BucketRemoteArnTypeInvalid{}
	}
	clnt := globalBucketTargetSys.GetRemoteTargetClient(ctx, rCfg.RoleArn)
	if clnt == nil {
		return false, BucketRemoteTargetNotFound{Bucket: bucket}
	}
	if found, _ := clnt.BucketExists(ctx, rCfg.GetDestination().Bucket); !found {
		return false, BucketRemoteDestinationNotFound{Bucket: rCfg.GetDestination().Bucket}
	}
	if ret, err := globalBucketObjectLockSys.Get(bucket); err == nil {
		if ret.LockEnabled {
			lock, _, _, _, err := clnt.GetObjectLockConfig(ctx, rCfg.GetDestination().Bucket)
			if err != nil || lock != "Enabled" {
				return false, BucketReplicationDestinationMissingLock{Bucket: rCfg.GetDestination().Bucket}
			}
		}
	}
	// validate replication ARN against target endpoint
	c, ok := globalBucketTargetSys.arnRemotesMap[rCfg.RoleArn]
	if ok {
		if c.EndpointURL().String() == clnt.EndpointURL().String() {
			sameTarget, _ := isLocalHost(clnt.EndpointURL().Hostname(), clnt.EndpointURL().Port(), globalMinioPort)
			return sameTarget, nil
		}
	}
	return false, BucketRemoteTargetNotFound{Bucket: bucket}
}

func mustReplicateWeb(ctx context.Context, r *http.Request, bucket, object string, meta map[string]string, replStatus string, permErr APIErrorCode) (replicate bool, sync bool) {
	if permErr != ErrNone {
		return
	}
	return mustReplicater(ctx, bucket, object, meta, replStatus)
}

// mustReplicate returns two booleans: true if the object meets the replication criteria,
// and true if the replication is to be done synchronously.
func mustReplicate(ctx context.Context, r *http.Request, bucket, object string, meta map[string]string, replStatus string) (replicate bool, sync bool) {
	if s3Err := isPutActionAllowed(ctx, getRequestAuthType(r), bucket, "", r, iampolicy.GetReplicationConfigurationAction); s3Err != ErrNone {
		return
	}
	return mustReplicater(ctx, bucket, object, meta, replStatus)
}

// mustReplicater returns two booleans: true if the object meets the replication criteria,
// and true if the replication is to be done synchronously.
func mustReplicater(ctx context.Context, bucket, object string, meta map[string]string, replStatus string) (replicate bool, sync bool) {
	if GlobalIsGateway {
		return replicate, sync
	}
	if rs, ok := meta[xhttp.AmzBucketReplicationStatus]; ok {
		replStatus = rs
	}
	if replication.StatusType(replStatus) == replication.Replica {
		return replicate, sync
	}
	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		return replicate, sync
	}
	opts := replication.ObjectOpts{
		Name: object,
		SSEC: crypto.SSEC.IsEncrypted(meta),
	}
	tagStr, ok := meta[xhttp.AmzObjectTagging]
	if ok {
		opts.UserTags = tagStr
	}
	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, cfg.RoleArn)
	// the target online status should not be used here while deciding
	// whether to replicate as the target could be temporarily down
	if tgt != nil {
		return cfg.Replicate(opts), tgt.replicateSync
	}
	return cfg.Replicate(opts), false
}
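
// As an illustrative sketch (not code from this file), a PUT-style handler would
// typically combine the helpers above with scheduleReplication, defined near the
// end of this file:
//
//	if replicate, sync := mustReplicate(ctx, r, bucket, object, metadata, ""); replicate {
//		scheduleReplication(ctx, objInfo, objectAPI, sync, replication.ObjectReplicationType)
//	}
//
// where objInfo and objectAPI stand for the handler's ObjectInfo result and ObjectLayer.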

// Standard headers that need to be extracted from user metadata.
var standardHeaders = []string{
	xhttp.ContentType,
	xhttp.CacheControl,
	xhttp.ContentEncoding,
	xhttp.ContentLanguage,
	xhttp.ContentDisposition,
	xhttp.AmzStorageClass,
	xhttp.AmzObjectTagging,
	xhttp.AmzBucketReplicationStatus,
	xhttp.AmzObjectLockMode,
	xhttp.AmzObjectLockRetainUntilDate,
	xhttp.AmzObjectLockLegalHold,
	xhttp.AmzTagCount,
	xhttp.AmzServerSideEncryption,
}

// returns true if any of the objects being deleted qualifies for replication.
func hasReplicationRules(ctx context.Context, bucket string, objects []ObjectToDelete) bool {
	c, err := getReplicationConfig(ctx, bucket)
	if err != nil || c == nil {
		return false
	}
	for _, obj := range objects {
		if c.HasActiveRules(obj.ObjectName, true) {
			return true
		}
	}
	return false
}

// isStandardHeader returns true if header is a supported header and not a custom header
func isStandardHeader(matchHeaderKey string) bool {
	return equals(matchHeaderKey, standardHeaders...)
}

// returns whether the object version is a delete marker and whether the object qualifies for replication
func checkReplicateDelete(ctx context.Context, bucket string, dobj ObjectToDelete, oi ObjectInfo, gerr error) (replicate, sync bool) {
	rcfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || rcfg == nil {
		return false, sync
	}
	opts := replication.ObjectOpts{
		Name:         dobj.ObjectName,
		SSEC:         crypto.SSEC.IsEncrypted(oi.UserDefined),
		UserTags:     oi.UserTags,
		DeleteMarker: oi.DeleteMarker,
		VersionID:    dobj.VersionID,
		OpType:       replication.DeleteReplicationType,
	}
	replicate = rcfg.Replicate(opts)
	// when the incoming delete is removal of a delete marker (a.k.a. a versioned delete),
	// GetObjectInfo returns extra information even though it returns errFileNotFound
	if gerr != nil {
		validReplStatus := false
		switch oi.ReplicationStatus {
		case replication.Pending, replication.Completed, replication.Failed:
			validReplStatus = true
		}
		if oi.DeleteMarker && (validReplStatus || replicate) {
			return true, sync
		}
		// can be the case that the other cluster is down and a duplicate `mc rm --vid`
		// is issued - this still needs to be replicated back to the other target
		return oi.VersionPurgeStatus == Pending || oi.VersionPurgeStatus == Failed, sync
	}
	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, rcfg.RoleArn)
	// the target online status should not be used here while deciding
	// whether to replicate deletes as the target could be temporarily down
	if tgt == nil {
		return false, false
	}
	return replicate, tgt.replicateSync
}

// replicates deletes to the designated replication target if the replication configuration
// has delete marker replication or delete replication (a MinIO extension to allow deletes where a version id
// is specified) enabled.
// Similar to bucket replication for PUT operations, soft deletes (a.k.a. setting a delete marker) and
// permanent deletes (specifying a version ID in the delete operation) have three states, "Pending", "Complete"
// and "Failed", to mark the status of the replication of the "DELETE" operation. All failed operations can
// then be retried by healing. In the case of permanent deletes, until the replication is completed on the
// target cluster, the object version is marked deleted on the source and hidden from listing.
// It is permanently deleted from the source when the VersionPurgeStatus changes to "Complete",
// i.e. after replication succeeds on the target.
func replicateDelete(ctx context.Context, dobj DeletedObjectVersionInfo, objectAPI ObjectLayer) {
	bucket := dobj.Bucket
	versionID := dobj.DeleteMarkerVersionID
	if versionID == "" {
		versionID = dobj.VersionID
	}

	rcfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || rcfg == nil {
		logger.LogIf(ctx, err)
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			Host:      "Internal: [Replication]",
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}

	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, rcfg.RoleArn)
	if tgt == nil {
		logger.LogIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, rcfg.RoleArn))
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			Host:      "Internal: [Replication]",
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}

	rmErr := tgt.RemoveObject(ctx, rcfg.GetDestination().Bucket, dobj.ObjectName, miniogo.RemoveObjectOptions{
		VersionID: versionID,
		Internal: miniogo.AdvancedRemoveOptions{
			ReplicationDeleteMarker: dobj.DeleteMarkerVersionID != "",
			ReplicationMTime:        dobj.DeleteMarkerMTime.Time,
			ReplicationStatus:       miniogo.ReplicationStatusReplica,
			ReplicationRequest:      true, // always set this to distinguish between `mc mirror` replication and serverside
		},
	})

	replicationStatus := dobj.DeleteMarkerReplicationStatus
	versionPurgeStatus := dobj.VersionPurgeStatus

	if rmErr != nil {
		if dobj.VersionID == "" {
			replicationStatus = string(replication.Failed)
		} else {
			versionPurgeStatus = Failed
		}
		logger.LogIf(ctx, fmt.Errorf("Unable to replicate delete marker to %s/%s(%s): %s", rcfg.GetDestination().Bucket, dobj.ObjectName, versionID, rmErr))
	} else {
		if dobj.VersionID == "" {
			replicationStatus = string(replication.Completed)
		} else {
			versionPurgeStatus = Complete
		}
	}
	prevStatus := dobj.DeleteMarkerReplicationStatus
	currStatus := replicationStatus
	if dobj.VersionID != "" {
		prevStatus = string(dobj.VersionPurgeStatus)
		currStatus = string(versionPurgeStatus)
	}
	// to decrement pending count later.
	globalReplicationStats.Update(dobj.Bucket, 0, replication.StatusType(currStatus), replication.StatusType(prevStatus), replication.DeleteReplicationType)

	var eventName = event.ObjectReplicationComplete
	if replicationStatus == string(replication.Failed) || versionPurgeStatus == Failed {
		eventName = event.ObjectReplicationFailed
	}

	// Update metadata on the delete marker, or purge the permanent delete, if replication succeeded.
	dobjInfo, err := objectAPI.DeleteObject(ctx, bucket, dobj.ObjectName, ObjectOptions{
		VersionID:                     versionID,
		DeleteMarkerReplicationStatus: replicationStatus,
		VersionPurgeStatus:            versionPurgeStatus,
		Versioned:                     globalBucketVersioningSys.Enabled(bucket),
		VersionSuspended:              globalBucketVersioningSys.Suspended(bucket),
	})
	if err != nil && !isErrVersionNotFound(err) { // VersionNotFound would be reported by the pool that the object version is missing on.
		logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %s", bucket, dobj.ObjectName, versionID, err))
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			Host:      "Internal: [Replication]",
			EventName: eventName,
		})
	} else {
		sendEvent(eventArgs{
			BucketName: bucket,
			Object:     dobjInfo,
			Host:       "Internal: [Replication]",
			EventName:  eventName,
		})
	}
}

func getCopyObjMetadata(oi ObjectInfo, dest replication.Destination) map[string]string {
	meta := make(map[string]string, len(oi.UserDefined))
	for k, v := range oi.UserDefined {
		if strings.HasPrefix(strings.ToLower(k), ReservedMetadataPrefixLower) {
			continue
		}

		if equals(k, xhttp.AmzBucketReplicationStatus) {
			continue
		}

		// https://github.com/google/security-research/security/advisories/GHSA-76wf-9vgp-pj7w
		if equals(k, xhttp.AmzMetaUnencryptedContentLength, xhttp.AmzMetaUnencryptedContentMD5) {
			continue
		}

		meta[k] = v
	}

	if oi.ContentEncoding != "" {
		meta[xhttp.ContentEncoding] = oi.ContentEncoding
	}

	if oi.ContentType != "" {
		meta[xhttp.ContentType] = oi.ContentType
	}

	if oi.UserTags != "" {
		meta[xhttp.AmzObjectTagging] = oi.UserTags
		meta[xhttp.AmzTagDirective] = "REPLACE"
	}

	sc := dest.StorageClass
	if sc == "" {
		sc = oi.StorageClass
	}
	if sc != "" {
		meta[xhttp.AmzStorageClass] = sc
	}
	meta[xhttp.MinIOSourceETag] = oi.ETag
	meta[xhttp.MinIOSourceMTime] = oi.ModTime.Format(time.RFC3339Nano)
	meta[xhttp.AmzBucketReplicationStatus] = replication.Replica.String()
	return meta
}

type caseInsensitiveMap map[string]string

// Lookup looks up a map entry case-insensitively.
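// It tries the key as given, in lower case, and in canonical MIME-header form, so it
// works for metadata maps populated either from HTTP headers or from stored object
// metadata. Illustrative use (mirroring putReplicationOpts below):
//
//	lk := caseInsensitiveMap(objInfo.UserDefined)
//	lang, ok := lk.Lookup(xhttp.ContentLanguage)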
func (m caseInsensitiveMap) Lookup(key string) (string, bool) {
	if len(m) == 0 {
		return "", false
	}
	for _, k := range []string{
		key,
		strings.ToLower(key),
		http.CanonicalHeaderKey(key),
	} {
		v, ok := m[k]
		if ok {
			return v, ok
		}
	}
	return "", false
}

func putReplicationOpts(ctx context.Context, dest replication.Destination, objInfo ObjectInfo) (putOpts miniogo.PutObjectOptions, err error) {
	meta := make(map[string]string)
	for k, v := range objInfo.UserDefined {
		if strings.HasPrefix(strings.ToLower(k), ReservedMetadataPrefixLower) {
			continue
		}
		if isStandardHeader(k) {
			continue
		}
		meta[k] = v
	}

	sc := dest.StorageClass
	if sc == "" {
		sc = objInfo.StorageClass
	}
	putOpts = miniogo.PutObjectOptions{
		UserMetadata:    meta,
		ContentType:     objInfo.ContentType,
		ContentEncoding: objInfo.ContentEncoding,
		StorageClass:    sc,
		Internal: miniogo.AdvancedPutOptions{
			SourceVersionID:    objInfo.VersionID,
			ReplicationStatus:  miniogo.ReplicationStatusReplica,
			SourceMTime:        objInfo.ModTime,
			SourceETag:         objInfo.ETag,
			ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and serverside
		},
	}
	if objInfo.UserTags != "" {
		tag, _ := tags.ParseObjectTags(objInfo.UserTags)
		if tag != nil {
			putOpts.UserTags = tag.ToMap()
		}
	}

	lkMap := caseInsensitiveMap(objInfo.UserDefined)
	if lang, ok := lkMap.Lookup(xhttp.ContentLanguage); ok {
		putOpts.ContentLanguage = lang
	}
	if disp, ok := lkMap.Lookup(xhttp.ContentDisposition); ok {
		putOpts.ContentDisposition = disp
	}
	if cc, ok := lkMap.Lookup(xhttp.CacheControl); ok {
		putOpts.CacheControl = cc
	}
	if mode, ok := lkMap.Lookup(xhttp.AmzObjectLockMode); ok {
		rmode := miniogo.RetentionMode(mode)
		putOpts.Mode = rmode
	}
	if retainDateStr, ok := lkMap.Lookup(xhttp.AmzObjectLockRetainUntilDate); ok {
		rdate, err := time.Parse(time.RFC3339, retainDateStr)
		if err != nil {
			return putOpts, err
		}
		putOpts.RetainUntilDate = rdate
	}
	if lhold, ok := lkMap.Lookup(xhttp.AmzObjectLockLegalHold); ok {
		putOpts.LegalHold = miniogo.LegalHoldStatus(lhold)
	}
	if crypto.S3.IsEncrypted(objInfo.UserDefined) {
		putOpts.ServerSideEncryption = encrypt.NewSSE()
	}
	return
}

type replicationAction string

const (
	replicateMetadata replicationAction = "metadata"
	replicateNone     replicationAction = "none"
	replicateAll      replicationAction = "all"
)

// matches k1 with all keys, returns 'true' if one of them matches
func equals(k1 string, keys ...string) bool {
	for _, k2 := range keys {
		if strings.ToLower(k1) == strings.ToLower(k2) {
			return true
		}
	}
	return false
}

// returns replicationAction by comparing metadata between source and target
func getReplicationAction(oi1 ObjectInfo, oi2 minio.ObjectInfo) replicationAction {
	// needs full replication
	if oi1.ETag != oi2.ETag ||
		oi1.VersionID != oi2.VersionID ||
		oi1.Size != oi2.Size ||
		oi1.DeleteMarker != oi2.IsDeleteMarker ||
		oi1.ModTime.Unix() != oi2.LastModified.Unix() {
		return replicateAll
	}

	if oi1.ContentType != oi2.ContentType {
		return replicateMetadata
	}

	if oi1.ContentEncoding != "" {
		enc, ok := oi2.Metadata[xhttp.ContentEncoding]
		if !ok {
			enc, ok = oi2.Metadata[strings.ToLower(xhttp.ContentEncoding)]
			if !ok {
				return replicateMetadata
			}
		}
		if strings.Join(enc, ",") != oi1.ContentEncoding {
			return replicateMetadata
		}
	}

	t, _ := tags.ParseObjectTags(oi1.UserTags)
	if !reflect.DeepEqual(oi2.UserTags, t.ToMap()) {
		return replicateMetadata
	}

	// Compare only necessary headers
	compareKeys := []string{
		"Expires",
		"Cache-Control",
		"Content-Language",
		"Content-Disposition",
		"X-Amz-Object-Lock-Mode",
		"X-Amz-Object-Lock-Retain-Until-Date",
		"X-Amz-Object-Lock-Legal-Hold",
		"X-Amz-Website-Redirect-Location",
		"X-Amz-Meta-",
	}

	// compare metadata on both maps to see if meta is identical
	compareMeta1 := make(map[string]string)
	for k, v := range oi1.UserDefined {
		var found bool
		for _, prefix := range compareKeys {
			if !strings.HasPrefix(strings.ToLower(k), strings.ToLower(prefix)) {
				continue
			}
			found = true
			break
		}
		if found {
			compareMeta1[strings.ToLower(k)] = v
		}
	}

	compareMeta2 := make(map[string]string)
	for k, v := range oi2.Metadata {
		var found bool
		for _, prefix := range compareKeys {
			if !strings.HasPrefix(strings.ToLower(k), strings.ToLower(prefix)) {
				continue
			}
			found = true
			break
		}
		if found {
			compareMeta2[strings.ToLower(k)] = strings.Join(v, ",")
		}
	}

	if !reflect.DeepEqual(compareMeta1, compareMeta2) {
		return replicateMetadata
	}

	return replicateNone
}

// replicateObject replicates the specified version of the object to destination bucket
// The source object is then updated to reflect the replication status.
func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI ObjectLayer) {
	objInfo := ri.ObjectInfo
	bucket := objInfo.Bucket
	object := objInfo.Name

	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		logger.LogIf(ctx, err)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			Host:       "Internal: [Replication]",
		})
		return
	}
	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, cfg.RoleArn)
	if tgt == nil {
		logger.LogIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, cfg.RoleArn))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			Host:       "Internal: [Replication]",
		})
		return
	}
	gr, err := objectAPI.GetObjectNInfo(ctx, bucket, object, nil, http.Header{}, writeLock, ObjectOptions{
		VersionID: objInfo.VersionID,
	})
	if err != nil {
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			Host:       "Internal: [Replication]",
		})
		logger.LogIf(ctx, fmt.Errorf("Unable to update replicate for %s/%s(%s): %w", bucket, object, objInfo.VersionID, err))
		return
	}
	defer gr.Close() // hold write lock for entire transaction

	objInfo = gr.ObjInfo
	size, err := objInfo.GetActualSize()
	if err != nil {
		logger.LogIf(ctx, err)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			Host:       "Internal: [Replication]",
		})
		return
	}

	dest := cfg.GetDestination()
	if dest.Bucket == "" {
		logger.LogIf(ctx, fmt.Errorf("Unable to replicate object %s(%s), bucket is empty", objInfo.Name, objInfo.VersionID))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			Host:       "Internal: [Replication]",
		})
		return
	}

	rtype := replicateAll
	oi, err := tgt.StatObject(ctx, dest.Bucket, object, miniogo.StatObjectOptions{
		VersionID: objInfo.VersionID,
		Internal: miniogo.AdvancedGetOptions{
			ReplicationProxyRequest: "false",
		}})
	if err == nil {
		rtype = getReplicationAction(objInfo, oi)
		if rtype == replicateNone {
			// object with same VersionID already exists, replication kicked off by
			// PutObject might have completed
			return
		}
	}
	replicationStatus := replication.Completed
	// use core client to avoid doing multipart on PUT
	c := &miniogo.Core{Client: tgt.Client}
	if rtype != replicateAll {
		// replicate metadata for object tagging/copy with metadata replacement
		srcOpts := miniogo.CopySrcOptions{
			Bucket:    dest.Bucket,
			Object:    object,
			VersionID: objInfo.VersionID,
		}
		dstOpts := miniogo.PutObjectOptions{
			Internal: miniogo.AdvancedPutOptions{
				SourceVersionID:    objInfo.VersionID,
				ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and serverside
			}}
		if _, err = c.CopyObject(ctx, dest.Bucket, object, dest.Bucket, object, getCopyObjMetadata(objInfo, dest), srcOpts, dstOpts); err != nil {
			replicationStatus = replication.Failed
			logger.LogIf(ctx, fmt.Errorf("Unable to replicate metadata for object %s/%s(%s): %s", bucket, objInfo.Name, objInfo.VersionID, err))
		}
	} else {
		target, err := globalBucketMetadataSys.GetBucketTarget(bucket, cfg.RoleArn)
		if err != nil {
			logger.LogIf(ctx, fmt.Errorf("failed to get target for replication bucket:%s cfg:%s err:%s", bucket, cfg.RoleArn, err))
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				Host:       "Internal: [Replication]",
			})
			return
		}

		putOpts, err := putReplicationOpts(ctx, dest, objInfo)
		if err != nil {
			logger.LogIf(ctx, fmt.Errorf("failed to get replication options for bucket:%s cfg:%s err:%w", bucket, cfg.RoleArn, err))
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				Host:       "Internal: [Replication]",
			})
			return
		}

		// Setup bandwidth throttling
		peers, _ := globalEndpoints.peers()
		totalNodesCount := len(peers)
		if totalNodesCount == 0 {
			totalNodesCount = 1 // For standalone erasure coding
		}

		var headerSize int
		for k, v := range putOpts.Header() {
			headerSize += len(k) + len(v)
		}

		opts := &bandwidth.MonitorReaderOptions{
			Bucket:               objInfo.Bucket,
			Object:               objInfo.Name,
			HeaderSize:           headerSize,
			BandwidthBytesPerSec: target.BandwidthLimit / int64(totalNodesCount),
			ClusterBandwidth:     target.BandwidthLimit,
		}

		r := bandwidth.NewMonitoredReader(ctx, globalBucketMonitor, gr, opts)
		if _, err = c.PutObject(ctx, dest.Bucket, object, r, size, "", "", putOpts); err != nil {
			replicationStatus = replication.Failed
			logger.LogIf(ctx, fmt.Errorf("Unable to replicate for object %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
		}
	}

	prevReplStatus := objInfo.ReplicationStatus
	objInfo.UserDefined[xhttp.AmzBucketReplicationStatus] = replicationStatus.String()
	if objInfo.UserTags != "" {
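		// carry the source object's tags forward so the metadata update below keeps them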
		objInfo.UserDefined[xhttp.AmzObjectTagging] = objInfo.UserTags
	}

	// FIXME: add support for missing replication events
	// - event.ObjectReplicationMissedThreshold
	// - event.ObjectReplicationReplicatedAfterThreshold
	var eventName = event.ObjectReplicationComplete
	if replicationStatus == replication.Failed {
		eventName = event.ObjectReplicationFailed
	}

	z, ok := objectAPI.(*erasureServerPools)
	if !ok {
		return
	}
	// Leave metadata in `PENDING` state if inline replication fails, to save IOPS
	if ri.OpType == replication.HealReplicationType || replicationStatus == replication.Completed {
		// This lower level implementation is necessary to avoid write locks from CopyObject.
		poolIdx, err := z.getPoolIdx(ctx, bucket, object, objInfo.Size)
		if err != nil {
			logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
		} else {
			fi := FileInfo{}
			fi.VersionID = objInfo.VersionID
			fi.Metadata = make(map[string]string, len(objInfo.UserDefined))
			for k, v := range objInfo.UserDefined {
				fi.Metadata[k] = v
			}
			if err = z.serverPools[poolIdx].getHashedSet(object).updateObjectMeta(ctx, bucket, object, fi); err != nil {
				logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
			}
		}
		opType := replication.MetadataReplicationType
		if rtype == replicateAll {
			opType = replication.ObjectReplicationType
		}
		globalReplicationStats.Update(bucket, size, replicationStatus, prevReplStatus, opType)
		sendEvent(eventArgs{
			EventName:  eventName,
			BucketName: bucket,
			Object:     objInfo,
			Host:       "Internal: [Replication]",
		})
	}
	// re-queue failures once more - keep a retry count to avoid flooding the queue if
	// the target site is down. Leave it to the scanner to catch up instead.
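	// Only a single immediate retry is attempted per object (RetryCount < 1); the retry is
	// re-queued as a heal replication task.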
	if replicationStatus == replication.Failed && ri.RetryCount < 1 {
		ri.OpType = replication.HealReplicationType
		ri.RetryCount++
		globalReplicationPool.queueReplicaTask(ctx, ri)
	}
}

// filterReplicationStatusMetadata filters replication status metadata for COPY
func filterReplicationStatusMetadata(metadata map[string]string) map[string]string {
	// Copy on write
	dst := metadata
	var copied bool
	delKey := func(key string) {
		if _, ok := metadata[key]; !ok {
			return
		}
		if !copied {
			dst = make(map[string]string, len(metadata))
			for k, v := range metadata {
				dst[k] = v
			}
			copied = true
		}
		delete(dst, key)
	}

	delKey(xhttp.AmzBucketReplicationStatus)
	return dst
}

// DeletedObjectVersionInfo has info on deleted object
type DeletedObjectVersionInfo struct {
	DeletedObject
	Bucket string
}

var (
	globalReplicationPool  *ReplicationPool
	globalReplicationStats *ReplicationStats
)

// ReplicationPool describes replication pool
type ReplicationPool struct {
	once               sync.Once
	mu                 sync.Mutex
	size               int
	replicaCh          chan ReplicateObjectInfo
	replicaDeleteCh    chan DeletedObjectVersionInfo
	mrfReplicaCh       chan ReplicateObjectInfo
	mrfReplicaDeleteCh chan DeletedObjectVersionInfo
	killCh             chan struct{}
	wg                 sync.WaitGroup
	ctx                context.Context
	objLayer           ObjectLayer
}

// NewReplicationPool creates a pool of replication workers of specified size
func NewReplicationPool(ctx context.Context, o ObjectLayer, sz int) *ReplicationPool {
	pool := &ReplicationPool{
		replicaCh:          make(chan ReplicateObjectInfo, 1000),
		replicaDeleteCh:    make(chan DeletedObjectVersionInfo, 1000),
		mrfReplicaCh:       make(chan ReplicateObjectInfo, 100000),
		mrfReplicaDeleteCh: make(chan DeletedObjectVersionInfo, 100000),
		ctx:                ctx,
		objLayer:           o,
	}
	pool.Resize(sz)
	// add long running worker for handling most recent failures/pending replications
	go pool.AddMRFWorker()
	return pool
}

// AddMRFWorker adds a pending/failed replication worker to handle requests that could not be queued
// to the other workers
func (p *ReplicationPool) AddMRFWorker() {
	for {
		select {
		case <-p.ctx.Done():
			return
		case oi, ok := <-p.mrfReplicaCh:
			if !ok {
				return
			}
			replicateObject(p.ctx, oi, p.objLayer)
		case doi, ok := <-p.mrfReplicaDeleteCh:
			if !ok {
				return
			}
			replicateDelete(p.ctx, doi, p.objLayer)
		}
	}
}

// AddWorker adds a replication worker to the pool
func (p *ReplicationPool) AddWorker() {
	defer p.wg.Done()
	for {
		select {
		case <-p.ctx.Done():
			return
		case oi, ok := <-p.replicaCh:
			if !ok {
				return
			}
			replicateObject(p.ctx, oi, p.objLayer)
		case doi, ok := <-p.replicaDeleteCh:
			if !ok {
				return
			}
			replicateDelete(p.ctx, doi, p.objLayer)
		case <-p.killCh:
			return
		}
	}
}

// Resize resizes the replication pool to the new size
func (p *ReplicationPool) Resize(n int) {
	p.mu.Lock()
	defer p.mu.Unlock()

	for p.size < n {
		p.size++
		p.wg.Add(1)
		go p.AddWorker()
	}
	for p.size > n {
		p.size--
		go func() { p.killCh <- struct{}{} }()
	}
}

func (p *ReplicationPool) queueReplicaTask(ctx context.Context, ri ReplicateObjectInfo) {
	if p == nil {
		return
	}
	select {
	case <-ctx.Done():
		p.once.Do(func() {
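			// closing the queues makes the workers' channel receives return !ok so they exit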
			close(p.replicaCh)
			close(p.mrfReplicaCh)
		})
	case p.replicaCh <- ri:
	case p.mrfReplicaCh <- ri:
		// queue all overflows into the mrfReplicaCh to handle incoming pending/failed operations
	default:
	}
}

func (p *ReplicationPool) queueReplicaDeleteTask(ctx context.Context, doi DeletedObjectVersionInfo) {
	if p == nil {
		return
	}
	select {
	case <-ctx.Done():
		p.once.Do(func() {
			close(p.replicaDeleteCh)
			close(p.mrfReplicaDeleteCh)
		})
	case p.replicaDeleteCh <- doi:
	case p.mrfReplicaDeleteCh <- doi:
		// queue all overflows into the mrfReplicaDeleteCh to handle incoming pending/failed operations
	default:
	}
}

func initBackgroundReplication(ctx context.Context, objectAPI ObjectLayer) {
	globalReplicationPool = NewReplicationPool(ctx, objectAPI, globalAPIConfig.getReplicationWorkers())
	globalReplicationStats = NewReplicationStats(ctx, objectAPI)
}

// get Reader from replication target if active-active replication is in place and
// this node returns a 404
func proxyGetToReplicationTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, opts ObjectOptions) (gr *GetObjectReader, proxy bool) {
	tgt, oi, proxy, err := proxyHeadToRepTarget(ctx, bucket, object, opts)
	if !proxy || err != nil {
		return nil, false
	}
	fn, off, length, err := NewGetObjectReader(rs, oi, opts)
	if err != nil {
		return nil, false
	}
	gopts := miniogo.GetObjectOptions{
		VersionID:            opts.VersionID,
		ServerSideEncryption: opts.ServerSideEncryption,
		Internal: miniogo.AdvancedGetOptions{
			ReplicationProxyRequest: "true",
		},
	}
	// get correct offsets for encrypted object
	if off >= 0 && length >= 0 {
		if err := gopts.SetRange(off, off+length-1); err != nil {
			return nil, false
		}
	}
	// Make sure to match ETag when proxying.
	if err = gopts.SetMatchETag(oi.ETag); err != nil {
		return nil, false
	}
	c := miniogo.Core{Client: tgt.Client}
	obj, _, _, err := c.GetObject(ctx, bucket, object, gopts)
	if err != nil {
		return nil, false
	}
	closeReader := func() { obj.Close() }

	reader, err := fn(obj, h, opts.CheckPrecondFn, closeReader)
	if err != nil {
		return nil, false
	}
	reader.ObjInfo = oi.Clone()
	return reader, true
}

// isProxyable returns true if replication config found for this bucket
func isProxyable(ctx context.Context, bucket string) bool {
	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		return false
	}
	dest := cfg.GetDestination()
	return dest.Bucket == bucket
}

func proxyHeadToRepTarget(ctx context.Context, bucket, object string, opts ObjectOptions) (tgt *TargetClient, oi ObjectInfo, proxy bool, err error) {
	// this option is set when active-active replication is in place between site A -> B,
	// and site B does not have the object yet.
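	// A request that already carries the proxy marker is never proxied again, so the two
	// sites do not bounce lookups for an object that neither of them has.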
	if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header
		return nil, oi, false, nil
	}
	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		return nil, oi, false, err
	}
	dest := cfg.GetDestination()
	if dest.Bucket != bucket { // not active-active
		return nil, oi, false, err
	}
	ssec := false
	if opts.ServerSideEncryption != nil {
		ssec = opts.ServerSideEncryption.Type() == encrypt.SSEC
	}
	ropts := replication.ObjectOpts{
		Name: object,
		SSEC: ssec,
	}
	if !cfg.Replicate(ropts) { // no matching rule for object prefix
		return nil, oi, false, nil
	}
	tgt = globalBucketTargetSys.GetRemoteTargetClient(ctx, cfg.RoleArn)
	if tgt == nil || tgt.isOffline() {
		return nil, oi, false, fmt.Errorf("target is offline or not configured")
	}

	gopts := miniogo.GetObjectOptions{
		VersionID:            opts.VersionID,
		ServerSideEncryption: opts.ServerSideEncryption,
		Internal: miniogo.AdvancedGetOptions{
			ReplicationProxyRequest: "true",
		},
	}

	objInfo, err := tgt.StatObject(ctx, dest.Bucket, object, gopts)
	if err != nil {
		return nil, oi, false, err
	}

	tags, _ := tags.MapToObjectTags(objInfo.UserTags)
	oi = ObjectInfo{
		Bucket:            bucket,
		Name:              object,
		ModTime:           objInfo.LastModified,
		Size:              objInfo.Size,
		ETag:              objInfo.ETag,
		VersionID:         objInfo.VersionID,
		IsLatest:          objInfo.IsLatest,
		DeleteMarker:      objInfo.IsDeleteMarker,
		ContentType:       objInfo.ContentType,
		Expires:           objInfo.Expires,
		StorageClass:      objInfo.StorageClass,
		ReplicationStatus: replication.StatusType(objInfo.ReplicationStatus),
		UserTags:          tags.String(),
	}
	oi.UserDefined = make(map[string]string, len(objInfo.Metadata))
	for k, v := range objInfo.Metadata {
		oi.UserDefined[k] = v[0]
	}
	ce, ok := oi.UserDefined[xhttp.ContentEncoding]
	if !ok {
		ce, ok = oi.UserDefined[strings.ToLower(xhttp.ContentEncoding)]
	}
	if ok {
		oi.ContentEncoding = ce
	}
	return tgt, oi, true, nil
}

// get object info from replication target if active-active replication is in place and
// this node returns a 404
func proxyHeadToReplicationTarget(ctx context.Context, bucket, object string, opts ObjectOptions) (oi ObjectInfo, proxy bool, err error) {
	_, oi, proxy, err = proxyHeadToRepTarget(ctx, bucket, object, opts)
	return oi, proxy, err
}

func scheduleReplication(ctx context.Context, objInfo ObjectInfo, o ObjectLayer, sync bool, opType replication.Type) {
	if sync {
		replicateObject(ctx, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType}, o)
	} else {
		globalReplicationPool.queueReplicaTask(GlobalContext, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType})
	}
	if sz, err := objInfo.GetActualSize(); err == nil {
		globalReplicationStats.Update(objInfo.Bucket, sz, objInfo.ReplicationStatus, replication.StatusType(""), opType)
	}
}

func scheduleReplicationDelete(ctx context.Context, dv DeletedObjectVersionInfo, o ObjectLayer, sync bool) {
	globalReplicationPool.queueReplicaDeleteTask(GlobalContext, dv)
	globalReplicationStats.Update(dv.Bucket, 0, replication.Pending, replication.StatusType(""), replication.DeleteReplicationType)
}
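
// Illustrative sketch (not part of the original source): a DELETE handler would tie the
// delete-replication helpers above together roughly as follows, where objToDel is the
// handler's ObjectToDelete, deletedObj the resulting DeletedObject, and oi/gerr come from
// GetObjectInfo on the deleted version (names are hypothetical):
//
//	replicate, sync := checkReplicateDelete(ctx, bucket, objToDel, oi, gerr)
//	if replicate {
//		scheduleReplicationDelete(ctx, DeletedObjectVersionInfo{DeletedObject: deletedObj, Bucket: bucket}, objectAPI, sync)
//	}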