github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/bucket-replication.go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/url"
	"path"
	"reflect"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/minio/madmin-go/v3"
	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/encrypt"
	"github.com/minio/minio-go/v7/pkg/tags"
	"github.com/minio/minio/internal/amztime"
	"github.com/minio/minio/internal/bucket/bandwidth"
	objectlock "github.com/minio/minio/internal/bucket/object/lock"
	"github.com/minio/minio/internal/bucket/replication"
	"github.com/minio/minio/internal/config/storageclass"
	"github.com/minio/minio/internal/crypto"
	"github.com/minio/minio/internal/event"
	"github.com/minio/minio/internal/hash"
	xhttp "github.com/minio/minio/internal/http"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
	"github.com/tinylib/msgp/msgp"
	"github.com/zeebo/xxh3"
	"golang.org/x/exp/maps"
	"golang.org/x/exp/slices"
)

const (
	throttleDeadline = 1 * time.Hour
	// ReplicationReset has reset id and timestamp of last reset operation
	ReplicationReset = "replication-reset"
	// ReplicationStatus has internal replication status - stringified representation of target's replication status for all replication
	// activity initiated from this cluster
	ReplicationStatus = "replication-status"
	// ReplicationTimestamp - the last time replication was initiated on this cluster for this object version
	ReplicationTimestamp = "replication-timestamp"
	// ReplicaStatus - this header is present if a replica was received by this cluster for this object version
	ReplicaStatus = "replica-status"
	// ReplicaTimestamp - the last time a replica was received by this cluster for this object version
	ReplicaTimestamp = "replica-timestamp"
	// TaggingTimestamp - the last time a tag metadata modification happened on this cluster for this object version
	TaggingTimestamp = "tagging-timestamp"
	// ObjectLockRetentionTimestamp - the last time an object lock metadata modification happened on this cluster for this object version
	ObjectLockRetentionTimestamp = "objectlock-retention-timestamp"
	// ObjectLockLegalHoldTimestamp - the last time a legal hold metadata modification happened on this cluster for this object version
	ObjectLockLegalHoldTimestamp = "objectlock-legalhold-timestamp"
	// ReplicationWorkerMultiplier is the suggested worker multiplier if traffic exceeds replication worker capacity
	ReplicationWorkerMultiplier = 1.5
)
// gets replication config associated with a given bucket name.
func getReplicationConfig(ctx context.Context, bucketName string) (rc *replication.Config, err error) {
	rCfg, _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, bucketName)
	if err != nil {
		if errors.Is(err, BucketReplicationConfigNotFound{Bucket: bucketName}) || errors.Is(err, errInvalidArgument) {
			return rCfg, err
		}
		logger.CriticalIf(ctx, err)
	}
	return rCfg, err
}

// validateReplicationDestination returns an error if the replication destination bucket
// is missing or not configured. It also returns true if the replication destination is
// the same as this server.
func validateReplicationDestination(ctx context.Context, bucket string, rCfg *replication.Config, checkRemote bool) (bool, APIError) {
	var arns []string
	if rCfg.RoleArn != "" {
		arns = append(arns, rCfg.RoleArn)
	} else {
		for _, rule := range rCfg.Rules {
			arns = append(arns, rule.Destination.String())
		}
	}
	var sameTarget bool
	for _, arnStr := range arns {
		arn, err := madmin.ParseARN(arnStr)
		if err != nil {
			return sameTarget, errorCodes.ToAPIErrWithErr(ErrBucketRemoteArnInvalid, err)
		}
		if arn.Type != madmin.ReplicationService {
			return sameTarget, toAPIError(ctx, BucketRemoteArnTypeInvalid{Bucket: bucket})
		}
		clnt := globalBucketTargetSys.GetRemoteTargetClient(bucket, arnStr)
		if clnt == nil {
			return sameTarget, toAPIError(ctx, BucketRemoteTargetNotFound{Bucket: bucket})
		}
		if checkRemote { // validate remote bucket
			found, err := clnt.BucketExists(ctx, arn.Bucket)
			if err != nil {
				return sameTarget, errorCodes.ToAPIErrWithErr(ErrRemoteDestinationNotFoundError, err)
			}
			if !found {
				return sameTarget, errorCodes.ToAPIErrWithErr(ErrRemoteDestinationNotFoundError, BucketRemoteTargetNotFound{Bucket: arn.Bucket})
			}
			if ret, err := globalBucketObjectLockSys.Get(bucket); err == nil {
				if ret.LockEnabled {
					lock, _, _, _, err := clnt.GetObjectLockConfig(ctx, arn.Bucket)
					if err != nil {
						return sameTarget, errorCodes.ToAPIErrWithErr(ErrReplicationDestinationMissingLock, err)
					}
					if lock != objectlock.Enabled {
						return sameTarget, errorCodes.ToAPIErrWithErr(ErrReplicationDestinationMissingLock, nil)
					}
				}
			}
		}
		// validate replication ARN against target endpoint
		c := globalBucketTargetSys.GetRemoteTargetClient(bucket, arnStr)
		if c != nil {
			if err := checkRemoteEndpoint(ctx, c.EndpointURL()); err != nil {
				switch err.(type) {
				case BucketRemoteIdenticalToSource:
					return true, errorCodes.ToAPIErrWithErr(ErrBucketRemoteIdenticalToSource, fmt.Errorf("remote target endpoint %s is self referential", c.EndpointURL().String()))
				default:
				}
			}
			if c.EndpointURL().String() == clnt.EndpointURL().String() {
				selfTarget, _ := isLocalHost(clnt.EndpointURL().Hostname(), clnt.EndpointURL().Port(), globalMinioPort)
				if !sameTarget {
					sameTarget = selfTarget
				}
				continue
			}
		}
	}

	if len(arns) == 0 {
		return false, toAPIError(ctx, BucketRemoteTargetNotFound{Bucket: bucket})
	}
	return sameTarget, toAPIError(ctx, nil)
}
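
// A minimal usage sketch (hypothetical caller, for illustration only): a bucket
// replication config handler would typically validate the destination before
// persisting the configuration, rejecting self-referential setups:
//
//	sameTarget, apiErr := validateReplicationDestination(ctx, bucket, replConfig, true)
//	if apiErr.Code != "" {
//		writeErrorResponse(ctx, w, apiErr, r.URL)
//		return
//	}
//	_ = sameTarget // a same-cluster target may still be acceptable in some topologies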
// performs an HTTP request to the remote endpoint to check if the deployment id of the
// remote endpoint is the same as the local cluster deployment id. This is to prevent
// replication to self, especially in case of a load balancer in front of MinIO.
func checkRemoteEndpoint(ctx context.Context, epURL *url.URL) error {
	reqURL := &url.URL{
		Scheme: epURL.Scheme,
		Host:   epURL.Host,
		Path:   healthCheckPathPrefix + healthCheckReadinessPath,
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL.String(), nil)
	if err != nil {
		return err
	}

	client := &http.Client{
		Transport: globalRemoteTargetTransport,
		Timeout:   10 * time.Second,
	}

	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	if err == nil {
		// Drain the connection.
		xhttp.DrainBody(resp.Body)
	}
	if resp != nil {
		amzid := resp.Header.Get(xhttp.AmzRequestHostID)
		if _, ok := globalNodeNamesHex[amzid]; ok {
			return BucketRemoteIdenticalToSource{
				Endpoint: epURL.String(),
			}
		}
	}
	return nil
}
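
// Sketch of how the self-referential check could be exercised in a test file
// (assumes a helper has seeded globalNodeNamesHex with selfID; the names here
// are illustrative, not part of this file):
//
//	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
//		w.Header().Set(xhttp.AmzRequestHostID, selfID)
//	}))
//	defer srv.Close()
//	u, _ := url.Parse(srv.URL)
//	if err := checkRemoteEndpoint(context.Background(), u); err != nil {
//		// err is BucketRemoteIdenticalToSource when selfID matches a local node
//	}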
type mustReplicateOptions struct {
	meta               map[string]string
	status             replication.StatusType
	opType             replication.Type
	replicationRequest bool // incoming request is a replication request
}

func (o mustReplicateOptions) ReplicationStatus() (s replication.StatusType) {
	if rs, ok := o.meta[xhttp.AmzBucketReplicationStatus]; ok {
		return replication.StatusType(rs)
	}
	return s
}

func (o mustReplicateOptions) isExistingObjectReplication() bool {
	return o.opType == replication.ExistingObjectReplicationType
}

func (o mustReplicateOptions) isMetadataReplication() bool {
	return o.opType == replication.MetadataReplicationType
}

func (o ObjectInfo) getMustReplicateOptions(op replication.Type, opts ObjectOptions) mustReplicateOptions {
	return getMustReplicateOptions(o.UserDefined, o.UserTags, o.ReplicationStatus, op, opts)
}

func getMustReplicateOptions(userDefined map[string]string, userTags string, status replication.StatusType, op replication.Type, opts ObjectOptions) mustReplicateOptions {
	meta := cloneMSS(userDefined)
	if userTags != "" {
		meta[xhttp.AmzObjectTagging] = userTags
	}

	return mustReplicateOptions{
		meta:               meta,
		status:             status,
		opType:             op,
		replicationRequest: opts.ReplicationRequest,
	}
}

// mustReplicate returns a ReplicateDecision - for each matching target it records whether
// the object meets the replication criteria and whether replication is to be done in a
// synchronous manner.
func mustReplicate(ctx context.Context, bucket, object string, mopts mustReplicateOptions) (dsc ReplicateDecision) {
	// if the object layer is not initialized, return with no decision.
	if newObjectLayerFn() == nil {
		return
	}

	// Disable server-side replication on object prefixes which are excluded
	// from versioning via the MinIO bucket versioning extension.
	if !globalBucketVersioningSys.PrefixEnabled(bucket, object) {
		return
	}

	replStatus := mopts.ReplicationStatus()
	if replStatus == replication.Replica && !mopts.isMetadataReplication() {
		return
	}

	if mopts.replicationRequest { // incoming replication request on target cluster
		return
	}
	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		return
	}
	opts := replication.ObjectOpts{
		Name:           object,
		SSEC:           crypto.SSEC.IsEncrypted(mopts.meta),
		Replica:        replStatus == replication.Replica,
		ExistingObject: mopts.isExistingObjectReplication(),
	}
	tagStr, ok := mopts.meta[xhttp.AmzObjectTagging]
	if ok {
		opts.UserTags = tagStr
	}
	tgtArns := cfg.FilterTargetArns(opts)
	for _, tgtArn := range tgtArns {
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtArn)
		// the target online status should not be used here while deciding
		// whether to replicate as the target could be temporarily down
		opts.TargetArn = tgtArn
		replicate := cfg.Replicate(opts)
		var synchronous bool
		if tgt != nil {
			synchronous = tgt.replicateSync
		}
		dsc.Set(newReplicateTargetDecision(tgtArn, replicate, synchronous))
	}
	return dsc
}
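
// Typical call-site sketch, simplified from how PUT paths use this decision
// (scheduleReplication is the queueing helper used elsewhere in this package;
// treating its exact signature here as an assumption):
//
//	dsc := mustReplicate(ctx, bucket, object, objInfo.getMustReplicateOptions(replication.ObjectReplicationType, opts))
//	if dsc.ReplicateAny() {
//		scheduleReplication(ctx, objInfo, objectAPI, dsc, replication.ObjectReplicationType)
//	}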
// Standard headers that need to be extracted from User metadata.
var standardHeaders = []string{
	xhttp.ContentType,
	xhttp.CacheControl,
	xhttp.ContentEncoding,
	xhttp.ContentLanguage,
	xhttp.ContentDisposition,
	xhttp.AmzStorageClass,
	xhttp.AmzObjectTagging,
	xhttp.AmzBucketReplicationStatus,
	xhttp.AmzObjectLockMode,
	xhttp.AmzObjectLockRetainUntilDate,
	xhttp.AmzObjectLockLegalHold,
	xhttp.AmzTagCount,
	xhttp.AmzServerSideEncryption,
}

// returns true if any of the objects being deleted qualifies for replication.
func hasReplicationRules(ctx context.Context, bucket string, objects []ObjectToDelete) bool {
	c, err := getReplicationConfig(ctx, bucket)
	if err != nil || c == nil {
		return false
	}
	for _, obj := range objects {
		if c.HasActiveRules(obj.ObjectName, true) {
			return true
		}
	}
	return false
}

// isStandardHeader returns true if header is a supported header and not a custom header
func isStandardHeader(matchHeaderKey string) bool {
	return equals(matchHeaderKey, standardHeaders...)
}

// returns a replication decision for a delete operation - whether the delete marker or
// deleted version qualifies for replication to each configured target.
func checkReplicateDelete(ctx context.Context, bucket string, dobj ObjectToDelete, oi ObjectInfo, delOpts ObjectOptions, gerr error) (dsc ReplicateDecision) {
	rcfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || rcfg == nil {
		return
	}
	// If incoming request is a replication request, it does not need to be re-replicated.
	if delOpts.ReplicationRequest {
		return
	}
	// Skip replication if this object's prefix is excluded from being
	// versioned.
	if !delOpts.Versioned {
		return
	}
	opts := replication.ObjectOpts{
		Name:         dobj.ObjectName,
		SSEC:         crypto.SSEC.IsEncrypted(oi.UserDefined),
		UserTags:     oi.UserTags,
		DeleteMarker: oi.DeleteMarker,
		VersionID:    dobj.VersionID,
		OpType:       replication.DeleteReplicationType,
	}
	tgtArns := rcfg.FilterTargetArns(opts)
	dsc.targetsMap = make(map[string]replicateTargetDecision, len(tgtArns))
	if len(tgtArns) == 0 {
		return dsc
	}
	var sync, replicate bool
	for _, tgtArn := range tgtArns {
		opts.TargetArn = tgtArn
		replicate = rcfg.Replicate(opts)
		// when incoming delete is removal of a delete marker (a.k.a. versioned delete),
		// GetObjectInfo returns extra information even though it returns errFileNotFound
		if gerr != nil {
			validReplStatus := false
			switch oi.TargetReplicationStatus(tgtArn) {
			case replication.Pending, replication.Completed, replication.Failed:
				validReplStatus = true
			}
			if oi.DeleteMarker && (validReplStatus || replicate) {
				dsc.Set(newReplicateTargetDecision(tgtArn, replicate, sync))
				continue
			}
			// can be the case that other cluster is down and duplicate `mc rm --vid`
			// is issued - this still needs to be replicated back to the other target
			if !oi.VersionPurgeStatus.Empty() {
				replicate = oi.VersionPurgeStatus == Pending || oi.VersionPurgeStatus == Failed
				dsc.Set(newReplicateTargetDecision(tgtArn, replicate, sync))
			}
			continue
		}
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtArn)
		// the target online status should not be used here while deciding
		// whether to replicate deletes as the target could be temporarily down
		tgtDsc := newReplicateTargetDecision(tgtArn, false, false)
		if tgt != nil {
			tgtDsc = newReplicateTargetDecision(tgtArn, replicate, tgt.replicateSync)
		}
		dsc.Set(tgtDsc)
	}
	return dsc
}
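
// Sketch of the delete path (simplified; mirrors how DeleteObject handlers
// consume this decision): the decision rides along with the delete state so
// that the actual replication can happen asynchronously:
//
//	dsc := checkReplicateDelete(ctx, bucket, dobj, goi, opts, gerr) // dobj names the key/version being deleted
//	if dsc.ReplicateAny() {
//		// mark the delete pending on targets and queue a
//		// DeletedObjectReplicationInfo for the replication pool
//	}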
// replicate deletes to the designated replication target if the replication configuration
// has delete marker replication or delete replication (MinIO extension to allow deletes
// where a version id is specified) enabled.
// Similar to bucket replication for the PUT operation, soft delete (a.k.a. setting a delete
// marker) and permanent deletes (by specifying a version ID in the delete operation) have
// three states "Pending", "Complete" and "Failed" to mark the status of the replication of
// the "DELETE" operation. All failed operations can then be retried by healing. In the case
// of permanent deletes, until the replication is completed on the target cluster, the object
// version is marked deleted on the source and hidden from listing. It is permanently deleted
// from the source when the VersionPurgeStatus changes to "Complete", i.e. after replication
// succeeds on the target.
func replicateDelete(ctx context.Context, dobj DeletedObjectReplicationInfo, objectAPI ObjectLayer) {
	var replicationStatus replication.StatusType
	bucket := dobj.Bucket
	versionID := dobj.DeleteMarkerVersionID
	if versionID == "" {
		versionID = dobj.VersionID
	}

	defer func() {
		replStatus := string(replicationStatus)
		auditLogInternal(context.Background(), AuditLogOptions{
			Event:     dobj.EventType,
			APIName:   ReplicateDeleteAPI,
			Bucket:    bucket,
			Object:    dobj.ObjectName,
			VersionID: versionID,
			Status:    replStatus,
		})
	}()

	rcfg, err := getReplicationConfig(ctx, bucket)
	if err != nil || rcfg == nil {
		logger.LogOnceIf(ctx, fmt.Errorf("unable to obtain replication config for bucket: %s: err: %s", bucket, err), bucket)
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}
	dsc, err := parseReplicateDecision(ctx, bucket, dobj.ReplicationState.ReplicateDecisionStr)
	if err != nil {
		logger.LogOnceIf(ctx, fmt.Errorf("unable to parse replication decision parameters for bucket: %s, err: %s, decision: %s",
			bucket, err, dobj.ReplicationState.ReplicateDecisionStr), dobj.ReplicationState.ReplicateDecisionStr)
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}

	// Lock the object name before starting replication operation.
	// Use separate lock that doesn't collide with regular objects.
	lk := objectAPI.NewNSLock(bucket, "/[replicate]/"+dobj.ObjectName)
	lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		globalReplicationPool.queueMRFSave(dobj.ToMRFEntry())
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		return
	}
	ctx = lkctx.Context()
	defer lk.Unlock(lkctx)

	rinfos := replicatedInfos{Targets: make([]replicatedTargetInfo, 0, len(dsc.targetsMap))}
	var wg sync.WaitGroup
	var mu sync.Mutex
	for _, tgtEntry := range dsc.targetsMap {
		if !tgtEntry.Replicate {
			continue
		}
		// if dobj.TargetArn is not empty string, this is a case of specific target being re-synced.
		if dobj.TargetArn != "" && dobj.TargetArn != tgtEntry.Arn {
			continue
		}
		tgtClnt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtEntry.Arn)
		if tgtClnt == nil {
			// Skip stale targets if any and log them to be missing at least once.
			logger.LogOnceIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, tgtEntry.Arn), tgtEntry.Arn)
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object: ObjectInfo{
					Bucket:       bucket,
					Name:         dobj.ObjectName,
					VersionID:    versionID,
					DeleteMarker: dobj.DeleteMarker,
				},
				UserAgent: "Internal: [Replication]",
				Host:      globalLocalNodeName,
			})
			continue
		}
		wg.Add(1)
		go func(tgt *TargetClient) {
			defer wg.Done()
			tgtInfo := replicateDeleteToTarget(ctx, dobj, tgt)

			mu.Lock()
			rinfos.Targets = append(rinfos.Targets, tgtInfo)
			mu.Unlock()
		}(tgtClnt)
	}
	wg.Wait()

	replicationStatus = rinfos.ReplicationStatus()
	prevStatus := dobj.DeleteMarkerReplicationStatus()

	if dobj.VersionID != "" {
		prevStatus = replication.StatusType(dobj.VersionPurgeStatus())
		replicationStatus = replication.StatusType(rinfos.VersionPurgeStatus())
	}

	// to decrement pending count later.
	for _, rinfo := range rinfos.Targets {
		if rinfo.ReplicationStatus != rinfo.PrevReplicationStatus {
			globalReplicationStats.Update(dobj.Bucket, rinfo, replicationStatus,
				prevStatus)
		}
	}

	eventName := event.ObjectReplicationComplete
	if replicationStatus == replication.Failed {
		eventName = event.ObjectReplicationFailed
		globalReplicationPool.queueMRFSave(dobj.ToMRFEntry())
	}
	drs := getReplicationState(rinfos, dobj.ReplicationState, dobj.VersionID)
	if replicationStatus != prevStatus {
		drs.ReplicationTimeStamp = UTCNow()
	}

	dobjInfo, err := objectAPI.DeleteObject(ctx, bucket, dobj.ObjectName, ObjectOptions{
		VersionID:         versionID,
		MTime:             dobj.DeleteMarkerMTime.Time,
		DeleteReplication: drs,
		Versioned:         globalBucketVersioningSys.PrefixEnabled(bucket, dobj.ObjectName),
		// Objects matching prefixes should not leave delete markers,
		// dramatically reduces namespace pollution while keeping the
		// benefits of replication, make sure to apply version suspension
		// only at bucket level instead.
		VersionSuspended: globalBucketVersioningSys.Suspended(bucket),
	})
	if err != nil && !isErrVersionNotFound(err) { // VersionNotFound would be reported by pool that object version is missing on.
		sendEvent(eventArgs{
			BucketName: bucket,
			Object: ObjectInfo{
				Bucket:       bucket,
				Name:         dobj.ObjectName,
				VersionID:    versionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: eventName,
		})
	} else {
		sendEvent(eventArgs{
			BucketName: bucket,
			Object:     dobjInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
			EventName:  eventName,
		})
	}
}

func replicateDeleteToTarget(ctx context.Context, dobj DeletedObjectReplicationInfo, tgt *TargetClient) (rinfo replicatedTargetInfo) {
	versionID := dobj.DeleteMarkerVersionID
	if versionID == "" {
		versionID = dobj.VersionID
	}

	rinfo = dobj.ReplicationState.targetState(tgt.ARN)
	rinfo.OpType = dobj.OpType
	rinfo.endpoint = tgt.EndpointURL().Host
	rinfo.secure = tgt.EndpointURL().Scheme == "https"
	defer func() {
		if rinfo.ReplicationStatus == replication.Completed && tgt.ResetID != "" && dobj.OpType == replication.ExistingObjectReplicationType {
			rinfo.ResyncTimestamp = fmt.Sprintf("%s;%s", UTCNow().Format(http.TimeFormat), tgt.ResetID)
		}
	}()

	if dobj.VersionID == "" && rinfo.PrevReplicationStatus == replication.Completed && dobj.OpType != replication.ExistingObjectReplicationType {
		rinfo.ReplicationStatus = rinfo.PrevReplicationStatus
		return
	}
	if dobj.VersionID != "" && rinfo.VersionPurgeStatus == Complete {
		return
	}
	if globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
		logger.LogOnceIf(ctx, fmt.Errorf("remote target is offline for bucket:%s arn:%s", dobj.Bucket, tgt.ARN), "replication-target-offline-delete-"+tgt.ARN)
		sendEvent(eventArgs{
			BucketName: dobj.Bucket,
			Object: ObjectInfo{
				Bucket:       dobj.Bucket,
				Name:         dobj.ObjectName,
				VersionID:    dobj.VersionID,
				DeleteMarker: dobj.DeleteMarker,
			},
			UserAgent: "Internal: [Replication]",
			Host:      globalLocalNodeName,
			EventName: event.ObjectReplicationNotTracked,
		})
		if dobj.VersionID == "" {
			rinfo.ReplicationStatus = replication.Failed
		} else {
			rinfo.VersionPurgeStatus = Failed
		}
		return
	}
	// early return if we have already replicated this delete marker, for existing
	// object replication / healing of delete markers
	if dobj.DeleteMarkerVersionID != "" {
		toi, err := tgt.StatObject(ctx, tgt.Bucket, dobj.ObjectName, minio.StatObjectOptions{
			VersionID: versionID,
			Internal: minio.AdvancedGetOptions{
				ReplicationProxyRequest:           "false",
				IsReplicationReadyForDeleteMarker: true,
			},
		})
		serr := ErrorRespToObjectError(err, dobj.Bucket, dobj.ObjectName, dobj.VersionID)
		switch {
		case isErrMethodNotAllowed(serr):
			// delete marker already replicated
			if dobj.VersionID == "" && rinfo.VersionPurgeStatus.Empty() {
				rinfo.ReplicationStatus = replication.Completed
				return
			}
		case isErrObjectNotFound(serr), isErrVersionNotFound(serr):
			// version being purged is already not found on target.
			if !rinfo.VersionPurgeStatus.Empty() {
				rinfo.VersionPurgeStatus = Complete
				return
			}
		case isErrReadQuorum(serr), isErrWriteQuorum(serr):
			// destination has some quorum issues, perform removeObject() anyways
			// to complete the operation.
		default:
			if err != nil && minio.IsNetworkOrHostDown(err, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
				globalBucketTargetSys.markOffline(tgt.EndpointURL())
			}
			// mark delete marker replication as failed if target cluster not ready to receive
			// this request yet (object version not replicated yet)
			if err != nil && !toi.ReplicationReady {
				rinfo.ReplicationStatus = replication.Failed
				rinfo.Err = err
				return
			}
		}
	}
	rmErr := tgt.RemoveObject(ctx, tgt.Bucket, dobj.ObjectName, minio.RemoveObjectOptions{
		VersionID: versionID,
		Internal: minio.AdvancedRemoveOptions{
			ReplicationDeleteMarker: dobj.DeleteMarkerVersionID != "",
			ReplicationMTime:        dobj.DeleteMarkerMTime.Time,
			ReplicationStatus:       minio.ReplicationStatusReplica,
			ReplicationRequest:      true, // always set this to distinguish between `mc mirror` replication and serverside
		},
	})
	if rmErr != nil {
		rinfo.Err = rmErr
		if dobj.VersionID == "" {
			rinfo.ReplicationStatus = replication.Failed
		} else {
			rinfo.VersionPurgeStatus = Failed
		}
		logger.LogIf(ctx, fmt.Errorf("unable to replicate delete marker to %s: %s/%s(%s): %w", tgt.EndpointURL(), tgt.Bucket, dobj.ObjectName, versionID, rmErr))
		if rmErr != nil && minio.IsNetworkOrHostDown(rmErr, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			globalBucketTargetSys.markOffline(tgt.EndpointURL())
		}
	} else {
		if dobj.VersionID == "" {
			rinfo.ReplicationStatus = replication.Completed
		} else {
			rinfo.VersionPurgeStatus = Complete
		}
	}
	return
}

func getCopyObjMetadata(oi ObjectInfo, sc string) map[string]string {
	meta := make(map[string]string, len(oi.UserDefined))
	for k, v := range oi.UserDefined {
		if stringsHasPrefixFold(k, ReservedMetadataPrefixLower) {
			continue
		}

		if equals(k, xhttp.AmzBucketReplicationStatus) {
			continue
		}

		// https://github.com/google/security-research/security/advisories/GHSA-76wf-9vgp-pj7w
		if equals(k, xhttp.AmzMetaUnencryptedContentLength, xhttp.AmzMetaUnencryptedContentMD5) {
			continue
		}
		meta[k] = v
	}

	if oi.ContentEncoding != "" {
		meta[xhttp.ContentEncoding] = oi.ContentEncoding
	}

	if oi.ContentType != "" {
		meta[xhttp.ContentType] = oi.ContentType
	}

	meta[xhttp.AmzObjectTagging] = oi.UserTags
	meta[xhttp.AmzTagDirective] = "REPLACE"

	if sc == "" {
		sc = oi.StorageClass
	}
	// drop non-standard storage classes for tiering from replication
	if sc != "" && (sc == storageclass.RRS || sc == storageclass.STANDARD) {
		meta[xhttp.AmzStorageClass] = sc
	}

	meta[xhttp.MinIOSourceETag] = oi.ETag
	meta[xhttp.MinIOSourceMTime] = oi.ModTime.UTC().Format(time.RFC3339Nano)
	meta[xhttp.AmzBucketReplicationStatus] = replication.Replica.String()
	return meta
}

type caseInsensitiveMap map[string]string

// Lookup map entry case insensitively.
func (m caseInsensitiveMap) Lookup(key string) (string, bool) {
	if len(m) == 0 {
		return "", false
	}
	for _, k := range []string{
		key,
		strings.ToLower(key),
		http.CanonicalHeaderKey(key),
	} {
		v, ok := m[k]
		if ok {
			return v, ok
		}
	}
	return "", false
}

func putReplicationOpts(ctx context.Context, sc string, objInfo ObjectInfo) (putOpts minio.PutObjectOptions, err error) {
	meta := make(map[string]string)
	for k, v := range objInfo.UserDefined {
		// In case of SSE-C objects copy the allowed internal headers as well
		if !crypto.SSEC.IsEncrypted(objInfo.UserDefined) || !slices.Contains(maps.Keys(validSSEReplicationHeaders), k) {
			if stringsHasPrefixFold(k, ReservedMetadataPrefixLower) {
				continue
			}
			if isStandardHeader(k) {
				continue
			}
		}
		if slices.Contains(maps.Keys(validSSEReplicationHeaders), k) {
			meta[validSSEReplicationHeaders[k]] = v
		} else {
			meta[k] = v
		}
	}

	if sc == "" && (objInfo.StorageClass == storageclass.STANDARD || objInfo.StorageClass == storageclass.RRS) {
		sc = objInfo.StorageClass
	}
	putOpts = minio.PutObjectOptions{
		UserMetadata:    meta,
		ContentType:     objInfo.ContentType,
		ContentEncoding: objInfo.ContentEncoding,
		Expires:         objInfo.Expires,
		StorageClass:    sc,
		Internal: minio.AdvancedPutOptions{
			SourceVersionID:    objInfo.VersionID,
			ReplicationStatus:  minio.ReplicationStatusReplica,
			SourceMTime:        objInfo.ModTime,
			SourceETag:         objInfo.ETag,
			ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and serverside
		},
	}
	if objInfo.UserTags != "" {
		tag, _ := tags.ParseObjectTags(objInfo.UserTags)
		if tag != nil {
			putOpts.UserTags = tag.ToMap()
			// set tag timestamp in opts
			tagTimestamp := objInfo.ModTime
			if tagTmstampStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+TaggingTimestamp]; ok {
				tagTimestamp, err = time.Parse(time.RFC3339Nano, tagTmstampStr)
				if err != nil {
					return putOpts, err
				}
			}
			putOpts.Internal.TaggingTimestamp = tagTimestamp
		}
	}

	lkMap := caseInsensitiveMap(objInfo.UserDefined)
	if lang, ok := lkMap.Lookup(xhttp.ContentLanguage); ok {
		putOpts.ContentLanguage = lang
	}
	if disp, ok := lkMap.Lookup(xhttp.ContentDisposition); ok {
		putOpts.ContentDisposition = disp
	}
	if cc, ok := lkMap.Lookup(xhttp.CacheControl); ok {
		putOpts.CacheControl = cc
	}
	if mode, ok := lkMap.Lookup(xhttp.AmzObjectLockMode); ok {
		rmode := minio.RetentionMode(mode)
		putOpts.Mode = rmode
	}
	if retainDateStr, ok := lkMap.Lookup(xhttp.AmzObjectLockRetainUntilDate); ok {
		rdate, err := amztime.ISO8601Parse(retainDateStr)
		if err != nil {
			return putOpts, err
		}
		putOpts.RetainUntilDate = rdate
		// set retention timestamp in opts
		retTimestamp := objInfo.ModTime
		if retainTmstampStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockRetentionTimestamp]; ok {
			retTimestamp, err = time.Parse(time.RFC3339Nano, retainTmstampStr)
			if err != nil {
				return putOpts, err
			}
		}
		putOpts.Internal.RetentionTimestamp = retTimestamp
	}
	if lhold, ok := lkMap.Lookup(xhttp.AmzObjectLockLegalHold); ok {
		putOpts.LegalHold = minio.LegalHoldStatus(lhold)
		// set legalhold timestamp in opts
		lholdTimestamp := objInfo.ModTime
		if lholdTmstampStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockLegalHoldTimestamp]; ok {
			lholdTimestamp, err = time.Parse(time.RFC3339Nano, lholdTmstampStr)
			if err != nil {
				return putOpts, err
			}
		}
		putOpts.Internal.LegalholdTimestamp = lholdTimestamp
	}
	if crypto.S3.IsEncrypted(objInfo.UserDefined) {
		putOpts.ServerSideEncryption = encrypt.NewSSE()
	}
	return
}

type replicationAction string

const (
	replicateMetadata replicationAction = "metadata"
	replicateNone     replicationAction = "none"
	replicateAll      replicationAction = "all"
)

// matches k1 with all keys, returns 'true' if one of them matches
func equals(k1 string, keys ...string) bool {
	for _, k2 := range keys {
		if strings.EqualFold(k1, k2) {
			return true
		}
	}
	return false
}

// returns replicationAction by comparing metadata between source and target
func getReplicationAction(oi1 ObjectInfo, oi2 minio.ObjectInfo, opType replication.Type) replicationAction {
	// Avoid resyncing null versions created prior to enabling replication if target has a newer copy
	if opType == replication.ExistingObjectReplicationType &&
		oi1.ModTime.Unix() > oi2.LastModified.Unix() && oi1.VersionID == nullVersionID {
		return replicateNone
	}
	sz, _ := oi1.GetActualSize()

	// needs full replication
	if oi1.ETag != oi2.ETag ||
		oi1.VersionID != oi2.VersionID ||
		sz != oi2.Size ||
		oi1.DeleteMarker != oi2.IsDeleteMarker ||
		oi1.ModTime.Unix() != oi2.LastModified.Unix() {
		return replicateAll
	}

	if oi1.ContentType != oi2.ContentType {
		return replicateMetadata
	}

	if oi1.ContentEncoding != "" {
		enc, ok := oi2.Metadata[xhttp.ContentEncoding]
		if !ok {
			enc, ok = oi2.Metadata[strings.ToLower(xhttp.ContentEncoding)]
			if !ok {
				return replicateMetadata
			}
		}
		if strings.Join(enc, ",") != oi1.ContentEncoding {
			return replicateMetadata
		}
	}

	t, _ := tags.ParseObjectTags(oi1.UserTags)
	if (oi2.UserTagCount > 0 && !reflect.DeepEqual(oi2.UserTags, t.ToMap())) || (oi2.UserTagCount != len(t.ToMap())) {
		return replicateMetadata
	}

	// Compare only necessary headers
	compareKeys := []string{
		"Expires",
		"Cache-Control",
		"Content-Language",
		"Content-Disposition",
		"X-Amz-Object-Lock-Mode",
		"X-Amz-Object-Lock-Retain-Until-Date",
		"X-Amz-Object-Lock-Legal-Hold",
		"X-Amz-Website-Redirect-Location",
		"X-Amz-Meta-",
	}

	// compare metadata on both maps to see if meta is identical
	compareMeta1 := make(map[string]string)
	for k, v := range oi1.UserDefined {
		var found bool
		for _, prefix := range compareKeys {
			if !stringsHasPrefixFold(k, prefix) {
				continue
			}
			found = true
			break
		}
		if found {
			compareMeta1[strings.ToLower(k)] = v
		}
	}

	compareMeta2 := make(map[string]string)
	for k, v := range oi2.Metadata {
		var found bool
		for _, prefix := range compareKeys {
			if !stringsHasPrefixFold(k, prefix) {
				continue
			}
			found = true
			break
		}
		if found {
			compareMeta2[strings.ToLower(k)] = strings.Join(v, ",")
		}
	}

	if !reflect.DeepEqual(compareMeta1, compareMeta2) {
		return replicateMetadata
	}

	return replicateNone
}
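
// Decision sketch: given a stat of the object on the target, the action maps
// to how much work replication has to do (variable names illustrative):
//
//	switch getReplicationAction(localInfo, remoteInfo, replication.ObjectReplicationType) {
//	case replicateAll:      // data or version differs: full object copy
//	case replicateMetadata: // only metadata/tags differ: server-side copy onto itself
//	case replicateNone:     // already in sync: nothing to send
//	}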
// replicateObject replicates the specified version of the object to the destination bucket.
// The source object is then updated to reflect the replication status.
func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI ObjectLayer) {
	var replicationStatus replication.StatusType
	defer func() {
		if replicationStatus.Empty() {
			// an empty replication status means replication was not
			// attempted for some reason, notify the state of the
			// object on disk.
			replicationStatus = ri.ReplicationStatus
		}
		auditLogInternal(ctx, AuditLogOptions{
			Event:     ri.EventType,
			APIName:   ReplicateObjectAPI,
			Bucket:    ri.Bucket,
			Object:    ri.Name,
			VersionID: ri.VersionID,
			Status:    replicationStatus.String(),
		})
	}()

	bucket := ri.Bucket
	object := ri.Name

	cfg, err := getReplicationConfig(ctx, bucket)
	if err != nil {
		logger.LogOnceIf(ctx, err, "get-replication-config-"+bucket)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     ri.ToObjectInfo(),
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}
	tgtArns := cfg.FilterTargetArns(replication.ObjectOpts{
		Name:     object,
		SSEC:     ri.SSEC,
		UserTags: ri.UserTags,
	})
	// Lock the object name before starting replication.
	// Use separate lock that doesn't collide with regular objects.
	lk := objectAPI.NewNSLock(bucket, "/[replicate]/"+object)
	lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     ri.ToObjectInfo(),
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
		return
	}
	ctx = lkctx.Context()
	defer lk.Unlock(lkctx)

	rinfos := replicatedInfos{Targets: make([]replicatedTargetInfo, 0, len(tgtArns))}
	var wg sync.WaitGroup
	var mu sync.Mutex
	for _, tgtArn := range tgtArns {
		tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, tgtArn)
		if tgt == nil {
			logger.LogOnceIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, tgtArn), tgtArn)
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     ri.ToObjectInfo(),
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			continue
		}
		wg.Add(1)
		go func(tgt *TargetClient) {
			defer wg.Done()

			var tgtInfo replicatedTargetInfo
			if ri.OpType == replication.ObjectReplicationType {
				// all incoming calls go through optimized path.
				tgtInfo = ri.replicateObject(ctx, objectAPI, tgt)
			} else {
				tgtInfo = ri.replicateAll(ctx, objectAPI, tgt)
			}

			mu.Lock()
			rinfos.Targets = append(rinfos.Targets, tgtInfo)
			mu.Unlock()
		}(tgt)
	}
	wg.Wait()

	replicationStatus = rinfos.ReplicationStatus() // used in defer function
	// FIXME: add support for missing replication events
	// - event.ObjectReplicationMissedThreshold
	// - event.ObjectReplicationReplicatedAfterThreshold
	eventName := event.ObjectReplicationComplete
	if replicationStatus == replication.Failed {
		eventName = event.ObjectReplicationFailed
	}
	newReplStatusInternal := rinfos.ReplicationStatusInternal()
	// Note that internal replication status(es) may match for previously replicated objects - in such cases
	// metadata should be updated with last resync timestamp.
	objInfo := ri.ToObjectInfo()
	if ri.ReplicationStatusInternal != newReplStatusInternal || rinfos.ReplicationResynced() {
		popts := ObjectOptions{
			MTime:     ri.ModTime,
			VersionID: ri.VersionID,
			EvalMetadataFn: func(oi *ObjectInfo, gerr error) (dsc ReplicateDecision, err error) {
				oi.UserDefined[ReservedMetadataPrefixLower+ReplicationStatus] = newReplStatusInternal
				oi.UserDefined[ReservedMetadataPrefixLower+ReplicationTimestamp] = UTCNow().Format(time.RFC3339Nano)
				oi.UserDefined[xhttp.AmzBucketReplicationStatus] = string(rinfos.ReplicationStatus())
				for _, rinfo := range rinfos.Targets {
					if rinfo.ResyncTimestamp != "" {
						oi.UserDefined[targetResetHeader(rinfo.Arn)] = rinfo.ResyncTimestamp
					}
				}
				if ri.UserTags != "" {
					oi.UserDefined[xhttp.AmzObjectTagging] = ri.UserTags
				}
				return dsc, nil
			},
		}

		uobjInfo, _ := objectAPI.PutObjectMetadata(ctx, bucket, object, popts)
		if uobjInfo.Name != "" {
			objInfo = uobjInfo
		}

		opType := replication.MetadataReplicationType
		if rinfos.Action() == replicateAll {
			opType = replication.ObjectReplicationType
		}
		for _, rinfo := range rinfos.Targets {
			if rinfo.ReplicationStatus != rinfo.PrevReplicationStatus {
				rinfo.OpType = opType // update optype to reflect correct operation.
				globalReplicationStats.Update(bucket, rinfo, rinfo.ReplicationStatus, rinfo.PrevReplicationStatus)
			}
		}
	}

	sendEvent(eventArgs{
		EventName:  eventName,
		BucketName: bucket,
		Object:     objInfo,
		UserAgent:  "Internal: [Replication]",
		Host:       globalLocalNodeName,
	})

	// re-queue failures once more - keep a retry count to avoid flooding the queue if
	// the target site is down. Leave it to scanner to catch up instead.
	if rinfos.ReplicationStatus() != replication.Completed {
		ri.OpType = replication.HealReplicationType
		ri.EventType = ReplicateMRF
		ri.ReplicationStatusInternal = rinfos.ReplicationStatusInternal()
		ri.RetryCount++
		globalReplicationPool.queueMRFSave(ri.ToMRFEntry())
	}
}
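
// The EvalMetadataFn hook used above updates replication metadata atomically
// under the object lock. A minimal sketch of the same pattern for a single
// hypothetical key (the key name here is illustrative):
//
//	popts := ObjectOptions{
//		VersionID: oi.VersionID,
//		EvalMetadataFn: func(oi *ObjectInfo, gerr error) (ReplicateDecision, error) {
//			oi.UserDefined[ReservedMetadataPrefixLower+"example-key"] = "value"
//			return ReplicateDecision{}, nil
//		},
//	}
//	_, _ = objectAPI.PutObjectMetadata(ctx, bucket, object, popts)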
// replicateObject replicates object data for the specified version of the object to the
// destination bucket. The source object is then updated to reflect the replication status.
func (ri ReplicateObjectInfo) replicateObject(ctx context.Context, objectAPI ObjectLayer, tgt *TargetClient) (rinfo replicatedTargetInfo) {
	startTime := time.Now()
	bucket := ri.Bucket
	object := ri.Name

	rAction := replicateAll
	rinfo = replicatedTargetInfo{
		Size:                  ri.ActualSize,
		Arn:                   tgt.ARN,
		PrevReplicationStatus: ri.TargetReplicationStatus(tgt.ARN),
		ReplicationStatus:     replication.Failed,
		OpType:                ri.OpType,
		ReplicationAction:     rAction,
		endpoint:              tgt.EndpointURL().Host,
		secure:                tgt.EndpointURL().Scheme == "https",
	}
	if ri.TargetReplicationStatus(tgt.ARN) == replication.Completed && !ri.ExistingObjResync.Empty() && !ri.ExistingObjResync.mustResyncTarget(tgt.ARN) {
		rinfo.ReplicationStatus = replication.Completed
		rinfo.ReplicationResynced = true
		return
	}

	if globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
		logger.LogOnceIf(ctx, fmt.Errorf("remote target is offline for bucket:%s arn:%s retry:%d", bucket, tgt.ARN, ri.RetryCount), "replication-target-offline"+tgt.ARN)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     ri.ToObjectInfo(),
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}

	versioned := globalBucketVersioningSys.PrefixEnabled(bucket, object)
	versionSuspended := globalBucketVersioningSys.PrefixSuspended(bucket, object)

	gr, err := objectAPI.GetObjectNInfo(ctx, bucket, object, nil, http.Header{}, ObjectOptions{
		VersionID:          ri.VersionID,
		Versioned:          versioned,
		VersionSuspended:   versionSuspended,
		ReplicationRequest: true,
	})
	if err != nil {
		if !isErrVersionNotFound(err) && !isErrObjectNotFound(err) {
			objInfo := ri.ToObjectInfo()
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			logger.LogOnceIf(ctx, fmt.Errorf("unable to read source object %s/%s(%s): %w", bucket, object, objInfo.VersionID, err), object+":"+objInfo.VersionID)
		}
		return
	}
	defer gr.Close()

	objInfo := gr.ObjInfo

	// make sure we have the latest metadata for metrics calculation
	rinfo.PrevReplicationStatus = objInfo.TargetReplicationStatus(tgt.ARN)

	size, err := objInfo.GetActualSize()
	if err != nil {
		logger.LogIf(ctx, err)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}

	if tgt.Bucket == "" {
		logger.LogIf(ctx, fmt.Errorf("unable to replicate object %s(%s), bucket is empty for target %s", objInfo.Name, objInfo.VersionID, tgt.EndpointURL()))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return rinfo
	}
	defer func() {
		if rinfo.ReplicationStatus == replication.Completed && ri.OpType == replication.ExistingObjectReplicationType && tgt.ResetID != "" {
			rinfo.ResyncTimestamp = fmt.Sprintf("%s;%s", UTCNow().Format(http.TimeFormat), tgt.ResetID)
			rinfo.ReplicationResynced = true
		}
		rinfo.Duration = time.Since(startTime)
	}()
	rinfo.ReplicationStatus = replication.Completed
	rinfo.Size = size
	rinfo.ReplicationAction = rAction
	// use core client to avoid doing multipart on PUT
	c := &minio.Core{Client: tgt.Client}

	putOpts, err := putReplicationOpts(ctx, tgt.StorageClass, objInfo)
	if err != nil {
		logger.LogIf(ctx, fmt.Errorf("failure setting options for replication bucket:%s err:%w", bucket, err))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}

	var headerSize int
	for k, v := range putOpts.Header() {
		headerSize += len(k) + len(v)
	}

	opts := &bandwidth.MonitorReaderOptions{
		BucketOptions: bandwidth.BucketOptions{
			Name:           ri.Bucket,
			ReplicationARN: tgt.ARN,
		},
		HeaderSize: headerSize,
	}
	newCtx := ctx
	if globalBucketMonitor.IsThrottled(bucket, tgt.ARN) {
		var cancel context.CancelFunc
		newCtx, cancel = context.WithTimeout(ctx, throttleDeadline)
		defer cancel()
	}
	r := bandwidth.NewMonitoredReader(newCtx, globalBucketMonitor, gr, opts)
	if objInfo.isMultipart() {
		if rinfo.Err = replicateObjectWithMultipart(ctx, c, tgt.Bucket, object,
			r, objInfo, putOpts); rinfo.Err != nil {
			if minio.ToErrorResponse(rinfo.Err).Code != "PreconditionFailed" {
				rinfo.ReplicationStatus = replication.Failed
				logger.LogIf(ctx, fmt.Errorf("unable to replicate for object %s/%s(%s): %s (target: %s)", bucket, objInfo.Name, objInfo.VersionID, rinfo.Err, tgt.EndpointURL()))
			}
		}
	} else {
		if _, rinfo.Err = c.PutObject(ctx, tgt.Bucket, object, r, size, "", "", putOpts); rinfo.Err != nil {
			if minio.ToErrorResponse(rinfo.Err).Code != "PreconditionFailed" {
				rinfo.ReplicationStatus = replication.Failed
				logger.LogIf(ctx, fmt.Errorf("unable to replicate for object %s/%s(%s): %s (target: %s)", bucket, objInfo.Name, objInfo.VersionID, rinfo.Err, tgt.EndpointURL()))
			}
		}
	}
	if rinfo.Err != nil && minio.IsNetworkOrHostDown(rinfo.Err, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
		globalBucketTargetSys.markOffline(tgt.EndpointURL())
	}
	return
}

// replicateAll replicates metadata for the specified version of the object to the destination
// bucket; if the destination version is missing, it automatically does a full copy as well.
// The source object is then updated to reflect the replication status.
func (ri ReplicateObjectInfo) replicateAll(ctx context.Context, objectAPI ObjectLayer, tgt *TargetClient) (rinfo replicatedTargetInfo) {
	startTime := time.Now()
	bucket := ri.Bucket
	object := ri.Name

	// set defaults for replication action based on operation being performed - actual
	// replication action can only be determined after stat on remote. This default is
	// needed for updating replication metrics correctly when target is offline.
	rAction := replicateMetadata

	rinfo = replicatedTargetInfo{
		Size:                  ri.ActualSize,
		Arn:                   tgt.ARN,
		PrevReplicationStatus: ri.TargetReplicationStatus(tgt.ARN),
		ReplicationStatus:     replication.Failed,
		OpType:                ri.OpType,
		ReplicationAction:     rAction,
		endpoint:              tgt.EndpointURL().Host,
		secure:                tgt.EndpointURL().Scheme == "https",
	}

	if globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
		logger.LogOnceIf(ctx, fmt.Errorf("remote target is offline for bucket:%s arn:%s retry:%d", bucket, tgt.ARN, ri.RetryCount), "replication-target-offline-heal"+tgt.ARN)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     ri.ToObjectInfo(),
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}

	versioned := globalBucketVersioningSys.PrefixEnabled(bucket, object)
	versionSuspended := globalBucketVersioningSys.PrefixSuspended(bucket, object)

	gr, err := objectAPI.GetObjectNInfo(ctx, bucket, object, nil, http.Header{},
		ObjectOptions{
			VersionID:          ri.VersionID,
			Versioned:          versioned,
			VersionSuspended:   versionSuspended,
			ReplicationRequest: true,
		})
	if err != nil {
		if !isErrVersionNotFound(err) && !isErrObjectNotFound(err) {
			objInfo := ri.ToObjectInfo()
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			logger.LogIf(ctx, fmt.Errorf("unable to replicate to target %s for %s/%s(%s): %w", tgt.EndpointURL(), bucket, object, objInfo.VersionID, err))
		}
		return
	}
	defer gr.Close()

	objInfo := gr.ObjInfo

	// make sure we have the latest metadata for metrics calculation
	rinfo.PrevReplicationStatus = objInfo.TargetReplicationStatus(tgt.ARN)

	// use latest ObjectInfo to check if previous replication attempt succeeded
	if objInfo.TargetReplicationStatus(tgt.ARN) == replication.Completed && !ri.ExistingObjResync.Empty() && !ri.ExistingObjResync.mustResyncTarget(tgt.ARN) {
		rinfo.ReplicationStatus = replication.Completed
		rinfo.ReplicationResynced = true
		return
	}

	size, err := objInfo.GetActualSize()
	if err != nil {
		logger.LogIf(ctx, err)
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return
	}

	// Set the encrypted size for SSE-C objects
	if crypto.SSEC.IsEncrypted(objInfo.UserDefined) {
		size = objInfo.Size
	}

	if tgt.Bucket == "" {
		logger.LogIf(ctx, fmt.Errorf("unable to replicate object %s(%s) to %s, target bucket is missing", objInfo.Name, objInfo.VersionID, tgt.EndpointURL()))
		sendEvent(eventArgs{
			EventName:  event.ObjectReplicationNotTracked,
			BucketName: bucket,
			Object:     objInfo,
			UserAgent:  "Internal: [Replication]",
			Host:       globalLocalNodeName,
		})
		return rinfo
	}
	defer func() {
		if rinfo.ReplicationStatus == replication.Completed && ri.OpType == replication.ExistingObjectReplicationType && tgt.ResetID != "" {
			rinfo.ResyncTimestamp = fmt.Sprintf("%s;%s", UTCNow().Format(http.TimeFormat), tgt.ResetID)
			rinfo.ReplicationResynced = true
		}
		rinfo.Duration = time.Since(startTime)
	}()

	oi, cerr := tgt.StatObject(ctx, tgt.Bucket, object, minio.StatObjectOptions{
		VersionID: objInfo.VersionID,
		Internal: minio.AdvancedGetOptions{
			ReplicationProxyRequest: "false",
		},
	})
	if cerr == nil {
		rAction = getReplicationAction(objInfo, oi, ri.OpType)
		rinfo.ReplicationStatus = replication.Completed
		if rAction == replicateNone {
			if ri.OpType == replication.ExistingObjectReplicationType &&
				objInfo.ModTime.Unix() > oi.LastModified.Unix() && objInfo.VersionID == nullVersionID {
				logger.LogIf(ctx, fmt.Errorf("unable to replicate %s/%s (null). Newer version exists on target %s", bucket, object, tgt.EndpointURL()))
				sendEvent(eventArgs{
					EventName:  event.ObjectReplicationNotTracked,
					BucketName: bucket,
					Object:     objInfo,
					UserAgent:  "Internal: [Replication]",
					Host:       globalLocalNodeName,
				})
			}
			// object with same VersionID already exists, replication kicked off by
			// PutObject might have completed
			if objInfo.TargetReplicationStatus(tgt.ARN) == replication.Pending ||
				objInfo.TargetReplicationStatus(tgt.ARN) == replication.Failed ||
				ri.OpType == replication.ExistingObjectReplicationType {
				// if metadata is not updated for some reason after replication, such as
				// 503 encountered while updating metadata - make sure to set ReplicationStatus
				// as Completed.
				//
				// Note: Replication Stats would have been updated despite metadata update failure.
				rinfo.ReplicationAction = rAction
				rinfo.ReplicationStatus = replication.Completed
			}
			return
		}
	} else {
		// if target returns error other than NoSuchKey, defer replication attempt
		if minio.IsNetworkOrHostDown(cerr, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			globalBucketTargetSys.markOffline(tgt.EndpointURL())
		}

		serr := ErrorRespToObjectError(cerr, bucket, object, objInfo.VersionID)
		switch {
		case isErrMethodNotAllowed(serr):
			rAction = replicateAll
		case isErrObjectNotFound(serr), isErrVersionNotFound(serr):
			rAction = replicateAll
		case isErrReadQuorum(serr), isErrWriteQuorum(serr):
			rAction = replicateAll
		default:
			rinfo.Err = cerr
			logger.LogIf(ctx, fmt.Errorf("unable to replicate %s/%s (%s). Target (%s) returned %s error on HEAD",
				bucket, object, objInfo.VersionID, tgt.EndpointURL(), cerr))
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			return
		}
	}
	rinfo.ReplicationStatus = replication.Completed
	rinfo.Size = size
	rinfo.ReplicationAction = rAction
	// use core client to avoid doing multipart on PUT
	c := &minio.Core{Client: tgt.Client}
	if rAction != replicateAll {
		// replicate metadata for object tagging/copy with metadata replacement
		srcOpts := minio.CopySrcOptions{
			Bucket:    tgt.Bucket,
			Object:    object,
			VersionID: objInfo.VersionID,
		}
		dstOpts := minio.PutObjectOptions{
			Internal: minio.AdvancedPutOptions{
				SourceVersionID:    objInfo.VersionID,
				ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and serverside
			},
		}
		if tagTmStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+TaggingTimestamp]; ok {
			ondiskTimestamp, err := time.Parse(time.RFC3339, tagTmStr)
			if err == nil {
				dstOpts.Internal.TaggingTimestamp = ondiskTimestamp
			}
		}
		if retTmStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockRetentionTimestamp]; ok {
			ondiskTimestamp, err := time.Parse(time.RFC3339, retTmStr)
			if err == nil {
				dstOpts.Internal.RetentionTimestamp = ondiskTimestamp
			}
		}
		if lholdTmStr, ok := objInfo.UserDefined[ReservedMetadataPrefixLower+ObjectLockLegalHoldTimestamp]; ok {
			ondiskTimestamp, err := time.Parse(time.RFC3339, lholdTmStr)
			if err == nil {
				dstOpts.Internal.LegalholdTimestamp = ondiskTimestamp
			}
		}
		if _, rinfo.Err = c.CopyObject(ctx, tgt.Bucket, object, tgt.Bucket, object, getCopyObjMetadata(objInfo, tgt.StorageClass), srcOpts, dstOpts); rinfo.Err != nil {
			rinfo.ReplicationStatus = replication.Failed
			logger.LogIf(ctx, fmt.Errorf("unable to replicate metadata for object %s/%s(%s) to target %s: %w", bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), rinfo.Err))
		}
	} else {
		var putOpts minio.PutObjectOptions
		putOpts, err = putReplicationOpts(ctx, tgt.StorageClass, objInfo)
		if err != nil {
			logger.LogIf(ctx, fmt.Errorf("failed to set replicate options for object %s/%s(%s) (target %s) err:%w", bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), err))
			sendEvent(eventArgs{
				EventName:  event.ObjectReplicationNotTracked,
				BucketName: bucket,
				Object:     objInfo,
				UserAgent:  "Internal: [Replication]",
				Host:       globalLocalNodeName,
			})
			return
		}
		var headerSize int
		for k, v := range putOpts.Header() {
			headerSize += len(k) + len(v)
		}

		opts := &bandwidth.MonitorReaderOptions{
			BucketOptions: bandwidth.BucketOptions{
				Name:           objInfo.Bucket,
				ReplicationARN: tgt.ARN,
			},
			HeaderSize: headerSize,
		}
		newCtx := ctx
		if globalBucketMonitor.IsThrottled(bucket, tgt.ARN) {
			var cancel context.CancelFunc
			newCtx, cancel = context.WithTimeout(ctx, throttleDeadline)
			defer cancel()
		}
		r := bandwidth.NewMonitoredReader(newCtx, globalBucketMonitor, gr, opts)
		if objInfo.isMultipart() {
			if rinfo.Err = replicateObjectWithMultipart(ctx, c, tgt.Bucket, object,
				r, objInfo, putOpts); rinfo.Err != nil {
				if minio.ToErrorResponse(rinfo.Err).Code != "PreconditionFailed" {
					rinfo.ReplicationStatus = replication.Failed
					logger.LogIf(ctx, fmt.Errorf("unable to replicate for object %s/%s(%s) to target %s: %w", bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), rinfo.Err))
				} else {
					rinfo.ReplicationStatus = replication.Completed
				}
			}
		} else {
			if _, rinfo.Err = c.PutObject(ctx, tgt.Bucket, object, r, size, "", "", putOpts); rinfo.Err != nil {
				if minio.ToErrorResponse(rinfo.Err).Code != "PreconditionFailed" {
					rinfo.ReplicationStatus = replication.Failed
					logger.LogIf(ctx, fmt.Errorf("unable to replicate for object %s/%s(%s) to target %s: %w", bucket, objInfo.Name, objInfo.VersionID, tgt.EndpointURL(), rinfo.Err))
				} else {
					rinfo.ReplicationStatus = replication.Completed
				}
			}
		}
		if rinfo.Err != nil && minio.IsNetworkOrHostDown(rinfo.Err, true) && !globalBucketTargetSys.isOffline(tgt.EndpointURL()) {
			globalBucketTargetSys.markOffline(tgt.EndpointURL())
		}
	}
	return
}

func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, object string, r io.Reader, objInfo ObjectInfo, opts minio.PutObjectOptions) (err error) {
	var uploadedParts []minio.CompletePart
	// new multipart must not set mtime as it may lead to erroneous cleanups at various intervals.
	opts.Internal.SourceMTime = time.Time{} // this value is saved properly in CompleteMultipartUpload()
	var uploadID string
	attempts := 1
	for attempts <= 3 {
		nctx, cancel := context.WithTimeout(ctx, time.Minute)
		uploadID, err = c.NewMultipartUpload(nctx, bucket, object, opts)
		cancel()
		if err == nil {
			break
		}
		if minio.ToErrorResponse(err).Code == "PreconditionFailed" {
			return err
		}
		attempts++
		time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
	}
	if err != nil {
		return err
	}

	defer func() {
		if err != nil {
			// block and abort remote upload upon failure.
1592 attempts := 1 1593 for attempts <= 3 { 1594 actx, acancel := context.WithTimeout(ctx, time.Minute) 1595 aerr := c.AbortMultipartUpload(actx, bucket, object, uploadID) 1596 if aerr == nil { 1597 acancel() 1598 return 1599 } 1600 acancel() 1601 logger.LogIf(actx, 1602 fmt.Errorf("trying %s: Unable to cleanup failed multipart replication %s on remote %s/%s: %w - this may consume space on remote cluster", 1603 humanize.Ordinal(attempts), uploadID, bucket, object, aerr)) 1604 attempts++ 1605 time.Sleep(time.Duration(rand.Int63n(int64(time.Second)))) 1606 } 1607 } 1608 }() 1609 1610 var ( 1611 hr *hash.Reader 1612 pInfo minio.ObjectPart 1613 ) 1614 1615 var objectSize int64 1616 for _, partInfo := range objInfo.Parts { 1617 if crypto.SSEC.IsEncrypted(objInfo.UserDefined) { 1618 hr, err = hash.NewReader(ctx, io.LimitReader(r, partInfo.Size), partInfo.Size, "", "", partInfo.ActualSize) 1619 } else { 1620 hr, err = hash.NewReader(ctx, io.LimitReader(r, partInfo.ActualSize), partInfo.ActualSize, "", "", partInfo.ActualSize) 1621 } 1622 if err != nil { 1623 return err 1624 } 1625 1626 cHeader := http.Header{} 1627 cHeader.Add(xhttp.MinIOSourceReplicationRequest, "true") 1628 popts := minio.PutObjectPartOptions{ 1629 SSE: opts.ServerSideEncryption, 1630 CustomHeader: cHeader, 1631 } 1632 1633 if crypto.SSEC.IsEncrypted(objInfo.UserDefined) { 1634 objectSize += partInfo.Size 1635 pInfo, err = c.PutObjectPart(ctx, bucket, object, uploadID, partInfo.Number, hr, partInfo.Size, popts) 1636 } else { 1637 objectSize += partInfo.ActualSize 1638 pInfo, err = c.PutObjectPart(ctx, bucket, object, uploadID, partInfo.Number, hr, partInfo.ActualSize, popts) 1639 } 1640 if err != nil { 1641 return err 1642 } 1643 if !crypto.SSEC.IsEncrypted(objInfo.UserDefined) && pInfo.Size != partInfo.ActualSize { 1644 return fmt.Errorf("Part size mismatch: got %d, want %d", pInfo.Size, partInfo.ActualSize) 1645 } 1646 uploadedParts = append(uploadedParts, minio.CompletePart{ 1647 PartNumber: pInfo.PartNumber, 1648 ETag: pInfo.ETag, 1649 }) 1650 } 1651 cctx, ccancel := context.WithTimeout(ctx, 10*time.Minute) 1652 defer ccancel() 1653 _, err = c.CompleteMultipartUpload(cctx, bucket, object, uploadID, uploadedParts, minio.PutObjectOptions{ 1654 UserMetadata: map[string]string{validSSEReplicationHeaders[ReservedMetadataPrefix+"Actual-Object-Size"]: objInfo.UserDefined[ReservedMetadataPrefix+"actual-size"]}, 1655 Internal: minio.AdvancedPutOptions{ 1656 SourceMTime: objInfo.ModTime, 1657 // always set this to distinguish between `mc mirror` replication and serverside 1658 ReplicationRequest: true, 1659 }, 1660 }) 1661 return err 1662 } 1663 1664 // filterReplicationStatusMetadata filters replication status metadata for COPY 1665 func filterReplicationStatusMetadata(metadata map[string]string) map[string]string { 1666 // Copy on write 1667 dst := metadata 1668 var copied bool 1669 delKey := func(key string) { 1670 if _, ok := metadata[key]; !ok { 1671 return 1672 } 1673 if !copied { 1674 dst = make(map[string]string, len(metadata)) 1675 for k, v := range metadata { 1676 dst[k] = v 1677 } 1678 copied = true 1679 } 1680 delete(dst, key) 1681 } 1682 1683 delKey(xhttp.AmzBucketReplicationStatus) 1684 return dst 1685 } 1686 1687 // DeletedObjectReplicationInfo has info on deleted object 1688 type DeletedObjectReplicationInfo struct { 1689 DeletedObject 1690 Bucket string 1691 EventType string 1692 OpType replication.Type 1693 ResetID string 1694 TargetArn string 1695 } 1696 1697 // ToMRFEntry returns the relevant info needed by MRF 
1698 func (di DeletedObjectReplicationInfo) ToMRFEntry() MRFReplicateEntry { 1699 versionID := di.DeleteMarkerVersionID 1700 if versionID == "" { 1701 versionID = di.VersionID 1702 } 1703 return MRFReplicateEntry{ 1704 Bucket: di.Bucket, 1705 Object: di.ObjectName, 1706 versionID: versionID, 1707 } 1708 } 1709 1710 // Replication specific APIName 1711 const ( 1712 ReplicateObjectAPI = "ReplicateObject" 1713 ReplicateDeleteAPI = "ReplicateDelete" 1714 ) 1715 1716 const ( 1717 // ReplicateQueued - replication being queued trail 1718 ReplicateQueued = "replicate:queue" 1719 1720 // ReplicateExisting - audit trail for existing objects replication 1721 ReplicateExisting = "replicate:existing" 1722 // ReplicateExistingDelete - audit trail for delete replication triggered for existing delete markers 1723 ReplicateExistingDelete = "replicate:existing:delete" 1724 1725 // ReplicateMRF - audit trail for replication from Most Recent Failures (MRF) queue 1726 ReplicateMRF = "replicate:mrf" 1727 // ReplicateIncoming - audit trail of inline replication 1728 ReplicateIncoming = "replicate:incoming" 1729 // ReplicateIncomingDelete - audit trail of inline replication of deletes. 1730 ReplicateIncomingDelete = "replicate:incoming:delete" 1731 1732 // ReplicateHeal - audit trail for healing of failed/pending replications 1733 ReplicateHeal = "replicate:heal" 1734 // ReplicateHealDelete - audit trail of healing of failed/pending delete replications. 1735 ReplicateHealDelete = "replicate:heal:delete" 1736 ) 1737 1738 var ( 1739 globalReplicationPool *ReplicationPool 1740 globalReplicationStats *ReplicationStats 1741 ) 1742 1743 // ReplicationPool describes replication pool 1744 type ReplicationPool struct { 1745 // atomic ops: 1746 activeWorkers int32 1747 activeMRFWorkers int32 1748 1749 objLayer ObjectLayer 1750 ctx context.Context 1751 priority string 1752 maxWorkers int 1753 mu sync.RWMutex 1754 mrfMU sync.Mutex 1755 resyncer *replicationResyncer 1756 1757 // workers: 1758 workers []chan ReplicationWorkerOperation 1759 lrgworkers []chan ReplicationWorkerOperation 1760 1761 // mrf: 1762 mrfWorkerKillCh chan struct{} 1763 mrfReplicaCh chan ReplicationWorkerOperation 1764 mrfSaveCh chan MRFReplicateEntry 1765 mrfStopCh chan struct{} 1766 mrfWorkerSize int 1767 } 1768 1769 // ReplicationWorkerOperation is a shared interface of replication operations. 
1770 type ReplicationWorkerOperation interface { 1771 ToMRFEntry() MRFReplicateEntry 1772 } 1773 1774 const ( 1775 // WorkerMaxLimit max number of workers per node for "fast" mode 1776 WorkerMaxLimit = 500 1777 1778 // WorkerMinLimit min number of workers per node for "slow" mode 1779 WorkerMinLimit = 50 1780 1781 // WorkerAutoDefault is default number of workers for "auto" mode 1782 WorkerAutoDefault = 100 1783 1784 // MRFWorkerMaxLimit max number of mrf workers per node for "fast" mode 1785 MRFWorkerMaxLimit = 8 1786 1787 // MRFWorkerMinLimit min number of mrf workers per node for "slow" mode 1788 MRFWorkerMinLimit = 2 1789 1790 // MRFWorkerAutoDefault is default number of mrf workers for "auto" mode 1791 MRFWorkerAutoDefault = 4 1792 1793 // LargeWorkerCount is default number of workers assigned to large uploads ( >= 128MiB) 1794 LargeWorkerCount = 10 1795 ) 1796 1797 // NewReplicationPool creates a pool of replication workers of specified size 1798 func NewReplicationPool(ctx context.Context, o ObjectLayer, opts replicationPoolOpts) *ReplicationPool { 1799 var workers, failedWorkers int 1800 priority := "auto" 1801 maxWorkers := WorkerMaxLimit 1802 if opts.Priority != "" { 1803 priority = opts.Priority 1804 } 1805 if opts.MaxWorkers > 0 { 1806 maxWorkers = opts.MaxWorkers 1807 } 1808 switch priority { 1809 case "fast": 1810 workers = WorkerMaxLimit 1811 failedWorkers = MRFWorkerMaxLimit 1812 case "slow": 1813 workers = WorkerMinLimit 1814 failedWorkers = MRFWorkerMinLimit 1815 default: 1816 workers = WorkerAutoDefault 1817 failedWorkers = MRFWorkerAutoDefault 1818 } 1819 if maxWorkers > 0 && workers > maxWorkers { 1820 workers = maxWorkers 1821 } 1822 1823 if maxWorkers > 0 && failedWorkers > maxWorkers { 1824 failedWorkers = maxWorkers 1825 } 1826 pool := &ReplicationPool{ 1827 workers: make([]chan ReplicationWorkerOperation, 0, workers), 1828 lrgworkers: make([]chan ReplicationWorkerOperation, 0, LargeWorkerCount), 1829 mrfReplicaCh: make(chan ReplicationWorkerOperation, 100000), 1830 mrfWorkerKillCh: make(chan struct{}, failedWorkers), 1831 resyncer: newresyncer(), 1832 mrfSaveCh: make(chan MRFReplicateEntry, 100000), 1833 mrfStopCh: make(chan struct{}, 1), 1834 ctx: ctx, 1835 objLayer: o, 1836 priority: priority, 1837 maxWorkers: maxWorkers, 1838 } 1839 1840 pool.AddLargeWorkers() 1841 pool.ResizeWorkers(workers, 0) 1842 pool.ResizeFailedWorkers(failedWorkers) 1843 go pool.resyncer.PersistToDisk(ctx, o) 1844 go pool.processMRF() 1845 go pool.persistMRF() 1846 return pool 1847 } 1848 1849 // AddMRFWorker adds a pending/failed replication worker to handle requests that could not be queued 1850 // to the other workers 1851 func (p *ReplicationPool) AddMRFWorker() { 1852 for { 1853 select { 1854 case <-p.ctx.Done(): 1855 return 1856 case oi, ok := <-p.mrfReplicaCh: 1857 if !ok { 1858 return 1859 } 1860 switch v := oi.(type) { 1861 case ReplicateObjectInfo: 1862 globalReplicationStats.incQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType) 1863 atomic.AddInt32(&p.activeMRFWorkers, 1) 1864 replicateObject(p.ctx, v, p.objLayer) 1865 atomic.AddInt32(&p.activeMRFWorkers, -1) 1866 globalReplicationStats.decQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType) 1867 1868 default: 1869 logger.LogOnceIf(p.ctx, fmt.Errorf("unknown mrf replication type: %T", oi), "unknown-mrf-replicate-type") 1870 } 1871 case <-p.mrfWorkerKillCh: 1872 return 1873 } 1874 } 1875 } 1876 1877 // AddWorker adds a replication worker to the pool. 
1878 // An optional pointer to an operation tracker can be provided; 1879 // it is atomically incremented while an operation is running. 1880 func (p *ReplicationPool) AddWorker(input <-chan ReplicationWorkerOperation, opTracker *int32) { 1881 for { 1882 select { 1883 case <-p.ctx.Done(): 1884 return 1885 case oi, ok := <-input: 1886 if !ok { 1887 return 1888 } 1889 switch v := oi.(type) { 1890 case ReplicateObjectInfo: 1891 if opTracker != nil { 1892 atomic.AddInt32(opTracker, 1) 1893 } 1894 globalReplicationStats.incQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType) 1895 replicateObject(p.ctx, v, p.objLayer) 1896 globalReplicationStats.decQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType) 1897 if opTracker != nil { 1898 atomic.AddInt32(opTracker, -1) 1899 } 1900 case DeletedObjectReplicationInfo: 1901 if opTracker != nil { 1902 atomic.AddInt32(opTracker, 1) 1903 } 1904 globalReplicationStats.incQ(v.Bucket, 0, true, v.OpType) 1905 1906 replicateDelete(p.ctx, v, p.objLayer) 1907 globalReplicationStats.decQ(v.Bucket, 0, true, v.OpType) 1908 1909 if opTracker != nil { 1910 atomic.AddInt32(opTracker, -1) 1911 } 1912 default: 1913 logger.LogOnceIf(p.ctx, fmt.Errorf("unknown replication type: %T", oi), "unknown-replicate-type") 1914 } 1915 } 1916 } 1917 } 1918 1919 // AddLargeWorkers adds a static number of workers to handle large uploads 1920 func (p *ReplicationPool) AddLargeWorkers() { 1921 for i := 0; i < LargeWorkerCount; i++ { 1922 p.lrgworkers = append(p.lrgworkers, make(chan ReplicationWorkerOperation, 100000)) 1923 i := i 1924 go p.AddLargeWorker(p.lrgworkers[i]) 1925 } 1926 go func() { 1927 <-p.ctx.Done() 1928 for i := 0; i < LargeWorkerCount; i++ { 1929 xioutil.SafeClose(p.lrgworkers[i]) 1930 } 1931 }() 1932 } 1933 1934 // AddLargeWorker adds a replication worker to the static pool for large uploads. 1935 func (p *ReplicationPool) AddLargeWorker(input <-chan ReplicationWorkerOperation) { 1936 for { 1937 select { 1938 case <-p.ctx.Done(): 1939 return 1940 case oi, ok := <-input: 1941 if !ok { 1942 return 1943 } 1944 switch v := oi.(type) { 1945 case ReplicateObjectInfo: 1946 globalReplicationStats.incQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType) 1947 replicateObject(p.ctx, v, p.objLayer) 1948 globalReplicationStats.decQ(v.Bucket, v.Size, v.DeleteMarker, v.OpType) 1949 case DeletedObjectReplicationInfo: 1950 replicateDelete(p.ctx, v, p.objLayer) 1951 default: 1952 logger.LogOnceIf(p.ctx, fmt.Errorf("unknown replication type: %T", oi), "unknown-replicate-type") 1953 } 1954 } 1955 } 1956 } 1957 1958 // ActiveWorkers returns the number of active workers handling replication traffic. 1959 func (p *ReplicationPool) ActiveWorkers() int { 1960 return int(atomic.LoadInt32(&p.activeWorkers)) 1961 } 1962 1963 // ActiveMRFWorkers returns the number of active workers handling replication failures. 1964 func (p *ReplicationPool) ActiveMRFWorkers() int { 1965 return int(atomic.LoadInt32(&p.activeMRFWorkers)) 1966 } 1967 1968 // ResizeWorkers sets replication workers pool to new size. 1969 // checkOld can be set to the expected current worker count. 1970 // If the count changed while waiting for the lock, the resize is skipped. 1971 func (p *ReplicationPool) ResizeWorkers(n, checkOld int) { 1972 p.mu.Lock() 1973 defer p.mu.Unlock() 1974 1975 if (checkOld > 0 && len(p.workers) != checkOld) || n == len(p.workers) || n < 1 { 1976 // Either already satisfied or worker count changed while we waited for the lock. 
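// Illustrative note: checkOld acts like a compare-and-swap guard. A caller reads the current
// size, computes a target, and passes the size it observed; if another caller resized the pool
// in the meantime, the resize is silently skipped. Sketch of the caller-side pattern (as used
// later by queueReplicaTask):
//
//	p.mu.RLock()
//	existing := len(p.workers)
//	target := min(existing+1, maxWorkers)
//	p.mu.RUnlock()
//	p.ResizeWorkers(target, existing) // no-op if the pool changed in between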
1977 return 1978 } 1979 for len(p.workers) < n { 1980 input := make(chan ReplicationWorkerOperation, 10000) 1981 p.workers = append(p.workers, input) 1982 1983 go p.AddWorker(input, &p.activeWorkers) 1984 } 1985 for len(p.workers) > n { 1986 worker := p.workers[len(p.workers)-1] 1987 p.workers = p.workers[:len(p.workers)-1] 1988 xioutil.SafeClose(worker) 1989 } 1990 } 1991 1992 // ResizeWorkerPriority sets the replication priority and resizes the regular and MRF worker pools accordingly 1993 func (p *ReplicationPool) ResizeWorkerPriority(pri string, maxWorkers int) { 1994 var workers, mrfWorkers int 1995 p.mu.Lock() 1996 switch pri { 1997 case "fast": 1998 workers = WorkerMaxLimit 1999 mrfWorkers = MRFWorkerMaxLimit 2000 case "slow": 2001 workers = WorkerMinLimit 2002 mrfWorkers = MRFWorkerMinLimit 2003 default: 2004 workers = WorkerAutoDefault 2005 mrfWorkers = MRFWorkerAutoDefault 2006 if len(p.workers) < WorkerAutoDefault { 2007 workers = min(len(p.workers)+1, WorkerAutoDefault) 2008 } 2009 if p.mrfWorkerSize < MRFWorkerAutoDefault { 2010 mrfWorkers = min(p.mrfWorkerSize+1, MRFWorkerAutoDefault) 2011 } 2012 } 2013 if maxWorkers > 0 && workers > maxWorkers { 2014 workers = maxWorkers 2015 } 2016 2017 if maxWorkers > 0 && mrfWorkers > maxWorkers { 2018 mrfWorkers = maxWorkers 2019 } 2020 p.priority = pri 2021 p.maxWorkers = maxWorkers 2022 p.mu.Unlock() 2023 p.ResizeWorkers(workers, 0) 2024 p.ResizeFailedWorkers(mrfWorkers) 2025 } 2026 2027 // ResizeFailedWorkers sets replication failed workers pool size 2028 func (p *ReplicationPool) ResizeFailedWorkers(n int) { 2029 p.mu.Lock() 2030 defer p.mu.Unlock() 2031 2032 for p.mrfWorkerSize < n { 2033 p.mrfWorkerSize++ 2034 go p.AddMRFWorker() 2035 } 2036 for p.mrfWorkerSize > n { 2037 p.mrfWorkerSize-- 2038 go func() { p.mrfWorkerKillCh <- struct{}{} }() 2039 } 2040 } 2041 2042 const ( 2043 minLargeObjSize = 128 * humanize.MiByte // 128MiB 2044 ) 2045 2046 // getWorkerCh gets a worker channel deterministically based on bucket and object names. 2047 // Caller must be able to acquire the read lock on p.
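// Illustrative sketch: getWorkerCh below reduces the (bucket, object) pair to a stable index
// with xxh3, so operations for the same object always land on the same worker channel and stay
// ordered relative to each other. A hypothetical standalone version of the idiom:
//
//	func pickWorker(workers []chan ReplicationWorkerOperation, bucket, object string) chan ReplicationWorkerOperation {
//		if len(workers) == 0 {
//			return nil
//		}
//		h := xxh3.HashString(bucket + object)
//		return workers[h%uint64(len(workers))]
//	}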
2048 2049 func (p *ReplicationPool) getWorkerCh(bucket, object string, sz int64) chan<- ReplicationWorkerOperation { 2050 h := xxh3.HashString(bucket + object) 2051 p.mu.RLock() 2052 defer p.mu.RUnlock() 2053 if len(p.workers) == 0 { 2054 return nil 2055 } 2056 return p.workers[h%uint64(len(p.workers))] 2057 } 2058 2059 func (p *ReplicationPool) queueReplicaTask(ri ReplicateObjectInfo) { 2060 if p == nil { 2061 return 2062 } 2063 // if object is large, queue it to a static set of large workers 2064 if ri.Size >= int64(minLargeObjSize) { 2065 h := xxh3.HashString(ri.Bucket + ri.Name) 2066 select { 2067 case <-p.ctx.Done(): 2068 case p.lrgworkers[h%LargeWorkerCount] <- ri: 2069 default: 2070 globalReplicationPool.queueMRFSave(ri.ToMRFEntry()) 2071 } 2072 return 2073 } 2074 2075 var ch, healCh chan<- ReplicationWorkerOperation 2076 switch ri.OpType { 2077 case replication.HealReplicationType, replication.ExistingObjectReplicationType: 2078 ch = p.mrfReplicaCh 2079 healCh = p.getWorkerCh(ri.Name, ri.Bucket, ri.Size) 2080 default: 2081 ch = p.getWorkerCh(ri.Name, ri.Bucket, ri.Size) 2082 } 2083 if ch == nil && healCh == nil { 2084 return 2085 } 2086 2087 select { 2088 case <-p.ctx.Done(): 2089 case healCh <- ri: 2090 case ch <- ri: 2091 default: 2092 globalReplicationPool.queueMRFSave(ri.ToMRFEntry()) 2093 p.mu.RLock() 2094 prio := p.priority 2095 maxWorkers := p.maxWorkers 2096 p.mu.RUnlock() 2097 switch prio { 2098 case "fast": 2099 logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming traffic"), string(replicationSubsystem)) 2100 case "slow": 2101 logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming traffic - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem)) 2102 default: 2103 maxWorkers = min(maxWorkers, WorkerMaxLimit) 2104 if p.ActiveWorkers() < maxWorkers { 2105 p.mu.RLock() 2106 workers := min(len(p.workers)+1, maxWorkers) 2107 existing := len(p.workers) 2108 p.mu.RUnlock() 2109 p.ResizeWorkers(workers, existing) 2110 } 2111 maxMRFWorkers := min(maxWorkers, MRFWorkerMaxLimit) 2112 if p.ActiveMRFWorkers() < maxMRFWorkers { 2113 p.mu.RLock() 2114 workers := min(p.mrfWorkerSize+1, maxMRFWorkers) 2115 p.mu.RUnlock() 2116 p.ResizeFailedWorkers(workers) 2117 } 2118 } 2119 } 2120 } 2121 2122 func queueReplicateDeletesWrapper(doi DeletedObjectReplicationInfo, existingObjectResync ResyncDecision) { 2123 for k, v := range existingObjectResync.targets { 2124 if v.Replicate { 2125 doi.ResetID = v.ResetID 2126 doi.TargetArn = k 2127 2128 globalReplicationPool.queueReplicaDeleteTask(doi) 2129 } 2130 } 2131 } 2132 2133 func (p *ReplicationPool) queueReplicaDeleteTask(doi DeletedObjectReplicationInfo) { 2134 if p == nil { 2135 return 2136 } 2137 var ch chan<- ReplicationWorkerOperation 2138 switch doi.OpType { 2139 case replication.HealReplicationType, replication.ExistingObjectReplicationType: 2140 fallthrough 2141 default: 2142 ch = p.getWorkerCh(doi.Bucket, doi.ObjectName, 0) 2143 } 2144 2145 select { 2146 case <-p.ctx.Done(): 2147 case ch <- doi: 2148 default: 2149 globalReplicationPool.queueMRFSave(doi.ToMRFEntry()) 2150 p.mu.RLock() 2151 prio := p.priority 2152 maxWorkers := p.maxWorkers 2153 p.mu.RUnlock() 2154 switch prio { 2155 case "fast": 2156 logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: Unable to keep up with incoming deletes"), string(replicationSubsystem)) 2157 case "slow": 2158 logger.LogOnceIf(GlobalContext, fmt.Errorf("WARNING: 
Unable to keep up with incoming deletes - we recommend increasing replication priority with `mc admin config set api replication_priority=auto`"), string(replicationSubsystem)) 2159 default: 2160 maxWorkers = min(maxWorkers, WorkerMaxLimit) 2161 if p.ActiveWorkers() < maxWorkers { 2162 p.mu.RLock() 2163 workers := min(len(p.workers)+1, maxWorkers) 2164 existing := len(p.workers) 2165 p.mu.RUnlock() 2166 p.ResizeWorkers(workers, existing) 2167 } 2168 } 2169 } 2170 } 2171 2172 type replicationPoolOpts struct { 2173 Priority string 2174 MaxWorkers int 2175 } 2176 2177 func initBackgroundReplication(ctx context.Context, objectAPI ObjectLayer) { 2178 globalReplicationPool = NewReplicationPool(ctx, objectAPI, globalAPIConfig.getReplicationOpts()) 2179 globalReplicationStats = NewReplicationStats(ctx, objectAPI) 2180 go globalReplicationStats.trackEWMA() 2181 } 2182 2183 type proxyResult struct { 2184 Proxy bool 2185 Err error 2186 } 2187 2188 // get Reader from replication target if active-active replication is in place and 2189 // this node returns a 404 2190 func proxyGetToReplicationTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, _ http.Header, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (gr *GetObjectReader, proxy proxyResult, err error) { 2191 tgt, oi, proxy := proxyHeadToRepTarget(ctx, bucket, object, rs, opts, proxyTargets) 2192 if !proxy.Proxy { 2193 return nil, proxy, nil 2194 } 2195 fn, _, _, err := NewGetObjectReader(nil, oi, opts) 2196 if err != nil { 2197 return nil, proxy, err 2198 } 2199 gopts := minio.GetObjectOptions{ 2200 VersionID: opts.VersionID, 2201 ServerSideEncryption: opts.ServerSideEncryption, 2202 Internal: minio.AdvancedGetOptions{ 2203 ReplicationProxyRequest: "true", 2204 }, 2205 PartNumber: opts.PartNumber, 2206 } 2207 // get correct offsets for encrypted object 2208 if rs != nil { 2209 h, err := rs.ToHeader() 2210 if err != nil { 2211 return nil, proxy, err 2212 } 2213 gopts.Set(xhttp.Range, h) 2214 } 2215 // Make sure to match ETag when proxying. 
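// Illustrative note: pinning the ETag turns the proxied GET into a conditional read - if the
// remote version changed between the earlier HEAD and this GET, the request fails with a
// precondition error instead of silently serving different data. Hedged sketch with
// hypothetical names (core, versionID, expectedETag):
//
//	gopts := minio.GetObjectOptions{VersionID: versionID}
//	if err := gopts.SetMatchETag(expectedETag); err != nil {
//		return err // the request will carry If-Match: <expectedETag>
//	}
//	obj, _, _, err := core.GetObject(ctx, bucket, object, gopts)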
2216 if err = gopts.SetMatchETag(oi.ETag); err != nil { 2217 return nil, proxy, err 2218 } 2219 c := minio.Core{Client: tgt.Client} 2220 obj, _, h, err := c.GetObject(ctx, tgt.Bucket, object, gopts) 2221 if err != nil { 2222 return nil, proxy, err 2223 } 2224 closeReader := func() { obj.Close() } 2225 reader, err := fn(obj, h, closeReader) 2226 if err != nil { 2227 return nil, proxy, err 2228 } 2229 reader.ObjInfo = oi.Clone() 2230 if rs != nil { 2231 contentSize, err := parseSizeFromContentRange(h) 2232 if err != nil { 2233 return nil, proxy, err 2234 } 2235 reader.ObjInfo.Size = contentSize 2236 } 2237 2238 return reader, proxyResult{Proxy: true}, nil 2239 } 2240 2241 func getProxyTargets(ctx context.Context, bucket, object string, opts ObjectOptions) (tgts *madmin.BucketTargets) { 2242 if opts.VersionSuspended { 2243 return &madmin.BucketTargets{} 2244 } 2245 if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { 2246 return &madmin.BucketTargets{} 2247 } 2248 cfg, err := getReplicationConfig(ctx, bucket) 2249 if err != nil || cfg == nil { 2250 return &madmin.BucketTargets{} 2251 } 2252 topts := replication.ObjectOpts{Name: object} 2253 tgtArns := cfg.FilterTargetArns(topts) 2254 tgts = &madmin.BucketTargets{Targets: make([]madmin.BucketTarget, len(tgtArns))} 2255 for i, tgtArn := range tgtArns { 2256 tgt := globalBucketTargetSys.GetRemoteBucketTargetByArn(ctx, bucket, tgtArn) 2257 tgts.Targets[i] = tgt 2258 } 2259 2260 return tgts 2261 } 2262 2263 func proxyHeadToRepTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (tgt *TargetClient, oi ObjectInfo, proxy proxyResult) { 2264 // this option is set when active-active replication is in place between site A -> B, 2265 // and site B does not have the object yet. 
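// Illustrative note: the guard below is what prevents proxy loops - a request that is itself a
// proxy request from a peer site must be answered locally and never fanned out again. Reduced
// to a hypothetical predicate:
//
//	func shouldProxy(opts ObjectOptions) bool {
//		// ProxyHeaderSet means the peer stamped the MinIOSourceProxyRequest header
//		// on the request; answer locally in that case.
//		return !opts.ProxyRequest && !opts.ProxyHeaderSet
//	}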
2266 if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header 2267 return nil, oi, proxy 2268 } 2269 var perr error 2270 for _, t := range proxyTargets.Targets { 2271 tgt = globalBucketTargetSys.GetRemoteTargetClient(bucket, t.Arn) 2272 if tgt == nil || globalBucketTargetSys.isOffline(tgt.EndpointURL()) { 2273 continue 2274 } 2275 // if proxying explicitly disabled on remote target 2276 if tgt.disableProxy { 2277 continue 2278 } 2279 2280 gopts := minio.GetObjectOptions{ 2281 VersionID: opts.VersionID, 2282 ServerSideEncryption: opts.ServerSideEncryption, 2283 Internal: minio.AdvancedGetOptions{ 2284 ReplicationProxyRequest: "true", 2285 }, 2286 PartNumber: opts.PartNumber, 2287 } 2288 if rs != nil { 2289 h, err := rs.ToHeader() 2290 if err != nil { 2291 logger.LogIf(ctx, fmt.Errorf("invalid range header for %s/%s(%s) - %w", bucket, object, opts.VersionID, err)) 2292 continue 2293 } 2294 gopts.Set(xhttp.Range, h) 2295 } 2296 2297 objInfo, err := tgt.StatObject(ctx, t.TargetBucket, object, gopts) 2298 if err != nil { 2299 perr = err 2300 if isErrInvalidRange(ErrorRespToObjectError(err, bucket, object)) { 2301 return nil, oi, proxyResult{Err: err} 2302 } 2303 continue 2304 } 2305 2306 tags, _ := tags.MapToObjectTags(objInfo.UserTags) 2307 oi = ObjectInfo{ 2308 Bucket: bucket, 2309 Name: object, 2310 ModTime: objInfo.LastModified, 2311 Size: objInfo.Size, 2312 ETag: objInfo.ETag, 2313 VersionID: objInfo.VersionID, 2314 IsLatest: objInfo.IsLatest, 2315 DeleteMarker: objInfo.IsDeleteMarker, 2316 ContentType: objInfo.ContentType, 2317 Expires: objInfo.Expires, 2318 StorageClass: objInfo.StorageClass, 2319 ReplicationStatusInternal: objInfo.ReplicationStatus, 2320 UserTags: tags.String(), 2321 ReplicationStatus: replication.StatusType(objInfo.ReplicationStatus), 2322 } 2323 oi.UserDefined = make(map[string]string, len(objInfo.Metadata)) 2324 for k, v := range objInfo.Metadata { 2325 oi.UserDefined[k] = v[0] 2326 } 2327 ce, ok := oi.UserDefined[xhttp.ContentEncoding] 2328 if !ok { 2329 ce, ok = oi.UserDefined[strings.ToLower(xhttp.ContentEncoding)] 2330 } 2331 if ok { 2332 oi.ContentEncoding = ce 2333 } 2334 return tgt, oi, proxyResult{Proxy: true} 2335 } 2336 proxy.Err = perr 2337 return nil, oi, proxy 2338 } 2339 2340 // get object info from replication target if active-active replication is in place and 2341 // this node returns a 404 2342 func proxyHeadToReplicationTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (oi ObjectInfo, proxy proxyResult) { 2343 _, oi, proxy = proxyHeadToRepTarget(ctx, bucket, object, rs, opts, proxyTargets) 2344 return oi, proxy 2345 } 2346 2347 func scheduleReplication(ctx context.Context, oi ObjectInfo, o ObjectLayer, dsc ReplicateDecision, opType replication.Type) { 2348 tgtStatuses := replicationStatusesMap(oi.ReplicationStatusInternal) 2349 purgeStatuses := versionPurgeStatusesMap(oi.VersionPurgeStatusInternal) 2350 tm, _ := time.Parse(time.RFC3339Nano, oi.UserDefined[ReservedMetadataPrefixLower+ReplicationTimestamp]) 2351 rstate := oi.ReplicationState() 2352 rstate.ReplicateDecisionStr = dsc.String() 2353 asz, _ := oi.GetActualSize() 2354 2355 ri := ReplicateObjectInfo{ 2356 Name: oi.Name, 2357 Size: oi.Size, 2358 ActualSize: asz, 2359 Bucket: oi.Bucket, 2360 VersionID: oi.VersionID, 2361 ETag: oi.ETag, 2362 ModTime: oi.ModTime, 2363 ReplicationStatus: oi.ReplicationStatus, 2364 ReplicationStatusInternal: 
oi.ReplicationStatusInternal, 2365 DeleteMarker: oi.DeleteMarker, 2366 VersionPurgeStatusInternal: oi.VersionPurgeStatusInternal, 2367 VersionPurgeStatus: oi.VersionPurgeStatus, 2368 2369 ReplicationState: rstate, 2370 OpType: opType, 2371 Dsc: dsc, 2372 TargetStatuses: tgtStatuses, 2373 TargetPurgeStatuses: purgeStatuses, 2374 ReplicationTimestamp: tm, 2375 SSEC: crypto.SSEC.IsEncrypted(oi.UserDefined), 2376 UserTags: oi.UserTags, 2377 } 2378 2379 if dsc.Synchronous() { 2380 replicateObject(ctx, ri, o) 2381 } else { 2382 globalReplicationPool.queueReplicaTask(ri) 2383 } 2384 } 2385 2386 // proxyTaggingToRepTarget proxies tagging requests to remote targets for 2387 // active-active replicated setups 2388 func proxyTaggingToRepTarget(ctx context.Context, bucket, object string, tags *tags.Tags, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (proxy proxyResult) { 2389 // this option is set when active-active replication is in place between site A -> B, 2390 // and request hits site B that does not have the object yet. 2391 if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header 2392 return proxy 2393 } 2394 var wg sync.WaitGroup 2395 errs := make([]error, len(proxyTargets.Targets)) 2396 for idx, t := range proxyTargets.Targets { 2397 tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, t.Arn) 2398 if tgt == nil || globalBucketTargetSys.isOffline(tgt.EndpointURL()) { 2399 continue 2400 } 2401 // if proxying explicitly disabled on remote target 2402 if tgt.disableProxy { 2403 continue 2404 } 2405 idx := idx 2406 wg.Add(1) 2407 go func(idx int, tgt *TargetClient) { 2408 defer wg.Done() 2409 var err error 2410 if tags != nil { 2411 popts := minio.PutObjectTaggingOptions{ 2412 VersionID: opts.VersionID, 2413 Internal: minio.AdvancedObjectTaggingOptions{ 2414 ReplicationProxyRequest: "true", 2415 }, 2416 } 2417 err = tgt.PutObjectTagging(ctx, tgt.Bucket, object, tags, popts) 2418 } else { 2419 dopts := minio.RemoveObjectTaggingOptions{ 2420 VersionID: opts.VersionID, 2421 Internal: minio.AdvancedObjectTaggingOptions{ 2422 ReplicationProxyRequest: "true", 2423 }, 2424 } 2425 err = tgt.RemoveObjectTagging(ctx, tgt.Bucket, object, dopts) 2426 } 2427 if err != nil { 2428 errs[idx] = err 2429 } 2430 }(idx, tgt) 2431 } 2432 wg.Wait() 2433 2434 var ( 2435 terr error 2436 taggedCount int 2437 ) 2438 for _, err := range errs { 2439 if err == nil { 2440 taggedCount++ 2441 continue 2442 } 2443 if err != nil { 2444 terr = err 2445 } 2446 } 2447 // don't return error if at least one target was tagged successfully 2448 if taggedCount == 0 && terr != nil { 2449 proxy.Err = terr 2450 } 2451 return proxy 2452 } 2453 2454 // proxyGetTaggingToRepTarget proxies get tagging requests to remote targets for 2455 // active-active replicated setups 2456 func proxyGetTaggingToRepTarget(ctx context.Context, bucket, object string, opts ObjectOptions, proxyTargets *madmin.BucketTargets) (tgs *tags.Tags, proxy proxyResult) { 2457 // this option is set when active-active replication is in place between site A -> B, 2458 // and request hits site B that does not have the object yet. 
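// Illustrative sketch: the tagging proxies above and below share one fan-out shape - a
// goroutine per reachable target, with errors recorded positionally so the slice needs no
// mutex. The skeleton, with a hypothetical callRemote standing in for the per-target call:
//
//	errs := make([]error, len(targets))
//	var wg sync.WaitGroup
//	for idx, tgt := range targets {
//		idx, tgt := idx, tgt // pin loop variables, as elsewhere in this file
//		wg.Add(1)
//		go func() {
//			defer wg.Done()
//			errs[idx] = callRemote(tgt)
//		}()
//	}
//	wg.Wait()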
2459 if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header 2460 return nil, proxy 2461 } 2462 var wg sync.WaitGroup 2463 errs := make([]error, len(proxyTargets.Targets)) 2464 tagSlc := make([]map[string]string, len(proxyTargets.Targets)) 2465 for idx, t := range proxyTargets.Targets { 2466 tgt := globalBucketTargetSys.GetRemoteTargetClient(bucket, t.Arn) 2467 if tgt == nil || globalBucketTargetSys.isOffline(tgt.EndpointURL()) { 2468 continue 2469 } 2470 // if proxying explicitly disabled on remote target 2471 if tgt.disableProxy { 2472 continue 2473 } 2474 idx := idx 2475 wg.Add(1) 2476 go func(idx int, tgt *TargetClient) { 2477 defer wg.Done() 2478 var err error 2479 gopts := minio.GetObjectTaggingOptions{ 2480 VersionID: opts.VersionID, 2481 Internal: minio.AdvancedObjectTaggingOptions{ 2482 ReplicationProxyRequest: "true", 2483 }, 2484 } 2485 tgs, err = tgt.GetObjectTagging(ctx, tgt.Bucket, object, gopts) 2486 if err != nil { 2487 errs[idx] = err 2488 } else { 2489 tagSlc[idx] = tgs.ToMap() 2490 } 2491 }(idx, tgt) 2492 } 2493 wg.Wait() 2494 for idx, err := range errs { 2495 errCode := minio.ToErrorResponse(err).Code 2496 if err != nil && errCode != "NoSuchKey" && errCode != "NoSuchVersion" { 2497 return nil, proxyResult{Err: err} 2498 } 2499 if err == nil { 2500 tgs, _ = tags.MapToObjectTags(tagSlc[idx]) 2501 } 2502 } 2503 if len(errs) == 1 { 2504 proxy.Err = errs[0] 2505 } 2506 return tgs, proxy 2507 } 2508 2509 func scheduleReplicationDelete(ctx context.Context, dv DeletedObjectReplicationInfo, o ObjectLayer) { 2510 globalReplicationPool.queueReplicaDeleteTask(dv) 2511 for arn := range dv.ReplicationState.Targets { 2512 globalReplicationStats.Update(dv.Bucket, replicatedTargetInfo{Arn: arn, Size: 0, Duration: 0, OpType: replication.DeleteReplicationType}, replication.Pending, replication.StatusType("")) 2513 } 2514 } 2515 2516 type replicationConfig struct { 2517 Config *replication.Config 2518 remotes *madmin.BucketTargets 2519 } 2520 2521 func (c replicationConfig) Empty() bool { 2522 return c.Config == nil 2523 } 2524 2525 func (c replicationConfig) Replicate(opts replication.ObjectOpts) bool { 2526 return c.Config.Replicate(opts) 2527 } 2528 2529 // Resync returns a ResyncDecision if a replication reset is requested 2530 func (c replicationConfig) Resync(ctx context.Context, oi ObjectInfo, dsc ReplicateDecision, tgtStatuses map[string]replication.StatusType) (r ResyncDecision) { 2531 if c.Empty() { 2532 return 2533 } 2534 2535 // Now overlay existing object replication choices for target 2536 if oi.DeleteMarker { 2537 opts := replication.ObjectOpts{ 2538 Name: oi.Name, 2539 DeleteMarker: oi.DeleteMarker, 2540 VersionID: oi.VersionID, 2541 OpType: replication.DeleteReplicationType, 2542 ExistingObject: true, 2543 } 2544 2545 tgtArns := c.Config.FilterTargetArns(opts) 2546 // indicates no matching target with Existing object replication enabled. 2547 if len(tgtArns) == 0 { 2548 return 2549 } 2550 for _, t := range tgtArns { 2551 opts.TargetArn = t 2552 // Update replication decision for target based on existing object replication rule.
2553 dsc.Set(newReplicateTargetDecision(t, c.Replicate(opts), false)) 2554 } 2555 return c.resync(oi, dsc, tgtStatuses) 2556 } 2557 2558 // Ignore previous replication status when deciding if object can be re-replicated 2559 userDefined := cloneMSS(oi.UserDefined) 2560 delete(userDefined, xhttp.AmzBucketReplicationStatus) 2561 2562 rdsc := mustReplicate(ctx, oi.Bucket, oi.Name, getMustReplicateOptions(userDefined, oi.UserTags, "", replication.ExistingObjectReplicationType, ObjectOptions{})) 2563 return c.resync(oi, rdsc, tgtStatuses) 2564 } 2565 2566 // wrapper function for testability. Returns a resync decision when a new reset is requested on 2567 // already replicated objects OR when the object qualifies for existing object replication 2568 // and no reset is requested. 2569 func (c replicationConfig) resync(oi ObjectInfo, dsc ReplicateDecision, tgtStatuses map[string]replication.StatusType) (r ResyncDecision) { 2570 r = ResyncDecision{ 2571 targets: make(map[string]ResyncTargetDecision, len(dsc.targetsMap)), 2572 } 2573 if c.remotes == nil { 2574 return 2575 } 2576 for _, tgt := range c.remotes.Targets { 2577 d, ok := dsc.targetsMap[tgt.Arn] 2578 if !ok { 2579 continue 2580 } 2581 if !d.Replicate { 2582 continue 2583 } 2584 r.targets[d.Arn] = resyncTarget(oi, tgt.Arn, tgt.ResetID, tgt.ResetBeforeDate, tgtStatuses[tgt.Arn]) 2585 } 2586 return 2587 } 2588 2589 func targetResetHeader(arn string) string { 2590 return fmt.Sprintf("%s-%s", ReservedMetadataPrefixLower+ReplicationReset, arn) 2591 } 2592 2593 func resyncTarget(oi ObjectInfo, arn string, resetID string, resetBeforeDate time.Time, tgtStatus replication.StatusType) (rd ResyncTargetDecision) { 2594 rd = ResyncTargetDecision{ 2595 ResetID: resetID, 2596 ResetBeforeDate: resetBeforeDate, 2597 } 2598 rs, ok := oi.UserDefined[targetResetHeader(arn)] 2599 if !ok { 2600 rs, ok = oi.UserDefined[xhttp.MinIOReplicationResetStatus] // for backward compatibility 2601 } 2602 if !ok { // existing object replication is enabled and object version is unreplicated so far. 2603 if resetID != "" && oi.ModTime.Before(resetBeforeDate) { // trigger replication if `mc replicate reset` requested 2604 rd.Replicate = true 2605 return 2606 } 2607 // For existing object replication with no reset requested, replicate only if the target has no replication status yet 2608 rd.Replicate = tgtStatus == "" 2609 return 2610 } 2611 if resetID == "" || resetBeforeDate.Equal(timeSentinel) { // no reset in progress 2612 return 2613 } 2614 2615 // if already replicated, replicate again only if a new reset was requested. 2616 splits := strings.SplitN(rs, ";", 2) 2617 if len(splits) != 2 { 2618 return 2619 } 2620 newReset := splits[1] != resetID 2621 if !newReset && tgtStatus == replication.Completed { 2622 // already replicated and no reset requested 2623 return 2624 } 2625 rd.Replicate = newReset && oi.ModTime.Before(resetBeforeDate) 2626 return 2627 } 2628 2629 const resyncTimeInterval = time.Minute * 1 2630 2631 // PersistToDisk persists in-memory resync metadata stats to disk at periodic intervals 2632 func (s *replicationResyncer) PersistToDisk(ctx context.Context, objectAPI ObjectLayer) { 2633 resyncTimer := time.NewTimer(resyncTimeInterval) 2634 defer resyncTimer.Stop() 2635 2636 // For each bucket name, store the last timestamp of the 2637 // successful save of replication status in the backend disks.
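// Illustrative note: the loop below is a dirty-flag saver - a bucket's status is persisted
// only when its in-memory LastUpdate is newer than the timestamp of its last successful save,
// and the timestamp advances only on success, so failed writes are retried on the next tick.
// Condensed to its core comparison:
//
//	if brs.LastUpdate.After(lastResyncStatusSave[bucket]) {
//		if err := saveResyncStatus(ctx, bucket, brs, objectAPI); err == nil {
//			lastResyncStatusSave[bucket] = brs.LastUpdate
//		}
//	}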
2638 lastResyncStatusSave := make(map[string]time.Time) 2639 2640 for { 2641 select { 2642 case <-resyncTimer.C: 2643 s.RLock() 2644 for bucket, brs := range s.statusMap { 2645 var updt bool 2646 // Save the replication status if any bucket target's resync is still unfinished 2647 for _, st := range brs.TargetsMap { 2648 if st.LastUpdate.Equal(timeSentinel) { 2649 updt = true 2650 break 2651 } 2652 } 2653 // Save the replication status if a new stats update is found and not saved in the backend yet 2654 if brs.LastUpdate.After(lastResyncStatusSave[bucket]) { 2655 updt = true 2656 } 2657 if updt { 2658 if err := saveResyncStatus(ctx, bucket, brs, objectAPI); err != nil { 2659 logger.LogIf(ctx, fmt.Errorf("could not save resync metadata to drive for %s - %w", bucket, err)) 2660 } else { 2661 lastResyncStatusSave[bucket] = brs.LastUpdate 2662 } 2663 } 2664 } 2665 s.RUnlock() 2666 2667 resyncTimer.Reset(resyncTimeInterval) 2668 case <-ctx.Done(): 2669 // server could be restarting - need 2670 // to exit immediately 2671 return 2672 } 2673 } 2674 } 2675 2676 const ( 2677 resyncWorkerCnt = 10 // limit on number of bucket resyncs in progress at any given time 2678 resyncParallelRoutines = 10 // number of parallel resync ops per bucket 2679 ) 2680 2681 func newresyncer() *replicationResyncer { 2682 rs := replicationResyncer{ 2683 statusMap: make(map[string]BucketReplicationResyncStatus), 2684 workerSize: resyncWorkerCnt, 2685 resyncCancelCh: make(chan struct{}, resyncWorkerCnt), 2686 workerCh: make(chan struct{}, resyncWorkerCnt), 2687 } 2688 for i := 0; i < rs.workerSize; i++ { 2689 rs.workerCh <- struct{}{} 2690 } 2691 return &rs 2692 } 2693 2694 // mark status of replication resync on remote target for the bucket 2695 func (s *replicationResyncer) markStatus(status ResyncStatusType, opts resyncOpts, objAPI ObjectLayer) { 2696 s.Lock() 2697 defer s.Unlock() 2698 2699 m := s.statusMap[opts.bucket] 2700 st := m.TargetsMap[opts.arn] 2701 st.LastUpdate = UTCNow() 2702 st.ResyncStatus = status 2703 m.TargetsMap[opts.arn] = st 2704 m.LastUpdate = UTCNow() 2705 s.statusMap[opts.bucket] = m 2706 2707 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 2708 defer cancel() 2709 saveResyncStatus(ctx, opts.bucket, m, objAPI) 2710 } 2711 2712 // update replication resync stats for bucket's remote target 2713 func (s *replicationResyncer) incStats(ts TargetReplicationResyncStatus, opts resyncOpts) { 2714 s.Lock() 2715 defer s.Unlock() 2716 m := s.statusMap[opts.bucket] 2717 st := m.TargetsMap[opts.arn] 2718 st.Object = ts.Object 2719 st.ReplicatedCount += ts.ReplicatedCount 2720 st.FailedCount += ts.FailedCount 2721 st.ReplicatedSize += ts.ReplicatedSize 2722 st.FailedSize += ts.FailedSize 2723 m.TargetsMap[opts.arn] = st 2724 m.LastUpdate = UTCNow() 2725 s.statusMap[opts.bucket] = m 2726 } 2727 2728 // resyncBucket resyncs all qualifying objects as per replication rules for the target 2729 // ARN 2730 func (s *replicationResyncer) resyncBucket(ctx context.Context, objectAPI ObjectLayer, heal bool, opts resyncOpts) { 2731 select { 2732 case <-s.workerCh: // block till a worker is available 2733 case <-ctx.Done(): 2734 return 2735 } 2736 2737 resyncStatus := ResyncFailed 2738 defer func() { 2739 s.markStatus(resyncStatus, opts, objectAPI) 2740 globalSiteResyncMetrics.incBucket(opts, resyncStatus) 2741 s.workerCh <- struct{}{} 2742 }() 2743 // Allocate new results channel to receive ObjectInfo.
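// Illustrative note: resyncBucket below is a three-stage pipeline - a namespace Walk feeds
// ObjectInfo into objInfoCh, objects are sharded across resyncParallelRoutines workers
// (hash-partitioned so each object keeps a stable worker), and per-object outcomes flow back
// over a results channel into incStats. The routing step, in isolation:
//
//	shard := func(bucket, object string) uint64 {
//		return xxh3.HashString(bucket+object) % uint64(resyncParallelRoutines)
//	}
//	workers[shard(roi.Bucket, roi.Name)] <- roi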
2744 objInfoCh := make(chan ObjectInfo) 2745 cfg, err := getReplicationConfig(ctx, opts.bucket) 2746 if err != nil { 2747 logger.LogIf(ctx, fmt.Errorf("replication resync of %s for arn %s failed with %w", opts.bucket, opts.arn, err)) 2748 return 2749 } 2750 tgts, err := globalBucketTargetSys.ListBucketTargets(ctx, opts.bucket) 2751 if err != nil { 2752 logger.LogIf(ctx, fmt.Errorf("replication resync of %s for arn %s failed %w", opts.bucket, opts.arn, err)) 2753 return 2754 } 2755 rcfg := replicationConfig{ 2756 Config: cfg, 2757 remotes: tgts, 2758 } 2759 tgtArns := cfg.FilterTargetArns( 2760 replication.ObjectOpts{ 2761 OpType: replication.ResyncReplicationType, 2762 TargetArn: opts.arn, 2763 }) 2764 if len(tgtArns) != 1 { 2765 logger.LogIf(ctx, fmt.Errorf("replication resync failed for %s - arn specified %s is missing in the replication config", opts.bucket, opts.arn)) 2766 return 2767 } 2768 tgt := globalBucketTargetSys.GetRemoteTargetClient(opts.bucket, opts.arn) 2769 if tgt == nil { 2770 logger.LogIf(ctx, fmt.Errorf("replication resync failed for %s - target could not be created for arn %s", opts.bucket, opts.arn)) 2771 return 2772 } 2773 // mark resync status as resync started 2774 if !heal { 2775 s.markStatus(ResyncStarted, opts, objectAPI) 2776 } 2777 2778 // Walk through all object versions - Walk() is always in ascending order needed to ensure 2779 // delete marker replicated to target after object version is first created. 2780 if err := objectAPI.Walk(ctx, opts.bucket, "", objInfoCh, WalkOptions{}); err != nil { 2781 logger.LogIf(ctx, err) 2782 return 2783 } 2784 2785 s.RLock() 2786 m := s.statusMap[opts.bucket] 2787 st := m.TargetsMap[opts.arn] 2788 s.RUnlock() 2789 var lastCheckpoint string 2790 if st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncFailed { 2791 lastCheckpoint = st.Object 2792 } 2793 workers := make([]chan ReplicateObjectInfo, resyncParallelRoutines) 2794 resultCh := make(chan TargetReplicationResyncStatus, 1) 2795 defer xioutil.SafeClose(resultCh) 2796 go func() { 2797 for r := range resultCh { 2798 s.incStats(r, opts) 2799 globalSiteResyncMetrics.updateMetric(r, opts.resyncID) 2800 } 2801 }() 2802 2803 var wg sync.WaitGroup 2804 for i := 0; i < resyncParallelRoutines; i++ { 2805 wg.Add(1) 2806 workers[i] = make(chan ReplicateObjectInfo, 100) 2807 i := i 2808 go func(ctx context.Context, idx int) { 2809 defer wg.Done() 2810 for roi := range workers[idx] { 2811 select { 2812 case <-ctx.Done(): 2813 return 2814 case <-s.resyncCancelCh: 2815 default: 2816 } 2817 traceFn := s.trace(tgt.ResetID, fmt.Sprintf("%s/%s (%s)", opts.bucket, roi.Name, roi.VersionID)) 2818 if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() { 2819 versionID := "" 2820 dmVersionID := "" 2821 if roi.VersionPurgeStatus.Empty() { 2822 dmVersionID = roi.VersionID 2823 } else { 2824 versionID = roi.VersionID 2825 } 2826 2827 doi := DeletedObjectReplicationInfo{ 2828 DeletedObject: DeletedObject{ 2829 ObjectName: roi.Name, 2830 DeleteMarkerVersionID: dmVersionID, 2831 VersionID: versionID, 2832 ReplicationState: roi.ReplicationState, 2833 DeleteMarkerMTime: DeleteMarkerMTime{roi.ModTime}, 2834 DeleteMarker: roi.DeleteMarker, 2835 }, 2836 Bucket: roi.Bucket, 2837 OpType: replication.ExistingObjectReplicationType, 2838 EventType: ReplicateExistingDelete, 2839 } 2840 replicateDelete(ctx, doi, objectAPI) 2841 } else { 2842 roi.OpType = replication.ExistingObjectReplicationType 2843 roi.EventType = ReplicateExisting 2844 replicateObject(ctx, roi, objectAPI) 2845 } 2846 2847 st := 
TargetReplicationResyncStatus{ 2848 Object: roi.Name, 2849 Bucket: roi.Bucket, 2850 } 2851 2852 _, err := tgt.StatObject(ctx, tgt.Bucket, roi.Name, minio.StatObjectOptions{ 2853 VersionID: roi.VersionID, 2854 Internal: minio.AdvancedGetOptions{ 2855 ReplicationProxyRequest: "false", 2856 }, 2857 }) 2858 if err != nil { 2859 if roi.DeleteMarker && isErrMethodNotAllowed(ErrorRespToObjectError(err, opts.bucket, roi.Name)) { 2860 st.ReplicatedCount++ 2861 } else { 2862 st.FailedCount++ 2863 } 2864 } else { 2865 st.ReplicatedCount++ 2866 st.ReplicatedSize += roi.Size 2867 } 2868 traceFn(err) 2869 select { 2870 case <-ctx.Done(): 2871 return 2872 case <-s.resyncCancelCh: 2873 return 2874 case resultCh <- st: 2875 } 2876 } 2877 }(ctx, i) 2878 } 2879 for obj := range objInfoCh { 2880 select { 2881 case <-s.resyncCancelCh: 2882 resyncStatus = ResyncCanceled 2883 return 2884 case <-ctx.Done(): 2885 return 2886 default: 2887 } 2888 if heal && lastCheckpoint != "" && lastCheckpoint != obj.Name { 2889 continue 2890 } 2891 lastCheckpoint = "" 2892 roi := getHealReplicateObjectInfo(obj, rcfg) 2893 if !roi.ExistingObjResync.mustResync() { 2894 continue 2895 } 2896 select { 2897 case <-s.resyncCancelCh: 2898 return 2899 case <-ctx.Done(): 2900 return 2901 default: 2902 h := xxh3.HashString(roi.Bucket + roi.Name) 2903 workers[h%uint64(resyncParallelRoutines)] <- roi 2904 } 2905 } 2906 for i := 0; i < resyncParallelRoutines; i++ { 2907 xioutil.SafeClose(workers[i]) 2908 } 2909 wg.Wait() 2910 resyncStatus = ResyncCompleted 2911 } 2912 2913 // start replication resync for the remote target ARN specified 2914 func (s *replicationResyncer) start(ctx context.Context, objAPI ObjectLayer, opts resyncOpts) error { 2915 if opts.bucket == "" { 2916 return fmt.Errorf("bucket name is empty") 2917 } 2918 if opts.arn == "" { 2919 return fmt.Errorf("target ARN specified for resync is empty") 2920 } 2921 // Check if the current bucket has quota restrictions, if not skip it 2922 cfg, err := getReplicationConfig(ctx, opts.bucket) 2923 if err != nil { 2924 return err 2925 } 2926 tgtArns := cfg.FilterTargetArns( 2927 replication.ObjectOpts{ 2928 OpType: replication.ResyncReplicationType, 2929 TargetArn: opts.arn, 2930 }) 2931 2932 if len(tgtArns) == 0 { 2933 return fmt.Errorf("arn %s specified for resync not found in replication config", opts.arn) 2934 } 2935 globalReplicationPool.resyncer.RLock() 2936 data, ok := globalReplicationPool.resyncer.statusMap[opts.bucket] 2937 globalReplicationPool.resyncer.RUnlock() 2938 if !ok { 2939 data, err = loadBucketResyncMetadata(ctx, opts.bucket, objAPI) 2940 if err != nil { 2941 return err 2942 } 2943 } 2944 // validate if resync is in progress for this arn 2945 for tArn, st := range data.TargetsMap { 2946 if opts.arn == tArn && (st.ResyncStatus == ResyncStarted || st.ResyncStatus == ResyncPending) { 2947 return fmt.Errorf("Resync of bucket %s is already in progress for remote bucket %s", opts.bucket, opts.arn) 2948 } 2949 } 2950 2951 status := TargetReplicationResyncStatus{ 2952 ResyncID: opts.resyncID, 2953 ResyncBeforeDate: opts.resyncBefore, 2954 StartTime: UTCNow(), 2955 ResyncStatus: ResyncPending, 2956 Bucket: opts.bucket, 2957 } 2958 data.TargetsMap[opts.arn] = status 2959 if err = saveResyncStatus(ctx, opts.bucket, data, objAPI); err != nil { 2960 return err 2961 } 2962 2963 globalReplicationPool.resyncer.Lock() 2964 defer globalReplicationPool.resyncer.Unlock() 2965 brs, ok := globalReplicationPool.resyncer.statusMap[opts.bucket] 2966 if !ok { 2967 brs = 
BucketReplicationResyncStatus{ 2968 Version: resyncMetaVersion, 2969 TargetsMap: make(map[string]TargetReplicationResyncStatus), 2970 } 2971 } 2972 brs.TargetsMap[opts.arn] = status 2973 globalReplicationPool.resyncer.statusMap[opts.bucket] = brs 2974 go globalReplicationPool.resyncer.resyncBucket(GlobalContext, objAPI, false, opts) 2975 return nil 2976 } 2977 2978 func (s *replicationResyncer) trace(resyncID string, path string) func(err error) { 2979 startTime := time.Now() 2980 return func(err error) { 2981 duration := time.Since(startTime) 2982 if globalTrace.NumSubscribers(madmin.TraceReplicationResync) > 0 { 2983 globalTrace.Publish(replicationResyncTrace(resyncID, startTime, duration, path, err)) 2984 } 2985 } 2986 } 2987 2988 func replicationResyncTrace(resyncID string, startTime time.Time, duration time.Duration, path string, err error) madmin.TraceInfo { 2989 var errStr string 2990 if err != nil { 2991 errStr = err.Error() 2992 } 2993 funcName := fmt.Sprintf("replication.(resyncID=%s)", resyncID) 2994 return madmin.TraceInfo{ 2995 TraceType: madmin.TraceReplicationResync, 2996 Time: startTime, 2997 NodeName: globalLocalNodeName, 2998 FuncName: funcName, 2999 Duration: duration, 3000 Path: path, 3001 Error: errStr, 3002 } 3003 } 3004 3005 // delete resync metadata from replication resync state in memory 3006 func (p *ReplicationPool) deleteResyncMetadata(ctx context.Context, bucket string) { 3007 if p == nil { 3008 return 3009 } 3010 p.resyncer.Lock() 3011 delete(p.resyncer.statusMap, bucket) 3012 defer p.resyncer.Unlock() 3013 3014 globalSiteResyncMetrics.deleteBucket(bucket) 3015 } 3016 3017 // initResync - initializes bucket replication resync for all buckets. 3018 func (p *ReplicationPool) initResync(ctx context.Context, buckets []BucketInfo, objAPI ObjectLayer) error { 3019 if objAPI == nil { 3020 return errServerNotInitialized 3021 } 3022 // Load bucket metadata sys in background 3023 go p.startResyncRoutine(ctx, buckets, objAPI) 3024 return nil 3025 } 3026 3027 func (p *ReplicationPool) startResyncRoutine(ctx context.Context, buckets []BucketInfo, objAPI ObjectLayer) { 3028 r := rand.New(rand.NewSource(time.Now().UnixNano())) 3029 // Run the replication resync in a loop 3030 for { 3031 if err := p.loadResync(ctx, buckets, objAPI); err == nil { 3032 <-ctx.Done() 3033 return 3034 } 3035 duration := time.Duration(r.Float64() * float64(time.Minute)) 3036 if duration < time.Second { 3037 // Make sure to sleep at least a second to avoid high CPU ticks. 3038 duration = time.Second 3039 } 3040 time.Sleep(duration) 3041 } 3042 } 3043 3044 // Loads bucket replication resync statuses into memory. 3045 func (p *ReplicationPool) loadResync(ctx context.Context, buckets []BucketInfo, objAPI ObjectLayer) error { 3046 // Make sure only one node running resync on the cluster. 
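// Illustrative note: every node calls loadResync at startup, but the leader lock below ensures
// only one of them resumes pending resyncs. The shape of the guard (globalLeaderLock semantics
// inferred from its use here):
//
//	lctx, cancel := globalLeaderLock.GetLock(ctx) // blocks until this node holds the lock
//	defer cancel()                                // relinquish leadership on return
//	// ... resume pending/failed resyncs under lctx ...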
3047 ctx, cancel := globalLeaderLock.GetLock(ctx) 3048 defer cancel() 3049 3050 for index := range buckets { 3051 bucket := buckets[index].Name 3052 3053 meta, err := loadBucketResyncMetadata(ctx, bucket, objAPI) 3054 if err != nil { 3055 if !errors.Is(err, errVolumeNotFound) { 3056 logger.LogIf(ctx, err) 3057 } 3058 continue 3059 } 3060 3061 p.resyncer.Lock() 3062 p.resyncer.statusMap[bucket] = meta 3063 p.resyncer.Unlock() 3064 3065 tgts := meta.cloneTgtStats() 3066 for arn, st := range tgts { 3067 switch st.ResyncStatus { 3068 case ResyncFailed, ResyncStarted, ResyncPending: 3069 go p.resyncer.resyncBucket(ctx, objAPI, true, resyncOpts{ 3070 bucket: bucket, 3071 arn: arn, 3072 resyncID: st.ResyncID, 3073 resyncBefore: st.ResyncBeforeDate, 3074 }) 3075 } 3076 } 3077 } 3078 return nil 3079 } 3080 3081 // load bucket resync metadata from disk 3082 func loadBucketResyncMetadata(ctx context.Context, bucket string, objAPI ObjectLayer) (brs BucketReplicationResyncStatus, e error) { 3083 brs = newBucketResyncStatus(bucket) 3084 resyncDirPath := path.Join(bucketMetaPrefix, bucket, replicationDir) 3085 data, err := readConfig(GlobalContext, objAPI, pathJoin(resyncDirPath, resyncFileName)) 3086 if err != nil && err != errConfigNotFound { 3087 return brs, err 3088 } 3089 if len(data) == 0 { 3090 // Seems to be empty. 3091 return brs, nil 3092 } 3093 if len(data) <= 4 { 3094 return brs, fmt.Errorf("replication resync: no data") 3095 } 3096 // Read resync meta header 3097 switch binary.LittleEndian.Uint16(data[0:2]) { 3098 case resyncMetaFormat: 3099 default: 3100 return brs, fmt.Errorf("resyncMeta: unknown format: %d", binary.LittleEndian.Uint16(data[0:2])) 3101 } 3102 switch binary.LittleEndian.Uint16(data[2:4]) { 3103 case resyncMetaVersion: 3104 default: 3105 return brs, fmt.Errorf("resyncMeta: unknown version: %d", binary.LittleEndian.Uint16(data[2:4])) 3106 } 3107 // OK, parse data. 3108 if _, err = brs.UnmarshalMsg(data[4:]); err != nil { 3109 return brs, err 3110 } 3111 3112 switch brs.Version { 3113 case resyncMetaVersionV1: 3114 default: 3115 return brs, fmt.Errorf("unexpected resync meta version: %d", brs.Version) 3116 } 3117 return brs, nil 3118 } 3119 3120 // save resync status to resync.bin 3121 func saveResyncStatus(ctx context.Context, bucket string, brs BucketReplicationResyncStatus, objectAPI ObjectLayer) error { 3122 data := make([]byte, 4, brs.Msgsize()+4) 3123 3124 // Initialize the resync meta header. 3125 binary.LittleEndian.PutUint16(data[0:2], resyncMetaFormat) 3126 binary.LittleEndian.PutUint16(data[2:4], resyncMetaVersion) 3127 3128 buf, err := brs.MarshalMsg(data) 3129 if err != nil { 3130 return err 3131 } 3132 3133 configFile := path.Join(bucketMetaPrefix, bucket, replicationDir, resyncFileName) 3134 return saveConfig(ctx, objectAPI, configFile, buf) 3135 } 3136 3137 // getReplicationDiff returns un-replicated objects in a channel. 3138 // If a non-nil channel is returned it must be consumed fully or 3139 // the provided context must be canceled. 
3140 func getReplicationDiff(ctx context.Context, objAPI ObjectLayer, bucket string, opts madmin.ReplDiffOpts) (chan madmin.DiffInfo, error) { 3141 cfg, err := getReplicationConfig(ctx, bucket) 3142 if err != nil { 3143 logger.LogIf(ctx, err) 3144 return nil, err 3145 } 3146 tgts, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 3147 if err != nil { 3148 logger.LogIf(ctx, err) 3149 return nil, err 3150 } 3151 3152 objInfoCh := make(chan ObjectInfo, 10) 3153 if err := objAPI.Walk(ctx, bucket, opts.Prefix, objInfoCh, WalkOptions{}); err != nil { 3154 logger.LogIf(ctx, err) 3155 return nil, err 3156 } 3157 rcfg := replicationConfig{ 3158 Config: cfg, 3159 remotes: tgts, 3160 } 3161 diffCh := make(chan madmin.DiffInfo, 4000) 3162 go func() { 3163 defer xioutil.SafeClose(diffCh) 3164 for obj := range objInfoCh { 3165 if contextCanceled(ctx) { 3166 // Just consume input... 3167 continue 3168 } 3169 // Ignore object prefixes which are excluded 3170 // from versioning via the MinIO bucket versioning extension. 3171 if globalBucketVersioningSys.PrefixSuspended(bucket, obj.Name) { 3172 continue 3173 } 3174 roi := getHealReplicateObjectInfo(obj, rcfg) 3175 switch roi.ReplicationStatus { 3176 case replication.Completed, replication.Replica: 3177 if !opts.Verbose { 3178 continue 3179 } 3180 fallthrough 3181 default: 3182 // ignore pre-existing objects that don't satisfy replication rule(s) 3183 if roi.ReplicationStatus.Empty() && !roi.ExistingObjResync.mustResync() { 3184 continue 3185 } 3186 tgtsMap := make(map[string]madmin.TgtDiffInfo) 3187 for arn, st := range roi.TargetStatuses { 3188 if opts.ARN == "" || opts.ARN == arn { 3189 if !opts.Verbose && (st == replication.Completed || st == replication.Replica) { 3190 continue 3191 } 3192 tgtsMap[arn] = madmin.TgtDiffInfo{ 3193 ReplicationStatus: st.String(), 3194 } 3195 } 3196 } 3197 for arn, st := range roi.TargetPurgeStatuses { 3198 if opts.ARN == "" || opts.ARN == arn { 3199 if !opts.Verbose && st == Complete { 3200 continue 3201 } 3202 t, ok := tgtsMap[arn] 3203 if !ok { 3204 t = madmin.TgtDiffInfo{} 3205 } 3206 t.DeleteReplicationStatus = string(st) 3207 tgtsMap[arn] = t 3208 } 3209 } 3210 select { 3211 case diffCh <- madmin.DiffInfo{ 3212 Object: obj.Name, 3213 VersionID: obj.VersionID, 3214 LastModified: obj.ModTime, 3215 IsDeleteMarker: obj.DeleteMarker, 3216 ReplicationStatus: string(roi.ReplicationStatus), 3217 DeleteReplicationStatus: string(roi.VersionPurgeStatus), 3218 ReplicationTimestamp: roi.ReplicationTimestamp, 3219 Targets: tgtsMap, 3220 }: 3221 case <-ctx.Done(): 3222 continue 3223 } 3224 } 3225 } 3226 }() 3227 return diffCh, nil 3228 } 3229 3230 // QueueReplicationHeal is a wrapper for queueReplicationHeal 3231 func QueueReplicationHeal(ctx context.Context, bucket string, oi ObjectInfo, retryCount int) { 3232 // ignore modtime zero objects 3233 if oi.ModTime.IsZero() { 3234 return 3235 } 3236 rcfg, _ := getReplicationConfig(ctx, bucket) 3237 tgts, _ := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 3238 queueReplicationHeal(ctx, bucket, oi, replicationConfig{ 3239 Config: rcfg, 3240 remotes: tgts, 3241 }, retryCount) 3242 } 3243 3244 // queueReplicationHeal enqueues objects that failed replication OR eligible for resyncing through 3245 // an ongoing resync operation or via existing objects replication configuration setting. 
3246 func queueReplicationHeal(ctx context.Context, bucket string, oi ObjectInfo, rcfg replicationConfig, retryCount int) (roi ReplicateObjectInfo) { 3247 // ignore modtime zero objects 3248 if oi.ModTime.IsZero() { 3249 return roi 3250 } 3251 3252 if isVeeamSOSAPIObject(oi.Name) { 3253 return roi 3254 } 3255 if rcfg.Config == nil || rcfg.remotes == nil { 3256 return roi 3257 } 3258 roi = getHealReplicateObjectInfo(oi, rcfg) 3259 roi.RetryCount = uint32(retryCount) 3260 if !roi.Dsc.ReplicateAny() { 3261 return 3262 } 3263 // early return if replication already done, otherwise we need to determine if this 3264 // version is an existing object that needs healing. 3265 if oi.ReplicationStatus == replication.Completed && oi.VersionPurgeStatus.Empty() && !roi.ExistingObjResync.mustResync() { 3266 return 3267 } 3268 3269 if roi.DeleteMarker || !roi.VersionPurgeStatus.Empty() { 3270 versionID := "" 3271 dmVersionID := "" 3272 if roi.VersionPurgeStatus.Empty() { 3273 dmVersionID = roi.VersionID 3274 } else { 3275 versionID = roi.VersionID 3276 } 3277 3278 dv := DeletedObjectReplicationInfo{ 3279 DeletedObject: DeletedObject{ 3280 ObjectName: roi.Name, 3281 DeleteMarkerVersionID: dmVersionID, 3282 VersionID: versionID, 3283 ReplicationState: roi.ReplicationState, 3284 DeleteMarkerMTime: DeleteMarkerMTime{roi.ModTime}, 3285 DeleteMarker: roi.DeleteMarker, 3286 }, 3287 Bucket: roi.Bucket, 3288 OpType: replication.HealReplicationType, 3289 EventType: ReplicateHealDelete, 3290 } 3291 // heal delete marker replication failure or versioned delete replication failure 3292 if roi.ReplicationStatus == replication.Pending || 3293 roi.ReplicationStatus == replication.Failed || 3294 roi.VersionPurgeStatus == Failed || roi.VersionPurgeStatus == Pending { 3295 globalReplicationPool.queueReplicaDeleteTask(dv) 3296 return 3297 } 3298 // if replication status is Complete on DeleteMarker and existing object resync required 3299 if roi.ExistingObjResync.mustResync() && (roi.ReplicationStatus == replication.Completed || roi.ReplicationStatus.Empty()) { 3300 queueReplicateDeletesWrapper(dv, roi.ExistingObjResync) 3301 return 3302 } 3303 return 3304 } 3305 if roi.ExistingObjResync.mustResync() { 3306 roi.OpType = replication.ExistingObjectReplicationType 3307 } 3308 switch roi.ReplicationStatus { 3309 case replication.Pending, replication.Failed: 3310 roi.EventType = ReplicateHeal 3311 globalReplicationPool.queueReplicaTask(roi) 3312 return 3313 } 3314 if roi.ExistingObjResync.mustResync() { 3315 roi.EventType = ReplicateExisting 3316 globalReplicationPool.queueReplicaTask(roi) 3317 } 3318 return 3319 } 3320 3321 const ( 3322 mrfSaveInterval = 5 * time.Minute 3323 mrfQueueInterval = mrfSaveInterval + time.Minute // A minute higher than save interval 3324 3325 mrfRetryLimit = 3 // max number of retries before letting scanner catch up on this object version 3326 mrfMaxEntries = 1000000 3327 ) 3328 3329 func (p *ReplicationPool) persistMRF() { 3330 if !p.initialized() { 3331 return 3332 } 3333 3334 entries := make(map[string]MRFReplicateEntry) 3335 mTimer := time.NewTimer(mrfSaveInterval) 3336 defer mTimer.Stop() 3337 3338 saveMRFToDisk := func() { 3339 if len(entries) == 0 { 3340 return 3341 } 3342 3343 // queue all entries for healing before overwriting the node mrf file 3344 if !contextCanceled(p.ctx) { 3345 p.queueMRFHeal() 3346 } 3347 3348 p.saveMRFEntries(p.ctx, entries) 3349 3350 entries = make(map[string]MRFReplicateEntry) 3351 } 3352 for { 3353 select { 3354 case <-mTimer.C: 3355 saveMRFToDisk() 3356 
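// Illustrative note: MRF entries are flushed on three triggers - the periodic timer (this
// case), the entry map growing past mrfMaxEntries, and shutdown via p.ctx.Done(); only the
// timer path re-arms. A generic skeleton of this batched-writer loop, with hypothetical names
// (saveTimer, in, pending, flush, maxEntries):
//
//	for {
//		select {
//		case <-saveTimer.C:
//			flush() // periodic
//			saveTimer.Reset(saveInterval)
//		case e := <-in:
//			pending[e.versionID] = e
//			if len(pending) >= maxEntries {
//				flush() // overflow
//			}
//		case <-ctx.Done():
//			flush() // best-effort final save on shutdown
//			return
//		}
//	}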
func (p *ReplicationPool) queueMRFSave(entry MRFReplicateEntry) {
	if !p.initialized() {
		return
	}
	if entry.RetryCount > mrfRetryLimit { // Let the scanner catch up if the retry count is exceeded.
		atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedCount, 1)
		atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedBytes, uint64(entry.sz))
		return
	}

	select {
	case <-GlobalContext.Done():
		return
	case <-p.mrfStopCh:
		return
	default:
		select {
		case p.mrfSaveCh <- entry:
		default:
			atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedCount, 1)
			atomic.AddUint64(&globalReplicationStats.mrfStats.TotalDroppedBytes, uint64(entry.sz))
		}
	}
}

func (p *ReplicationPool) persistToDrive(ctx context.Context, v MRFReplicateEntries) {
	newReader := func() io.ReadCloser {
		r, w := io.Pipe()
		go func() {
			// Initialize the MRF meta header.
			var data [4]byte
			binary.LittleEndian.PutUint16(data[0:2], mrfMetaFormat)
			binary.LittleEndian.PutUint16(data[2:4], mrfMetaVersion)
			mw := msgp.NewWriter(w)
			n, err := mw.Write(data[:])
			if err != nil {
				w.CloseWithError(err)
				return
			}
			if n != len(data) {
				w.CloseWithError(io.ErrShortWrite)
				return
			}
			err = v.EncodeMsg(mw)
			mw.Flush()
			w.CloseWithError(err)
		}()
		return r
	}

	globalLocalDrivesMu.RLock()
	localDrives := cloneDrives(globalLocalDrives)
	globalLocalDrivesMu.RUnlock()

	for _, localDrive := range localDrives {
		r := newReader()
		err := localDrive.CreateFile(ctx, "", minioMetaBucket, pathJoin(replicationMRFDir, globalLocalNodeNameHex+".bin"), -1, r)
		r.Close()
		if err == nil {
			break
		}
	}
}

// saveMRFEntries saves the MRF entries to <globalLocalNodeNameHex>.bin.
func (p *ReplicationPool) saveMRFEntries(ctx context.Context, entries map[string]MRFReplicateEntry) {
	if !p.initialized() {
		return
	}
	atomic.StoreUint64(&globalReplicationStats.mrfStats.LastFailedCount, uint64(len(entries)))
	if len(entries) == 0 {
		return
	}

	v := MRFReplicateEntries{
		Entries: entries,
		Version: mrfMetaVersion,
	}

	p.persistToDrive(ctx, v)
}

// loadMRF loads the MRF entries from disk.
func (p *ReplicationPool) loadMRF() (mrfRec MRFReplicateEntries, err error) {
	loadMRF := func(rc io.ReadCloser) (re MRFReplicateEntries, err error) {
		defer rc.Close()

		if !p.initialized() {
			return re, nil
		}
		var data [4]byte
		n, err := rc.Read(data[:])
		if err != nil {
			return re, err
		}
		if n != len(data) {
			return re, errors.New("replication mrf: no data")
		}
		// Read the MRF meta header.
		switch binary.LittleEndian.Uint16(data[0:2]) {
		case mrfMetaFormat:
		default:
			return re, fmt.Errorf("replication mrf: unknown format: %d", binary.LittleEndian.Uint16(data[0:2]))
		}
		switch binary.LittleEndian.Uint16(data[2:4]) {
		case mrfMetaVersion:
		default:
			return re, fmt.Errorf("replication mrf: unknown version: %d", binary.LittleEndian.Uint16(data[2:4]))
		}

		// OK, parse the data. Ignore any parsing errors; we do not care since
		// this file is regenerated anyway.
		re.DecodeMsg(msgp.NewReader(rc))

		return re, nil
	}

	globalLocalDrivesMu.RLock()
	localDrives := cloneDrives(globalLocalDrives)
	globalLocalDrivesMu.RUnlock()

	for _, localDrive := range localDrives {
		rc, err := localDrive.ReadFileStream(p.ctx, minioMetaBucket, pathJoin(replicationMRFDir, globalLocalNodeNameHex+".bin"), 0, -1)
		if err != nil {
			continue
		}

		mrfRec, err = loadMRF(rc)
		if err != nil {
			continue
		}

		// Finally delete the file after processing the MRF entries.
		localDrive.Delete(p.ctx, minioMetaBucket, pathJoin(replicationMRFDir, globalLocalNodeNameHex+".bin"), DeleteOptions{})
		break
	}

	return mrfRec, nil
}
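// The on-disk MRF file therefore starts with a 4-byte little-endian header
// (format in bytes 0-1, version in bytes 2-3), followed by the msgp-encoded
// MRFReplicateEntries payload. A standalone decoding sketch (illustrative
// only; the checks mirror loadMRF above, given an io.Reader r positioned at
// the start of the file):
//
//	var hdr [4]byte
//	if _, err := io.ReadFull(r, hdr[:]); err != nil {
//		return err
//	}
//	format := binary.LittleEndian.Uint16(hdr[0:2])
//	version := binary.LittleEndian.Uint16(hdr[2:4])
//	if format != mrfMetaFormat || version != mrfMetaVersion {
//		return fmt.Errorf("unexpected mrf header: %d/%d", format, version)
//	}
//	// Remaining bytes: msgp-encoded MRFReplicateEntries.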
func (p *ReplicationPool) processMRF() {
	if !p.initialized() {
		return
	}
	pTimer := time.NewTimer(mrfQueueInterval)
	defer pTimer.Stop()
	for {
		select {
		case <-pTimer.C:
			// Skip healing if all targets are offline.
			var offlineCnt int
			tgts := globalBucketTargetSys.ListTargets(p.ctx, "", "")
			for _, tgt := range tgts {
				if globalBucketTargetSys.isOffline(tgt.URL()) {
					offlineCnt++
				}
			}
			if len(tgts) == offlineCnt {
				pTimer.Reset(mrfQueueInterval)
				continue
			}
			if err := p.queueMRFHeal(); err != nil && !osIsNotExist(err) {
				logger.LogIf(p.ctx, err)
			}
			pTimer.Reset(mrfQueueInterval)
		case <-p.ctx.Done():
			return
		}
	}
}

// queueMRFHeal loads the persisted MRF entries and sends them to the heal
// channel for an attempt to heal replication.
func (p *ReplicationPool) queueMRFHeal() error {
	p.mrfMU.Lock()
	defer p.mrfMU.Unlock()

	if !p.initialized() {
		return errServerNotInitialized
	}

	mrfRec, err := p.loadMRF()
	if err != nil {
		return err
	}

	// Queue replication heal in a goroutine to avoid holding up the MRF save routine.
	go func() {
		for vID, e := range mrfRec.Entries {
			ctx, cancel := context.WithTimeout(p.ctx, time.Second) // Do not waste more than a second on this.

			oi, err := p.objLayer.GetObjectInfo(ctx, e.Bucket, e.Object, ObjectOptions{
				VersionID: vID,
			})
			cancel()
			if err != nil {
				continue
			}

			QueueReplicationHeal(p.ctx, e.Bucket, oi, e.RetryCount)
		}
	}()

	return nil
}

func (p *ReplicationPool) initialized() bool {
	return !(p == nil || p.objLayer == nil)
}

// getMRF returns the MRF entries for this node.
func (p *ReplicationPool) getMRF(ctx context.Context, bucket string) (ch <-chan madmin.ReplicationMRF, err error) {
	mrfRec, err := p.loadMRF()
	if err != nil {
		return nil, err
	}

	mrfCh := make(chan madmin.ReplicationMRF, 100)
	go func() {
		defer xioutil.SafeClose(mrfCh)
		for vID, e := range mrfRec.Entries {
			if bucket != "" && e.Bucket != bucket {
				continue
			}
			select {
			case mrfCh <- madmin.ReplicationMRF{
				NodeName:   globalLocalNodeName,
				Object:     e.Object,
				VersionID:  vID,
				Bucket:     e.Bucket,
				RetryCount: e.RetryCount,
			}:
			case <-ctx.Done():
				return
			}
		}
	}()

	return mrfCh, nil
}
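// A caller-side sketch (illustrative only) of draining the per-node MRF
// channel returned by getMRF, e.g. to surface stuck entries for a bucket;
// `pool` is an assumed *ReplicationPool supplied by the caller:
//
//	mrfCh, err := pool.getMRF(ctx, bucket)
//	if err != nil {
//		return err
//	}
//	for m := range mrfCh {
//		fmt.Println(m.NodeName, m.Bucket, m.Object, m.VersionID, m.RetryCount)
//	}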