storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/bucket-replication.go

     1  /*
     2   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net/http"
    23  	"reflect"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	minio "github.com/minio/minio-go/v7"
    29  	miniogo "github.com/minio/minio-go/v7"
    30  	"github.com/minio/minio-go/v7/pkg/encrypt"
    31  	"github.com/minio/minio-go/v7/pkg/tags"
    32  
    33  	"storj.io/minio/cmd/crypto"
    34  	xhttp "storj.io/minio/cmd/http"
    35  	"storj.io/minio/cmd/logger"
    36  	"storj.io/minio/pkg/bucket/bandwidth"
    37  	"storj.io/minio/pkg/bucket/replication"
    38  	"storj.io/minio/pkg/event"
    39  	iampolicy "storj.io/minio/pkg/iam/policy"
    40  	"storj.io/minio/pkg/madmin"
    41  )
    42  
     43  // gets replication config associated with a given bucket name.
    44  func getReplicationConfig(ctx context.Context, bucketName string) (rc *replication.Config, err error) {
    45  	if GlobalIsGateway {
    46  		objAPI := newObjectLayerFn()
    47  		if objAPI == nil {
    48  			return nil, errServerNotInitialized
    49  		}
    50  
    51  		return nil, BucketReplicationConfigNotFound{Bucket: bucketName}
    52  	}
    53  
    54  	return globalBucketMetadataSys.GetReplicationConfig(ctx, bucketName)
    55  }
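
         // Illustrative note (not part of the original source): in gateway mode the
         // lookup above always reports BucketReplicationConfigNotFound, so callers such
         // as mustReplicater and hasReplicationRules simply treat replication as
         // disabled for the bucket. A hypothetical caller-side sketch:
         //
         //	if _, err := getReplicationConfig(ctx, bucket); err != nil {
         //		// no replication config (or gateway mode): skip replication for this bucket
         //	}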
    56  
     57  // validateReplicationDestination returns an error if the replication destination bucket is missing or not configured.
     58  // It also returns true if the replication destination is the same as this server.
    59  func validateReplicationDestination(ctx context.Context, bucket string, rCfg *replication.Config) (bool, error) {
    60  	arn, err := madmin.ParseARN(rCfg.RoleArn)
    61  	if err != nil {
    62  		return false, BucketRemoteArnInvalid{}
    63  	}
    64  	if arn.Type != madmin.ReplicationService {
    65  		return false, BucketRemoteArnTypeInvalid{}
    66  	}
    67  	clnt := globalBucketTargetSys.GetRemoteTargetClient(ctx, rCfg.RoleArn)
    68  	if clnt == nil {
    69  		return false, BucketRemoteTargetNotFound{Bucket: bucket}
    70  	}
    71  	if found, _ := clnt.BucketExists(ctx, rCfg.GetDestination().Bucket); !found {
    72  		return false, BucketRemoteDestinationNotFound{Bucket: rCfg.GetDestination().Bucket}
    73  	}
    74  	if ret, err := globalBucketObjectLockSys.Get(bucket); err == nil {
    75  		if ret.LockEnabled {
    76  			lock, _, _, _, err := clnt.GetObjectLockConfig(ctx, rCfg.GetDestination().Bucket)
    77  			if err != nil || lock != "Enabled" {
    78  				return false, BucketReplicationDestinationMissingLock{Bucket: rCfg.GetDestination().Bucket}
    79  			}
    80  		}
    81  	}
    82  	// validate replication ARN against target endpoint
    83  	c, ok := globalBucketTargetSys.arnRemotesMap[rCfg.RoleArn]
    84  	if ok {
    85  		if c.EndpointURL().String() == clnt.EndpointURL().String() {
    86  			sameTarget, _ := isLocalHost(clnt.EndpointURL().Hostname(), clnt.EndpointURL().Port(), globalMinioPort)
    87  			return sameTarget, nil
    88  		}
    89  	}
    90  	return false, BucketRemoteTargetNotFound{Bucket: bucket}
    91  }
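
         // Illustrative usage (not part of the original source): a handler that stores a
         // new replication configuration could validate the destination first and reject
         // configurations whose target resolves back to this server; the surrounding
         // handler variables below are hypothetical.
         //
         //	sameTarget, err := validateReplicationDestination(ctx, bucket, replicationConfig)
         //	if err != nil || sameTarget {
         //		// reject the configuration
         //	}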
    92  
    93  func mustReplicateWeb(ctx context.Context, r *http.Request, bucket, object string, meta map[string]string, replStatus string, permErr APIErrorCode) (replicate bool, sync bool) {
    94  	if permErr != ErrNone {
    95  		return
    96  	}
    97  	return mustReplicater(ctx, bucket, object, meta, replStatus)
    98  }
    99  
    100  // mustReplicate returns two booleans: whether the object meets the replication criteria, and whether replication
    101  // is to be done synchronously.
   102  func mustReplicate(ctx context.Context, r *http.Request, bucket, object string, meta map[string]string, replStatus string) (replicate bool, sync bool) {
   103  	if s3Err := isPutActionAllowed(ctx, getRequestAuthType(r), bucket, "", r, iampolicy.GetReplicationConfigurationAction); s3Err != ErrNone {
   104  		return
   105  	}
   106  	return mustReplicater(ctx, bucket, object, meta, replStatus)
   107  }
   108  
    109  // mustReplicater returns two booleans: whether the object meets the replication criteria, and whether replication
    110  // is to be done synchronously.
   111  func mustReplicater(ctx context.Context, bucket, object string, meta map[string]string, replStatus string) (replicate bool, sync bool) {
   112  	if GlobalIsGateway {
   113  		return replicate, sync
   114  	}
   115  	if rs, ok := meta[xhttp.AmzBucketReplicationStatus]; ok {
   116  		replStatus = rs
   117  	}
   118  	if replication.StatusType(replStatus) == replication.Replica {
   119  		return replicate, sync
   120  	}
   121  	cfg, err := getReplicationConfig(ctx, bucket)
   122  	if err != nil {
   123  		return replicate, sync
   124  	}
   125  	opts := replication.ObjectOpts{
   126  		Name: object,
   127  		SSEC: crypto.SSEC.IsEncrypted(meta),
   128  	}
   129  	tagStr, ok := meta[xhttp.AmzObjectTagging]
   130  	if ok {
   131  		opts.UserTags = tagStr
   132  	}
   133  	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, cfg.RoleArn)
   134  	// the target online status should not be used here while deciding
   135  	// whether to replicate as the target could be temporarily down
   136  	if tgt != nil {
   137  		return cfg.Replicate(opts), tgt.replicateSync
   138  	}
   139  	return cfg.Replicate(opts), false
   140  }
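
         // Illustrative usage (not part of the original source): a PUT-path caller would
         // typically consult mustReplicate once the object metadata is assembled and,
         // after a successful write, hand the object off to the replication pool. The
         // variables r, metadata, objInfo and objectAPI below are hypothetical.
         //
         //	replicate, sync := mustReplicate(ctx, r, bucket, object, metadata, "")
         //	// ... perform the write, obtaining objInfo ...
         //	if replicate {
         //		scheduleReplication(ctx, objInfo, objectAPI, sync, replication.ObjectReplicationType)
         //	}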
   141  
    142  // Standard headers that need to be extracted from user metadata.
   143  var standardHeaders = []string{
   144  	xhttp.ContentType,
   145  	xhttp.CacheControl,
   146  	xhttp.ContentEncoding,
   147  	xhttp.ContentLanguage,
   148  	xhttp.ContentDisposition,
   149  	xhttp.AmzStorageClass,
   150  	xhttp.AmzObjectTagging,
   151  	xhttp.AmzBucketReplicationStatus,
   152  	xhttp.AmzObjectLockMode,
   153  	xhttp.AmzObjectLockRetainUntilDate,
   154  	xhttp.AmzObjectLockLegalHold,
   155  	xhttp.AmzTagCount,
   156  	xhttp.AmzServerSideEncryption,
   157  }
   158  
   159  // returns true if any of the objects being deleted qualifies for replication.
   160  func hasReplicationRules(ctx context.Context, bucket string, objects []ObjectToDelete) bool {
   161  	c, err := getReplicationConfig(ctx, bucket)
   162  	if err != nil || c == nil {
   163  		return false
   164  	}
   165  	for _, obj := range objects {
   166  		if c.HasActiveRules(obj.ObjectName, true) {
   167  			return true
   168  		}
   169  	}
   170  	return false
   171  }
   172  
   173  // isStandardHeader returns true if header is a supported header and not a custom header
   174  func isStandardHeader(matchHeaderKey string) bool {
   175  	return equals(matchHeaderKey, standardHeaders...)
   176  }
   177  
    178  // returns whether the object version is a delete marker and whether the object qualifies for replication
   179  func checkReplicateDelete(ctx context.Context, bucket string, dobj ObjectToDelete, oi ObjectInfo, gerr error) (replicate, sync bool) {
   180  	rcfg, err := getReplicationConfig(ctx, bucket)
   181  	if err != nil || rcfg == nil {
   182  		return false, sync
   183  	}
   184  	opts := replication.ObjectOpts{
   185  		Name:         dobj.ObjectName,
   186  		SSEC:         crypto.SSEC.IsEncrypted(oi.UserDefined),
   187  		UserTags:     oi.UserTags,
   188  		DeleteMarker: oi.DeleteMarker,
   189  		VersionID:    dobj.VersionID,
   190  		OpType:       replication.DeleteReplicationType,
   191  	}
   192  	replicate = rcfg.Replicate(opts)
    193  	// when the incoming delete is removal of a delete marker (a.k.a. versioned delete),
   194  	// GetObjectInfo returns extra information even though it returns errFileNotFound
   195  	if gerr != nil {
   196  		validReplStatus := false
   197  		switch oi.ReplicationStatus {
   198  		case replication.Pending, replication.Completed, replication.Failed:
   199  			validReplStatus = true
   200  		}
   201  		if oi.DeleteMarker && (validReplStatus || replicate) {
   202  			return true, sync
   203  		}
    204  		// it can be the case that the other cluster is down and a duplicate `mc rm --vid`
   205  		// is issued - this still needs to be replicated back to the other target
   206  		return oi.VersionPurgeStatus == Pending || oi.VersionPurgeStatus == Failed, sync
   207  	}
   208  	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, rcfg.RoleArn)
   209  	// the target online status should not be used here while deciding
   210  	// whether to replicate deletes as the target could be temporarily down
   211  	if tgt == nil {
   212  		return false, false
   213  	}
   214  	return replicate, tgt.replicateSync
   215  }
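
         // Illustrative usage (not part of the original source): a delete handler would
         // call checkReplicateDelete with the ObjectInfo of the version being removed
         // and, when the delete qualifies, queue delete replication. The variables dobj,
         // goi, gerr, deleted and objectAPI below are hypothetical.
         //
         //	replicate, sync := checkReplicateDelete(ctx, bucket, dobj, goi, gerr)
         //	if replicate {
         //		dv := DeletedObjectVersionInfo{DeletedObject: deleted, Bucket: bucket}
         //		scheduleReplicationDelete(ctx, dv, objectAPI, sync)
         //	}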
   216  
   217  // replicate deletes to the designated replication target if replication configuration
   218  // has delete marker replication or delete replication (MinIO extension to allow deletes where version id
   219  // is specified) enabled.
    220  // Similar to bucket replication for the PUT operation, soft deletes (a.k.a. setting a delete marker) and
    221  // permanent deletes (by specifying a version ID in the delete operation) have three states "Pending", "Complete"
    222  // and "Failed" to mark the status of the replication of the "DELETE" operation. All failed operations can
    223  // then be retried by healing. In the case of permanent deletes, until the replication is completed on the
    224  // target cluster, the object version is marked deleted on the source and hidden from listing. It is permanently
    225  // deleted from the source when the VersionPurgeStatus changes to "Complete", i.e. after replication succeeds
    226  // on the target.
   227  func replicateDelete(ctx context.Context, dobj DeletedObjectVersionInfo, objectAPI ObjectLayer) {
   228  	bucket := dobj.Bucket
   229  	versionID := dobj.DeleteMarkerVersionID
   230  	if versionID == "" {
   231  		versionID = dobj.VersionID
   232  	}
   233  
   234  	rcfg, err := getReplicationConfig(ctx, bucket)
   235  	if err != nil || rcfg == nil {
   236  		logger.LogIf(ctx, err)
   237  		sendEvent(eventArgs{
   238  			BucketName: bucket,
   239  			Object: ObjectInfo{
   240  				Bucket:       bucket,
   241  				Name:         dobj.ObjectName,
   242  				VersionID:    versionID,
   243  				DeleteMarker: dobj.DeleteMarker,
   244  			},
   245  			Host:      "Internal: [Replication]",
   246  			EventName: event.ObjectReplicationNotTracked,
   247  		})
   248  		return
   249  	}
   250  
   251  	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, rcfg.RoleArn)
   252  	if tgt == nil {
   253  		logger.LogIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, rcfg.RoleArn))
   254  		sendEvent(eventArgs{
   255  			BucketName: bucket,
   256  			Object: ObjectInfo{
   257  				Bucket:       bucket,
   258  				Name:         dobj.ObjectName,
   259  				VersionID:    versionID,
   260  				DeleteMarker: dobj.DeleteMarker,
   261  			},
   262  			Host:      "Internal: [Replication]",
   263  			EventName: event.ObjectReplicationNotTracked,
   264  		})
   265  		return
   266  	}
   267  
   268  	rmErr := tgt.RemoveObject(ctx, rcfg.GetDestination().Bucket, dobj.ObjectName, miniogo.RemoveObjectOptions{
   269  		VersionID: versionID,
   270  		Internal: miniogo.AdvancedRemoveOptions{
   271  			ReplicationDeleteMarker: dobj.DeleteMarkerVersionID != "",
   272  			ReplicationMTime:        dobj.DeleteMarkerMTime.Time,
   273  			ReplicationStatus:       miniogo.ReplicationStatusReplica,
    274  			ReplicationRequest:      true, // always set this to distinguish between `mc mirror` replication and server-side replication
   275  		},
   276  	})
   277  
   278  	replicationStatus := dobj.DeleteMarkerReplicationStatus
   279  	versionPurgeStatus := dobj.VersionPurgeStatus
   280  
   281  	if rmErr != nil {
   282  		if dobj.VersionID == "" {
   283  			replicationStatus = string(replication.Failed)
   284  		} else {
   285  			versionPurgeStatus = Failed
   286  		}
   287  		logger.LogIf(ctx, fmt.Errorf("Unable to replicate delete marker to %s/%s(%s): %s", rcfg.GetDestination().Bucket, dobj.ObjectName, versionID, rmErr))
   288  	} else {
   289  		if dobj.VersionID == "" {
   290  			replicationStatus = string(replication.Completed)
   291  		} else {
   292  			versionPurgeStatus = Complete
   293  		}
   294  	}
   295  	prevStatus := dobj.DeleteMarkerReplicationStatus
   296  	currStatus := replicationStatus
   297  	if dobj.VersionID != "" {
   298  		prevStatus = string(dobj.VersionPurgeStatus)
   299  		currStatus = string(versionPurgeStatus)
   300  	}
   301  	// to decrement pending count later.
   302  	globalReplicationStats.Update(dobj.Bucket, 0, replication.StatusType(currStatus), replication.StatusType(prevStatus), replication.DeleteReplicationType)
   303  
   304  	var eventName = event.ObjectReplicationComplete
   305  	if replicationStatus == string(replication.Failed) || versionPurgeStatus == Failed {
   306  		eventName = event.ObjectReplicationFailed
   307  	}
   308  
    309  	// Update metadata on the delete marker or purge the permanent delete if replication succeeded.
   310  	dobjInfo, err := objectAPI.DeleteObject(ctx, bucket, dobj.ObjectName, ObjectOptions{
   311  		VersionID:                     versionID,
   312  		DeleteMarkerReplicationStatus: replicationStatus,
   313  		VersionPurgeStatus:            versionPurgeStatus,
   314  		Versioned:                     globalBucketVersioningSys.Enabled(bucket),
   315  		VersionSuspended:              globalBucketVersioningSys.Suspended(bucket),
   316  	})
   317  	if err != nil && !isErrVersionNotFound(err) { // VersionNotFound would be reported by pool that object version is missing on.
   318  		logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %s", bucket, dobj.ObjectName, versionID, err))
   319  		sendEvent(eventArgs{
   320  			BucketName: bucket,
   321  			Object: ObjectInfo{
   322  				Bucket:       bucket,
   323  				Name:         dobj.ObjectName,
   324  				VersionID:    versionID,
   325  				DeleteMarker: dobj.DeleteMarker,
   326  			},
   327  			Host:      "Internal: [Replication]",
   328  			EventName: eventName,
   329  		})
   330  	} else {
   331  		sendEvent(eventArgs{
   332  			BucketName: bucket,
   333  			Object:     dobjInfo,
   334  			Host:       "Internal: [Replication]",
   335  			EventName:  eventName,
   336  		})
   337  	}
   338  }
   339  
   340  func getCopyObjMetadata(oi ObjectInfo, dest replication.Destination) map[string]string {
   341  	meta := make(map[string]string, len(oi.UserDefined))
   342  	for k, v := range oi.UserDefined {
   343  		if strings.HasPrefix(strings.ToLower(k), ReservedMetadataPrefixLower) {
   344  			continue
   345  		}
   346  
   347  		if equals(k, xhttp.AmzBucketReplicationStatus) {
   348  			continue
   349  		}
   350  
   351  		// https://github.com/google/security-research/security/advisories/GHSA-76wf-9vgp-pj7w
   352  		if equals(k, xhttp.AmzMetaUnencryptedContentLength, xhttp.AmzMetaUnencryptedContentMD5) {
   353  			continue
   354  		}
   355  
   356  		meta[k] = v
   357  	}
   358  
   359  	if oi.ContentEncoding != "" {
   360  		meta[xhttp.ContentEncoding] = oi.ContentEncoding
   361  	}
   362  
   363  	if oi.ContentType != "" {
   364  		meta[xhttp.ContentType] = oi.ContentType
   365  	}
   366  
   367  	if oi.UserTags != "" {
   368  		meta[xhttp.AmzObjectTagging] = oi.UserTags
   369  		meta[xhttp.AmzTagDirective] = "REPLACE"
   370  	}
   371  
   372  	sc := dest.StorageClass
   373  	if sc == "" {
   374  		sc = oi.StorageClass
   375  	}
   376  	if sc != "" {
   377  		meta[xhttp.AmzStorageClass] = sc
   378  	}
   379  	meta[xhttp.MinIOSourceETag] = oi.ETag
   380  	meta[xhttp.MinIOSourceMTime] = oi.ModTime.Format(time.RFC3339Nano)
   381  	meta[xhttp.AmzBucketReplicationStatus] = replication.Replica.String()
   382  	return meta
   383  }
   384  
   385  type caseInsensitiveMap map[string]string
   386  
   387  // Lookup map entry case insensitively.
   388  func (m caseInsensitiveMap) Lookup(key string) (string, bool) {
   389  	if len(m) == 0 {
   390  		return "", false
   391  	}
   392  	for _, k := range []string{
   393  		key,
   394  		strings.ToLower(key),
   395  		http.CanonicalHeaderKey(key),
   396  	} {
   397  		v, ok := m[k]
   398  		if ok {
   399  			return v, ok
   400  		}
   401  	}
   402  	return "", false
   403  }
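
         // Illustrative example (not part of the original source): Lookup only tries the
         // key as given, its lowercase form, and its canonical header form, so entries
         // stored under any other casing are not found. The literals below are
         // hypothetical.
         //
         //	m := caseInsensitiveMap{"content-language": "en"}
         //	m.Lookup("Content-Language") // "en", true (lowercase form matches)
         //
         //	m2 := caseInsensitiveMap{"X-AMZ-Meta-Foo": "bar"}
         //	m2.Lookup("x-amz-meta-foo") // "", false (exact, lowercase and canonical forms all miss)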
   404  
   405  func putReplicationOpts(ctx context.Context, dest replication.Destination, objInfo ObjectInfo) (putOpts miniogo.PutObjectOptions, err error) {
   406  	meta := make(map[string]string)
   407  	for k, v := range objInfo.UserDefined {
   408  		if strings.HasPrefix(strings.ToLower(k), ReservedMetadataPrefixLower) {
   409  			continue
   410  		}
   411  		if isStandardHeader(k) {
   412  			continue
   413  		}
   414  		meta[k] = v
   415  	}
   416  
   417  	sc := dest.StorageClass
   418  	if sc == "" {
   419  		sc = objInfo.StorageClass
   420  	}
   421  	putOpts = miniogo.PutObjectOptions{
   422  		UserMetadata:    meta,
   423  		ContentType:     objInfo.ContentType,
   424  		ContentEncoding: objInfo.ContentEncoding,
   425  		StorageClass:    sc,
   426  		Internal: miniogo.AdvancedPutOptions{
   427  			SourceVersionID:    objInfo.VersionID,
   428  			ReplicationStatus:  miniogo.ReplicationStatusReplica,
   429  			SourceMTime:        objInfo.ModTime,
   430  			SourceETag:         objInfo.ETag,
    431  			ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and server-side replication
   432  		},
   433  	}
   434  	if objInfo.UserTags != "" {
   435  		tag, _ := tags.ParseObjectTags(objInfo.UserTags)
   436  		if tag != nil {
   437  			putOpts.UserTags = tag.ToMap()
   438  		}
   439  	}
   440  
   441  	lkMap := caseInsensitiveMap(objInfo.UserDefined)
   442  	if lang, ok := lkMap.Lookup(xhttp.ContentLanguage); ok {
   443  		putOpts.ContentLanguage = lang
   444  	}
   445  	if disp, ok := lkMap.Lookup(xhttp.ContentDisposition); ok {
   446  		putOpts.ContentDisposition = disp
   447  	}
   448  	if cc, ok := lkMap.Lookup(xhttp.CacheControl); ok {
   449  		putOpts.CacheControl = cc
   450  	}
   451  	if mode, ok := lkMap.Lookup(xhttp.AmzObjectLockMode); ok {
   452  		rmode := miniogo.RetentionMode(mode)
   453  		putOpts.Mode = rmode
   454  	}
   455  	if retainDateStr, ok := lkMap.Lookup(xhttp.AmzObjectLockRetainUntilDate); ok {
   456  		rdate, err := time.Parse(time.RFC3339, retainDateStr)
   457  		if err != nil {
   458  			return putOpts, err
   459  		}
   460  		putOpts.RetainUntilDate = rdate
   461  	}
   462  	if lhold, ok := lkMap.Lookup(xhttp.AmzObjectLockLegalHold); ok {
   463  		putOpts.LegalHold = miniogo.LegalHoldStatus(lhold)
   464  	}
   465  	if crypto.S3.IsEncrypted(objInfo.UserDefined) {
   466  		putOpts.ServerSideEncryption = encrypt.NewSSE()
   467  	}
   468  	return
   469  }
   470  
   471  type replicationAction string
   472  
   473  const (
   474  	replicateMetadata replicationAction = "metadata"
   475  	replicateNone     replicationAction = "none"
   476  	replicateAll      replicationAction = "all"
   477  )
   478  
   479  // matches k1 with all keys, returns 'true' if one of them matches
   480  func equals(k1 string, keys ...string) bool {
   481  	for _, k2 := range keys {
    482  		if strings.EqualFold(k1, k2) {
   483  			return true
   484  		}
   485  	}
   486  	return false
   487  }
   488  
   489  // returns replicationAction by comparing metadata between source and target
   490  func getReplicationAction(oi1 ObjectInfo, oi2 minio.ObjectInfo) replicationAction {
   491  	// needs full replication
   492  	if oi1.ETag != oi2.ETag ||
   493  		oi1.VersionID != oi2.VersionID ||
   494  		oi1.Size != oi2.Size ||
   495  		oi1.DeleteMarker != oi2.IsDeleteMarker ||
   496  		oi1.ModTime.Unix() != oi2.LastModified.Unix() {
   497  		return replicateAll
   498  	}
   499  
   500  	if oi1.ContentType != oi2.ContentType {
   501  		return replicateMetadata
   502  	}
   503  
   504  	if oi1.ContentEncoding != "" {
   505  		enc, ok := oi2.Metadata[xhttp.ContentEncoding]
   506  		if !ok {
   507  			enc, ok = oi2.Metadata[strings.ToLower(xhttp.ContentEncoding)]
   508  			if !ok {
   509  				return replicateMetadata
   510  			}
   511  		}
   512  		if strings.Join(enc, ",") != oi1.ContentEncoding {
   513  			return replicateMetadata
   514  		}
   515  	}
   516  
   517  	t, _ := tags.ParseObjectTags(oi1.UserTags)
   518  	if !reflect.DeepEqual(oi2.UserTags, t.ToMap()) {
   519  		return replicateMetadata
   520  	}
   521  
   522  	// Compare only necessary headers
   523  	compareKeys := []string{
   524  		"Expires",
   525  		"Cache-Control",
   526  		"Content-Language",
   527  		"Content-Disposition",
   528  		"X-Amz-Object-Lock-Mode",
   529  		"X-Amz-Object-Lock-Retain-Until-Date",
   530  		"X-Amz-Object-Lock-Legal-Hold",
   531  		"X-Amz-Website-Redirect-Location",
   532  		"X-Amz-Meta-",
   533  	}
   534  
   535  	// compare metadata on both maps to see if meta is identical
   536  	compareMeta1 := make(map[string]string)
   537  	for k, v := range oi1.UserDefined {
   538  		var found bool
   539  		for _, prefix := range compareKeys {
   540  			if !strings.HasPrefix(strings.ToLower(k), strings.ToLower(prefix)) {
   541  				continue
   542  			}
   543  			found = true
   544  			break
   545  		}
   546  		if found {
   547  			compareMeta1[strings.ToLower(k)] = v
   548  		}
   549  	}
   550  
   551  	compareMeta2 := make(map[string]string)
   552  	for k, v := range oi2.Metadata {
   553  		var found bool
   554  		for _, prefix := range compareKeys {
   555  			if !strings.HasPrefix(strings.ToLower(k), strings.ToLower(prefix)) {
   556  				continue
   557  			}
   558  			found = true
   559  			break
   560  		}
   561  		if found {
   562  			compareMeta2[strings.ToLower(k)] = strings.Join(v, ",")
   563  		}
   564  	}
   565  
   566  	if !reflect.DeepEqual(compareMeta1, compareMeta2) {
   567  		return replicateMetadata
   568  	}
   569  
   570  	return replicateNone
   571  }
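
         // Illustrative example (not part of the original source): when ETag, version ID,
         // size, delete-marker state and modification time all match but a mutable
         // attribute such as the content-type differs, only metadata needs to be copied
         // to the target. The literals below are hypothetical.
         //
         //	src := ObjectInfo{ETag: "abc", Size: 10, ContentType: "text/plain"}
         //	dst := minio.ObjectInfo{ETag: "abc", Size: 10, ContentType: "application/json"}
         //	getReplicationAction(src, dst) // returns replicateMetadata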
   572  
   573  // replicateObject replicates the specified version of the object to destination bucket
   574  // The source object is then updated to reflect the replication status.
   575  func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI ObjectLayer) {
   576  	objInfo := ri.ObjectInfo
   577  	bucket := objInfo.Bucket
   578  	object := objInfo.Name
   579  
   580  	cfg, err := getReplicationConfig(ctx, bucket)
   581  	if err != nil {
   582  		logger.LogIf(ctx, err)
   583  		sendEvent(eventArgs{
   584  			EventName:  event.ObjectReplicationNotTracked,
   585  			BucketName: bucket,
   586  			Object:     objInfo,
   587  			Host:       "Internal: [Replication]",
   588  		})
   589  		return
   590  	}
   591  	tgt := globalBucketTargetSys.GetRemoteTargetClient(ctx, cfg.RoleArn)
   592  	if tgt == nil {
   593  		logger.LogIf(ctx, fmt.Errorf("failed to get target for bucket:%s arn:%s", bucket, cfg.RoleArn))
   594  		sendEvent(eventArgs{
   595  			EventName:  event.ObjectReplicationNotTracked,
   596  			BucketName: bucket,
   597  			Object:     objInfo,
   598  			Host:       "Internal: [Replication]",
   599  		})
   600  		return
   601  	}
   602  	gr, err := objectAPI.GetObjectNInfo(ctx, bucket, object, nil, http.Header{}, writeLock, ObjectOptions{
   603  		VersionID: objInfo.VersionID,
   604  	})
   605  	if err != nil {
   606  		sendEvent(eventArgs{
   607  			EventName:  event.ObjectReplicationNotTracked,
   608  			BucketName: bucket,
   609  			Object:     objInfo,
   610  			Host:       "Internal: [Replication]",
   611  		})
   612  		logger.LogIf(ctx, fmt.Errorf("Unable to update replicate for %s/%s(%s): %w", bucket, object, objInfo.VersionID, err))
   613  		return
   614  	}
   615  	defer gr.Close() // hold write lock for entire transaction
   616  
   617  	objInfo = gr.ObjInfo
   618  	size, err := objInfo.GetActualSize()
   619  	if err != nil {
   620  		logger.LogIf(ctx, err)
   621  		sendEvent(eventArgs{
   622  			EventName:  event.ObjectReplicationNotTracked,
   623  			BucketName: bucket,
   624  			Object:     objInfo,
   625  			Host:       "Internal: [Replication]",
   626  		})
   627  		return
   628  	}
   629  
   630  	dest := cfg.GetDestination()
   631  	if dest.Bucket == "" {
   632  		logger.LogIf(ctx, fmt.Errorf("Unable to replicate object %s(%s), bucket is empty", objInfo.Name, objInfo.VersionID))
   633  		sendEvent(eventArgs{
   634  			EventName:  event.ObjectReplicationNotTracked,
   635  			BucketName: bucket,
   636  			Object:     objInfo,
   637  			Host:       "Internal: [Replication]",
   638  		})
   639  		return
   640  	}
   641  
   642  	rtype := replicateAll
   643  	oi, err := tgt.StatObject(ctx, dest.Bucket, object, miniogo.StatObjectOptions{
   644  		VersionID: objInfo.VersionID,
   645  		Internal: miniogo.AdvancedGetOptions{
   646  			ReplicationProxyRequest: "false",
   647  		}})
   648  	if err == nil {
   649  		rtype = getReplicationAction(objInfo, oi)
   650  		if rtype == replicateNone {
   651  			// object with same VersionID already exists, replication kicked off by
   652  			// PutObject might have completed
   653  			return
   654  		}
   655  	}
   656  	replicationStatus := replication.Completed
   657  	// use core client to avoid doing multipart on PUT
   658  	c := &miniogo.Core{Client: tgt.Client}
   659  	if rtype != replicateAll {
   660  		// replicate metadata for object tagging/copy with metadata replacement
   661  		srcOpts := miniogo.CopySrcOptions{
   662  			Bucket:    dest.Bucket,
   663  			Object:    object,
   664  			VersionID: objInfo.VersionID,
   665  		}
   666  		dstOpts := miniogo.PutObjectOptions{
   667  			Internal: miniogo.AdvancedPutOptions{
   668  				SourceVersionID:    objInfo.VersionID,
    669  				ReplicationRequest: true, // always set this to distinguish between `mc mirror` replication and server-side replication
   670  			}}
   671  		if _, err = c.CopyObject(ctx, dest.Bucket, object, dest.Bucket, object, getCopyObjMetadata(objInfo, dest), srcOpts, dstOpts); err != nil {
   672  			replicationStatus = replication.Failed
   673  			logger.LogIf(ctx, fmt.Errorf("Unable to replicate metadata for object %s/%s(%s): %s", bucket, objInfo.Name, objInfo.VersionID, err))
   674  		}
   675  	} else {
   676  		target, err := globalBucketMetadataSys.GetBucketTarget(bucket, cfg.RoleArn)
   677  		if err != nil {
   678  			logger.LogIf(ctx, fmt.Errorf("failed to get target for replication bucket:%s cfg:%s err:%s", bucket, cfg.RoleArn, err))
   679  			sendEvent(eventArgs{
   680  				EventName:  event.ObjectReplicationNotTracked,
   681  				BucketName: bucket,
   682  				Object:     objInfo,
   683  				Host:       "Internal: [Replication]",
   684  			})
   685  			return
   686  		}
   687  
   688  		putOpts, err := putReplicationOpts(ctx, dest, objInfo)
   689  		if err != nil {
    690  			logger.LogIf(ctx, fmt.Errorf("failed to get replication options for bucket:%s cfg:%s err:%w", bucket, cfg.RoleArn, err))
   691  			sendEvent(eventArgs{
   692  				EventName:  event.ObjectReplicationNotTracked,
   693  				BucketName: bucket,
   694  				Object:     objInfo,
   695  				Host:       "Internal: [Replication]",
   696  			})
   697  			return
   698  		}
   699  
    700  		// Set up bandwidth throttling
   701  		peers, _ := globalEndpoints.peers()
   702  		totalNodesCount := len(peers)
   703  		if totalNodesCount == 0 {
   704  			totalNodesCount = 1 // For standalone erasure coding
   705  		}
   706  
   707  		var headerSize int
   708  		for k, v := range putOpts.Header() {
   709  			headerSize += len(k) + len(v)
   710  		}
   711  
   712  		opts := &bandwidth.MonitorReaderOptions{
   713  			Bucket:               objInfo.Bucket,
   714  			Object:               objInfo.Name,
   715  			HeaderSize:           headerSize,
   716  			BandwidthBytesPerSec: target.BandwidthLimit / int64(totalNodesCount),
   717  			ClusterBandwidth:     target.BandwidthLimit,
   718  		}
   719  
   720  		r := bandwidth.NewMonitoredReader(ctx, globalBucketMonitor, gr, opts)
   721  		if _, err = c.PutObject(ctx, dest.Bucket, object, r, size, "", "", putOpts); err != nil {
   722  			replicationStatus = replication.Failed
   723  			logger.LogIf(ctx, fmt.Errorf("Unable to replicate for object %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
   724  		}
   725  	}
   726  
   727  	prevReplStatus := objInfo.ReplicationStatus
   728  	objInfo.UserDefined[xhttp.AmzBucketReplicationStatus] = replicationStatus.String()
   729  	if objInfo.UserTags != "" {
   730  		objInfo.UserDefined[xhttp.AmzObjectTagging] = objInfo.UserTags
   731  	}
   732  
   733  	// FIXME: add support for missing replication events
   734  	// - event.ObjectReplicationMissedThreshold
   735  	// - event.ObjectReplicationReplicatedAfterThreshold
   736  	var eventName = event.ObjectReplicationComplete
   737  	if replicationStatus == replication.Failed {
   738  		eventName = event.ObjectReplicationFailed
   739  	}
   740  
   741  	z, ok := objectAPI.(*erasureServerPools)
   742  	if !ok {
   743  		return
   744  	}
    745  	// Leave metadata in `PENDING` state if inline replication fails, to save IOPS
   746  	if ri.OpType == replication.HealReplicationType || replicationStatus == replication.Completed {
   747  		// This lower level implementation is necessary to avoid write locks from CopyObject.
   748  		poolIdx, err := z.getPoolIdx(ctx, bucket, object, objInfo.Size)
   749  		if err != nil {
   750  			logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
   751  		} else {
   752  			fi := FileInfo{}
   753  			fi.VersionID = objInfo.VersionID
   754  			fi.Metadata = make(map[string]string, len(objInfo.UserDefined))
   755  			for k, v := range objInfo.UserDefined {
   756  				fi.Metadata[k] = v
   757  			}
   758  			if err = z.serverPools[poolIdx].getHashedSet(object).updateObjectMeta(ctx, bucket, object, fi); err != nil {
   759  				logger.LogIf(ctx, fmt.Errorf("Unable to update replication metadata for %s/%s(%s): %w", bucket, objInfo.Name, objInfo.VersionID, err))
   760  			}
   761  		}
   762  		opType := replication.MetadataReplicationType
   763  		if rtype == replicateAll {
   764  			opType = replication.ObjectReplicationType
   765  		}
   766  		globalReplicationStats.Update(bucket, size, replicationStatus, prevReplStatus, opType)
   767  		sendEvent(eventArgs{
   768  			EventName:  eventName,
   769  			BucketName: bucket,
   770  			Object:     objInfo,
   771  			Host:       "Internal: [Replication]",
   772  		})
   773  	}
   774  	// re-queue failures once more - keep a retry count to avoid flooding the queue if
   775  	// the target site is down. Leave it to scanner to catch up instead.
   776  	if replicationStatus == replication.Failed && ri.RetryCount < 1 {
   777  		ri.OpType = replication.HealReplicationType
   778  		ri.RetryCount++
   779  		globalReplicationPool.queueReplicaTask(ctx, ri)
   780  	}
   781  }
   782  
   783  // filterReplicationStatusMetadata filters replication status metadata for COPY
   784  func filterReplicationStatusMetadata(metadata map[string]string) map[string]string {
   785  	// Copy on write
   786  	dst := metadata
   787  	var copied bool
   788  	delKey := func(key string) {
   789  		if _, ok := metadata[key]; !ok {
   790  			return
   791  		}
   792  		if !copied {
   793  			dst = make(map[string]string, len(metadata))
   794  			for k, v := range metadata {
   795  				dst[k] = v
   796  			}
   797  			copied = true
   798  		}
   799  		delete(dst, key)
   800  	}
   801  
   802  	delKey(xhttp.AmzBucketReplicationStatus)
   803  	return dst
   804  }
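
         // Illustrative example (not part of the original source): the input map is only
         // copied when the replication status key is actually present; otherwise it is
         // returned as-is. The literals below are hypothetical.
         //
         //	in := map[string]string{xhttp.AmzBucketReplicationStatus: "PENDING", "key": "value"}
         //	out := filterReplicationStatusMetadata(in)
         //	// out is a copy containing only {"key": "value"}; in is left unmodified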
   805  
    806  // DeletedObjectVersionInfo has information about a deleted object version
   807  type DeletedObjectVersionInfo struct {
   808  	DeletedObject
   809  	Bucket string
   810  }
   811  
   812  var (
   813  	globalReplicationPool  *ReplicationPool
   814  	globalReplicationStats *ReplicationStats
   815  )
   816  
   817  // ReplicationPool describes replication pool
   818  type ReplicationPool struct {
   819  	once               sync.Once
   820  	mu                 sync.Mutex
   821  	size               int
   822  	replicaCh          chan ReplicateObjectInfo
   823  	replicaDeleteCh    chan DeletedObjectVersionInfo
   824  	mrfReplicaCh       chan ReplicateObjectInfo
   825  	mrfReplicaDeleteCh chan DeletedObjectVersionInfo
   826  	killCh             chan struct{}
   827  	wg                 sync.WaitGroup
   828  	ctx                context.Context
   829  	objLayer           ObjectLayer
   830  }
   831  
   832  // NewReplicationPool creates a pool of replication workers of specified size
   833  func NewReplicationPool(ctx context.Context, o ObjectLayer, sz int) *ReplicationPool {
   834  	pool := &ReplicationPool{
   835  		replicaCh:          make(chan ReplicateObjectInfo, 1000),
   836  		replicaDeleteCh:    make(chan DeletedObjectVersionInfo, 1000),
   837  		mrfReplicaCh:       make(chan ReplicateObjectInfo, 100000),
    838  		mrfReplicaDeleteCh: make(chan DeletedObjectVersionInfo, 100000),
		killCh:             make(chan struct{}), // initialized so Resize can signal workers to exit; sending on a nil channel would block forever
   839  		ctx:                ctx,
   840  		objLayer:           o,
   841  	}
   842  	pool.Resize(sz)
    843  	// add a long-running worker for handling most recent failures/pending replications
   844  	go pool.AddMRFWorker()
   845  	return pool
   846  }
   847  
   848  // AddMRFWorker adds a pending/failed replication worker to handle requests that could not be queued
   849  // to the other workers
   850  func (p *ReplicationPool) AddMRFWorker() {
   851  	for {
   852  		select {
   853  		case <-p.ctx.Done():
   854  			return
   855  		case oi, ok := <-p.mrfReplicaCh:
   856  			if !ok {
   857  				return
   858  			}
   859  			replicateObject(p.ctx, oi, p.objLayer)
   860  		case doi, ok := <-p.mrfReplicaDeleteCh:
   861  			if !ok {
   862  				return
   863  			}
   864  			replicateDelete(p.ctx, doi, p.objLayer)
   865  		}
   866  	}
   867  }
   868  
   869  // AddWorker adds a replication worker to the pool
   870  func (p *ReplicationPool) AddWorker() {
   871  	defer p.wg.Done()
   872  	for {
   873  		select {
   874  		case <-p.ctx.Done():
   875  			return
   876  		case oi, ok := <-p.replicaCh:
   877  			if !ok {
   878  				return
   879  			}
   880  			replicateObject(p.ctx, oi, p.objLayer)
   881  		case doi, ok := <-p.replicaDeleteCh:
   882  			if !ok {
   883  				return
   884  			}
   885  			replicateDelete(p.ctx, doi, p.objLayer)
   886  		case <-p.killCh:
   887  			return
   888  		}
   889  	}
   890  
   891  }
   892  
    893  // Resize replication pool to new size
   894  func (p *ReplicationPool) Resize(n int) {
   895  	p.mu.Lock()
   896  	defer p.mu.Unlock()
   897  
   898  	for p.size < n {
   899  		p.size++
   900  		p.wg.Add(1)
   901  		go p.AddWorker()
   902  	}
   903  	for p.size > n {
   904  		p.size--
   905  		go func() { p.killCh <- struct{}{} }()
   906  	}
   907  }
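
         // Illustrative usage (not part of the original source): the pool can be grown or
         // shrunk at runtime, for example when the configured replication worker count
         // changes. The counts below are hypothetical.
         //
         //	pool := NewReplicationPool(ctx, objAPI, 10)
         //	pool.Resize(20) // starts 10 additional workers
         //	pool.Resize(5)  // signals 15 workers to exit via killCh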
   908  
   909  func (p *ReplicationPool) queueReplicaTask(ctx context.Context, ri ReplicateObjectInfo) {
   910  	if p == nil {
   911  		return
   912  	}
   913  	select {
   914  	case <-ctx.Done():
   915  		p.once.Do(func() {
   916  			close(p.replicaCh)
   917  			close(p.mrfReplicaCh)
   918  		})
   919  	case p.replicaCh <- ri:
   920  	case p.mrfReplicaCh <- ri:
   921  		// queue all overflows into the mrfReplicaCh to handle incoming pending/failed operations
   922  	default:
   923  	}
   924  }
   925  
   926  func (p *ReplicationPool) queueReplicaDeleteTask(ctx context.Context, doi DeletedObjectVersionInfo) {
   927  	if p == nil {
   928  		return
   929  	}
   930  	select {
   931  	case <-ctx.Done():
   932  		p.once.Do(func() {
   933  			close(p.replicaDeleteCh)
   934  			close(p.mrfReplicaDeleteCh)
   935  		})
   936  	case p.replicaDeleteCh <- doi:
   937  	case p.mrfReplicaDeleteCh <- doi:
   938  		// queue all overflows into the mrfReplicaDeleteCh to handle incoming pending/failed operations
   939  	default:
   940  	}
   941  }
   942  
   943  func initBackgroundReplication(ctx context.Context, objectAPI ObjectLayer) {
   944  	globalReplicationPool = NewReplicationPool(ctx, objectAPI, globalAPIConfig.getReplicationWorkers())
   945  	globalReplicationStats = NewReplicationStats(ctx, objectAPI)
   946  }
   947  
   948  // get Reader from replication target if active-active replication is in place and
   949  // this node returns a 404
   950  func proxyGetToReplicationTarget(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, opts ObjectOptions) (gr *GetObjectReader, proxy bool) {
   951  	tgt, oi, proxy, err := proxyHeadToRepTarget(ctx, bucket, object, opts)
   952  	if !proxy || err != nil {
   953  		return nil, false
   954  	}
   955  	fn, off, length, err := NewGetObjectReader(rs, oi, opts)
   956  	if err != nil {
   957  		return nil, false
   958  	}
   959  	gopts := miniogo.GetObjectOptions{
   960  		VersionID:            opts.VersionID,
   961  		ServerSideEncryption: opts.ServerSideEncryption,
   962  		Internal: miniogo.AdvancedGetOptions{
   963  			ReplicationProxyRequest: "true",
   964  		},
   965  	}
   966  	// get correct offsets for encrypted object
   967  	if off >= 0 && length >= 0 {
   968  		if err := gopts.SetRange(off, off+length-1); err != nil {
   969  			return nil, false
   970  		}
   971  	}
   972  	// Make sure to match ETag when proxying.
   973  	if err = gopts.SetMatchETag(oi.ETag); err != nil {
   974  		return nil, false
   975  	}
   976  	c := miniogo.Core{Client: tgt.Client}
   977  	obj, _, _, err := c.GetObject(ctx, bucket, object, gopts)
   978  	if err != nil {
   979  		return nil, false
   980  	}
   981  	closeReader := func() { obj.Close() }
   982  
   983  	reader, err := fn(obj, h, opts.CheckPrecondFn, closeReader)
   984  	if err != nil {
   985  		return nil, false
   986  	}
   987  	reader.ObjInfo = oi.Clone()
   988  	return reader, true
   989  }
   990  
    991  // isProxyable returns true if a replication config exists for this bucket and its destination bucket shares the same name (active-active replication)
   992  func isProxyable(ctx context.Context, bucket string) bool {
   993  	cfg, err := getReplicationConfig(ctx, bucket)
   994  	if err != nil {
   995  		return false
   996  	}
   997  	dest := cfg.GetDestination()
   998  	return dest.Bucket == bucket
   999  }
  1000  
  1001  func proxyHeadToRepTarget(ctx context.Context, bucket, object string, opts ObjectOptions) (tgt *TargetClient, oi ObjectInfo, proxy bool, err error) {
  1002  	// this option is set when active-active replication is in place between site A -> B,
  1003  	// and site B does not have the object yet.
  1004  	if opts.ProxyRequest || (opts.ProxyHeaderSet && !opts.ProxyRequest) { // true only when site B sets MinIOSourceProxyRequest header
  1005  		return nil, oi, false, nil
  1006  	}
  1007  	cfg, err := getReplicationConfig(ctx, bucket)
  1008  	if err != nil {
  1009  		return nil, oi, false, err
  1010  	}
  1011  	dest := cfg.GetDestination()
  1012  	if dest.Bucket != bucket { // not active-active
  1013  		return nil, oi, false, err
  1014  	}
  1015  	ssec := false
  1016  	if opts.ServerSideEncryption != nil {
  1017  		ssec = opts.ServerSideEncryption.Type() == encrypt.SSEC
  1018  	}
  1019  	ropts := replication.ObjectOpts{
  1020  		Name: object,
  1021  		SSEC: ssec,
  1022  	}
  1023  	if !cfg.Replicate(ropts) { // no matching rule for object prefix
  1024  		return nil, oi, false, nil
  1025  	}
  1026  	tgt = globalBucketTargetSys.GetRemoteTargetClient(ctx, cfg.RoleArn)
  1027  	if tgt == nil || tgt.isOffline() {
  1028  		return nil, oi, false, fmt.Errorf("target is offline or not configured")
  1029  	}
  1030  
  1031  	gopts := miniogo.GetObjectOptions{
  1032  		VersionID:            opts.VersionID,
  1033  		ServerSideEncryption: opts.ServerSideEncryption,
  1034  		Internal: miniogo.AdvancedGetOptions{
  1035  			ReplicationProxyRequest: "true",
  1036  		},
  1037  	}
  1038  
  1039  	objInfo, err := tgt.StatObject(ctx, dest.Bucket, object, gopts)
  1040  	if err != nil {
  1041  		return nil, oi, false, err
  1042  	}
  1043  
  1044  	tags, _ := tags.MapToObjectTags(objInfo.UserTags)
  1045  	oi = ObjectInfo{
  1046  		Bucket:            bucket,
  1047  		Name:              object,
  1048  		ModTime:           objInfo.LastModified,
  1049  		Size:              objInfo.Size,
  1050  		ETag:              objInfo.ETag,
  1051  		VersionID:         objInfo.VersionID,
  1052  		IsLatest:          objInfo.IsLatest,
  1053  		DeleteMarker:      objInfo.IsDeleteMarker,
  1054  		ContentType:       objInfo.ContentType,
  1055  		Expires:           objInfo.Expires,
  1056  		StorageClass:      objInfo.StorageClass,
  1057  		ReplicationStatus: replication.StatusType(objInfo.ReplicationStatus),
  1058  		UserTags:          tags.String(),
  1059  	}
  1060  	oi.UserDefined = make(map[string]string, len(objInfo.Metadata))
  1061  	for k, v := range objInfo.Metadata {
  1062  		oi.UserDefined[k] = v[0]
  1063  	}
  1064  	ce, ok := oi.UserDefined[xhttp.ContentEncoding]
  1065  	if !ok {
  1066  		ce, ok = oi.UserDefined[strings.ToLower(xhttp.ContentEncoding)]
  1067  	}
  1068  	if ok {
  1069  		oi.ContentEncoding = ce
  1070  	}
  1071  	return tgt, oi, true, nil
  1072  }
  1073  
  1074  // get object info from replication target if active-active replication is in place and
  1075  // this node returns a 404
  1076  func proxyHeadToReplicationTarget(ctx context.Context, bucket, object string, opts ObjectOptions) (oi ObjectInfo, proxy bool, err error) {
  1077  	_, oi, proxy, err = proxyHeadToRepTarget(ctx, bucket, object, opts)
  1078  	return oi, proxy, err
  1079  }
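
         // Illustrative usage (not part of the original source): a HEAD handler that gets
         // a "not found" error for an object in an active-active replicated bucket may
         // proxy the request to the remote target before answering 404; sketch only, the
         // surrounding handler variables are hypothetical.
         //
         //	if isErrObjectNotFound(err) && isProxyable(ctx, bucket) {
         //		if roi, ok, perr := proxyHeadToReplicationTarget(ctx, bucket, object, opts); perr == nil && ok {
         //			objInfo = roi
         //		}
         //	}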
  1080  
  1081  func scheduleReplication(ctx context.Context, objInfo ObjectInfo, o ObjectLayer, sync bool, opType replication.Type) {
  1082  	if sync {
  1083  		replicateObject(ctx, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType}, o)
  1084  	} else {
  1085  		globalReplicationPool.queueReplicaTask(GlobalContext, ReplicateObjectInfo{ObjectInfo: objInfo, OpType: opType})
  1086  	}
  1087  	if sz, err := objInfo.GetActualSize(); err == nil {
  1088  		globalReplicationStats.Update(objInfo.Bucket, sz, objInfo.ReplicationStatus, replication.StatusType(""), opType)
  1089  	}
  1090  }
  1091  
  1092  func scheduleReplicationDelete(ctx context.Context, dv DeletedObjectVersionInfo, o ObjectLayer, sync bool) {
  1093  	globalReplicationPool.queueReplicaDeleteTask(GlobalContext, dv)
  1094  	globalReplicationStats.Update(dv.Bucket, 0, replication.Pending, replication.StatusType(""), replication.DeleteReplicationType)
  1095  }