github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/site-replication.go (about) 1 // Copyright (c) 2015-2022 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "bytes" 22 "context" 23 "encoding/base64" 24 "encoding/binary" 25 "encoding/json" 26 "encoding/xml" 27 "errors" 28 "fmt" 29 "math/rand" 30 "net/url" 31 "reflect" 32 "runtime" 33 "sort" 34 "strings" 35 "sync" 36 "time" 37 38 "github.com/minio/madmin-go/v3" 39 "github.com/minio/minio-go/v7" 40 minioClient "github.com/minio/minio-go/v7" 41 "github.com/minio/minio-go/v7/pkg/credentials" 42 "github.com/minio/minio-go/v7/pkg/replication" 43 "github.com/minio/minio-go/v7/pkg/set" 44 "github.com/minio/minio/internal/auth" 45 "github.com/minio/minio/internal/bucket/lifecycle" 46 sreplication "github.com/minio/minio/internal/bucket/replication" 47 "github.com/minio/minio/internal/logger" 48 "github.com/minio/pkg/v2/policy" 49 "github.com/puzpuzpuz/xsync/v3" 50 ) 51 52 const ( 53 srStatePrefix = minioConfigPrefix + "/site-replication" 54 srStateFile = "state.json" 55 ) 56 57 const ( 58 srStateFormatVersion1 = 1 59 ) 60 61 var ( 62 errSRCannotJoin = SRError{ 63 Cause: errors.New("this site is already configured for site-replication"), 64 Code: ErrSiteReplicationInvalidRequest, 65 } 66 errSRDuplicateSites = SRError{ 67 Cause: errors.New("duplicate sites provided for site-replication"), 68 Code: ErrSiteReplicationInvalidRequest, 69 } 70 errSRSelfNotFound = SRError{ 71 Cause: errors.New("none of the given sites correspond to the current one"), 72 Code: ErrSiteReplicationInvalidRequest, 73 } 74 errSRPeerNotFound = SRError{ 75 Cause: errors.New("peer not found"), 76 Code: ErrSiteReplicationInvalidRequest, 77 } 78 errSRRequestorNotFound = SRError{ 79 Cause: errors.New("requesting site not found in site replication config"), 80 Code: ErrSiteReplicationInvalidRequest, 81 } 82 errSRNotEnabled = SRError{ 83 Cause: errors.New("site replication is not enabled"), 84 Code: ErrSiteReplicationInvalidRequest, 85 } 86 errSRResyncStarted = SRError{ 87 Cause: errors.New("site replication resync is already in progress"), 88 Code: ErrSiteReplicationInvalidRequest, 89 } 90 errSRResyncCanceled = SRError{ 91 Cause: errors.New("site replication resync is already canceled"), 92 Code: ErrSiteReplicationInvalidRequest, 93 } 94 errSRNoResync = SRError{ 95 Cause: errors.New("no resync in progress"), 96 Code: ErrSiteReplicationInvalidRequest, 97 } 98 errSRResyncToSelf = SRError{ 99 Cause: errors.New("invalid peer specified - cannot resync to self"), 100 Code: ErrSiteReplicationInvalidRequest, 101 } 102 ) 103 104 func errSRInvalidRequest(err error) SRError { 105 return SRError{ 106 Cause: err, 107 Code: ErrSiteReplicationInvalidRequest, 108 } 109 } 110 111 func errSRPeerResp(err error) SRError { 112 return SRError{ 113 Cause: 
err, 114 Code: ErrSiteReplicationPeerResp, 115 } 116 } 117 118 func errSRBackendIssue(err error) SRError { 119 return SRError{ 120 Cause: err, 121 Code: ErrSiteReplicationBackendIssue, 122 } 123 } 124 125 func errSRServiceAccount(err error) SRError { 126 return SRError{ 127 Cause: err, 128 Code: ErrSiteReplicationServiceAccountError, 129 } 130 } 131 132 func errSRBucketConfigError(err error) SRError { 133 return SRError{ 134 Cause: err, 135 Code: ErrSiteReplicationBucketConfigError, 136 } 137 } 138 139 func errSRBucketMetaError(err error) SRError { 140 return SRError{ 141 Cause: err, 142 Code: ErrSiteReplicationBucketMetaError, 143 } 144 } 145 146 func errSRIAMError(err error) SRError { 147 return SRError{ 148 Cause: err, 149 Code: ErrSiteReplicationIAMError, 150 } 151 } 152 153 func errSRConfigMissingError(err error) SRError { 154 return SRError{ 155 Cause: err, 156 Code: ErrSiteReplicationConfigMissing, 157 } 158 } 159 160 func errSRIAMConfigMismatch(peer1, peer2 string, s1, s2 madmin.IDPSettings) SRError { 161 return SRError{ 162 Cause: fmt.Errorf("IAM/IDP settings mismatch between %s and %s: %#v vs %#v", peer1, peer2, s1, s2), 163 Code: ErrSiteReplicationIAMConfigMismatch, 164 } 165 } 166 167 var errSRObjectLayerNotReady = SRError{ 168 Cause: fmt.Errorf("object layer not ready"), 169 Code: ErrServerNotInitialized, 170 } 171 172 func getSRStateFilePath() string { 173 return srStatePrefix + SlashSeparator + srStateFile 174 } 175 176 // SRError - wrapped error for site replication. 177 type SRError struct { 178 Cause error 179 Code APIErrorCode 180 } 181 182 func (c SRError) Error() string { 183 if c.Cause != nil { 184 return c.Cause.Error() 185 } 186 return "<nil>" 187 } 188 189 func (c SRError) Unwrap() error { 190 return c.Cause 191 } 192 193 func wrapSRErr(err error) SRError { 194 return SRError{Cause: err, Code: ErrInternalError} 195 } 196 197 // SiteReplicationSys - manages cluster-level replication. 198 type SiteReplicationSys struct { 199 sync.RWMutex 200 201 enabled bool 202 203 // In-memory and persisted multi-site replication state. 204 state srState 205 206 iamMetaCache srIAMCache 207 } 208 209 type srState srStateV1 210 211 // srStateV1 represents version 1 of the site replication state persistence 212 // format. 213 type srStateV1 struct { 214 Name string `json:"name"` 215 216 // Peers maps peers by their deploymentID 217 Peers map[string]madmin.PeerInfo `json:"peers"` 218 ServiceAccountAccessKey string `json:"serviceAccountAccessKey"` 219 UpdatedAt time.Time `json:"updatedAt"` 220 } 221 222 // srStateData represents the format of the current `srStateFile`. 223 type srStateData struct { 224 Version int `json:"version"` 225 226 SRState srStateV1 `json:"srState"` 227 } 228 229 // Init - initialize the site replication manager. 230 func (c *SiteReplicationSys) Init(ctx context.Context, objAPI ObjectLayer) error { 231 go c.startHealRoutine(ctx, objAPI) 232 r := rand.New(rand.NewSource(time.Now().UnixNano())) 233 for { 234 err := c.loadFromDisk(ctx, objAPI) 235 if err == errConfigNotFound { 236 return nil 237 } 238 if err == nil { 239 break 240 } 241 logger.LogOnceIf(context.Background(), fmt.Errorf("unable to initialize site replication subsystem: (%w)", err), "site-relication-init") 242 243 duration := time.Duration(r.Float64() * float64(time.Minute)) 244 if duration < time.Second { 245 // Make sure to sleep at least a second to avoid high CPU ticks. 
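// (r.Float64() is uniform in [0, 1), so the computed delay is a random
// fraction of a minute; the clamp below only raises sub-second values to a
// full second so retries do not spin.)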
246 duration = time.Second 247 } 248 time.Sleep(duration) 249 } 250 c.RLock() 251 defer c.RUnlock() 252 if c.enabled { 253 logger.Info("Cluster replication initialized") 254 } 255 return nil 256 } 257 258 func (c *SiteReplicationSys) loadFromDisk(ctx context.Context, objAPI ObjectLayer) error { 259 buf, err := readConfig(ctx, objAPI, getSRStateFilePath()) 260 if err != nil { 261 if errors.Is(err, errConfigNotFound) { 262 c.Lock() 263 defer c.Unlock() 264 c.state = srState{} 265 c.enabled = false 266 } 267 return err 268 } 269 270 // attempt to read just the version key in the state file to ensure we 271 // are reading a compatible version. 272 var ver struct { 273 Version int `json:"version"` 274 } 275 err = json.Unmarshal(buf, &ver) 276 if err != nil { 277 return err 278 } 279 if ver.Version != srStateFormatVersion1 { 280 return fmt.Errorf("Unexpected ClusterRepl state version: %d", ver.Version) 281 } 282 283 var sdata srStateData 284 err = json.Unmarshal(buf, &sdata) 285 if err != nil { 286 return err 287 } 288 289 c.Lock() 290 defer c.Unlock() 291 c.state = srState(sdata.SRState) 292 c.enabled = len(c.state.Peers) != 0 293 return nil 294 } 295 296 func (c *SiteReplicationSys) saveToDisk(ctx context.Context, state srState) error { 297 sdata := srStateData{ 298 Version: srStateFormatVersion1, 299 SRState: srStateV1(state), 300 } 301 buf, err := json.Marshal(sdata) 302 if err != nil { 303 return err 304 } 305 306 objAPI := newObjectLayerFn() 307 if objAPI == nil { 308 return errServerNotInitialized 309 } 310 311 if err = saveConfig(ctx, objAPI, getSRStateFilePath(), buf); err != nil { 312 return err 313 } 314 315 for _, err := range globalNotificationSys.ReloadSiteReplicationConfig(ctx) { 316 logger.LogIf(ctx, err) 317 } 318 319 c.Lock() 320 defer c.Unlock() 321 c.state = state 322 c.enabled = len(c.state.Peers) != 0 323 return nil 324 } 325 326 func (c *SiteReplicationSys) removeFromDisk(ctx context.Context) error { 327 objAPI := newObjectLayerFn() 328 if objAPI == nil { 329 return errServerNotInitialized 330 } 331 332 if err := deleteConfig(ctx, objAPI, getSRStateFilePath()); err != nil { 333 return err 334 } 335 336 for _, err := range globalNotificationSys.ReloadSiteReplicationConfig(ctx) { 337 logger.LogIf(ctx, err) 338 } 339 340 c.Lock() 341 defer c.Unlock() 342 c.state = srState{} 343 c.enabled = false 344 return nil 345 } 346 347 const ( 348 // Access key of service account used for perform cluster-replication 349 // operations. 
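// The same account is shared by every site in the replication setup: the
// initiating site creates it as a root service account in AddPeerClusters
// below (NewServiceAccount with allowSiteReplicatorAccount set) and each
// joining peer recreates it in PeerJoinReq. Its access key is also recorded
// in the persisted srStateData; a rough sketch of that state.json payload,
// with purely illustrative values and peer fields elided:
//
//	{
//	  "version": 1,
//	  "srState": {
//	    "name": "site-a",
//	    "peers": { "<deployment-id>": { ... } },
//	    "serviceAccountAccessKey": "site-replicator-0",
//	    "updatedAt": "2024-01-01T00:00:00Z"
//	  }
//	}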
350 siteReplicatorSvcAcc = "site-replicator-0" 351 ) 352 353 // PeerSiteInfo is a wrapper struct around madmin.PeerSite with extra info on site status 354 type PeerSiteInfo struct { 355 madmin.PeerSite 356 self bool 357 DeploymentID string 358 Replicated bool // true if already participating in site replication 359 Empty bool // true if cluster has no buckets 360 } 361 362 // getSiteStatuses gathers more info on the sites being added 363 func (c *SiteReplicationSys) getSiteStatuses(ctx context.Context, sites ...madmin.PeerSite) (psi []PeerSiteInfo, err error) { 364 psi = make([]PeerSiteInfo, 0, len(sites)) 365 for _, v := range sites { 366 admClient, err := getAdminClient(v.Endpoint, v.AccessKey, v.SecretKey) 367 if err != nil { 368 return psi, errSRPeerResp(fmt.Errorf("unable to create admin client for %s: %w", v.Name, err)) 369 } 370 371 info, err := admClient.ServerInfo(ctx) 372 if err != nil { 373 return psi, errSRPeerResp(fmt.Errorf("unable to fetch server info for %s: %w", v.Name, err)) 374 } 375 376 s3Client, err := getS3Client(v) 377 if err != nil { 378 return psi, errSRPeerResp(fmt.Errorf("unable to create s3 client for %s: %w", v.Name, err)) 379 } 380 381 buckets, err := s3Client.ListBuckets(ctx) 382 if err != nil { 383 return psi, errSRPeerResp(fmt.Errorf("unable to list buckets for %s: %v", v.Name, err)) 384 } 385 386 psi = append(psi, PeerSiteInfo{ 387 PeerSite: v, 388 DeploymentID: info.DeploymentID, 389 Empty: len(buckets) == 0, 390 self: info.DeploymentID == globalDeploymentID(), 391 }) 392 } 393 return 394 } 395 396 // AddPeerClusters - add cluster sites for replication configuration. 397 func (c *SiteReplicationSys) AddPeerClusters(ctx context.Context, psites []madmin.PeerSite, opts madmin.SRAddOptions) (madmin.ReplicateAddStatus, error) { 398 sites, serr := c.getSiteStatuses(ctx, psites...) 399 if serr != nil { 400 return madmin.ReplicateAddStatus{}, serr 401 } 402 var ( 403 currSites madmin.SiteReplicationInfo 404 currDeploymentIDsSet = set.NewStringSet() 405 err error 406 ) 407 currSites, err = c.GetClusterInfo(ctx) 408 if err != nil { 409 return madmin.ReplicateAddStatus{}, errSRBackendIssue(err) 410 } 411 for _, v := range currSites.Sites { 412 currDeploymentIDsSet.Add(v.DeploymentID) 413 } 414 deploymentIDsSet := set.NewStringSet() 415 localHasBuckets := false 416 nonLocalPeerWithBuckets := "" 417 selfIdx := -1 418 for i, v := range sites { 419 // deploymentIDs must be unique 420 if deploymentIDsSet.Contains(v.DeploymentID) { 421 return madmin.ReplicateAddStatus{}, errSRDuplicateSites 422 } 423 deploymentIDsSet.Add(v.DeploymentID) 424 425 if v.self { 426 selfIdx = i 427 localHasBuckets = !v.Empty 428 continue 429 } 430 if !v.Empty && !currDeploymentIDsSet.Contains(v.DeploymentID) { 431 nonLocalPeerWithBuckets = v.Name 432 } 433 } 434 if selfIdx == -1 { 435 return madmin.ReplicateAddStatus{}, errSRBackendIssue(fmt.Errorf("global deployment ID %s mismatch, expected one of %s", globalDeploymentID(), deploymentIDsSet)) 436 } 437 if !currDeploymentIDsSet.IsEmpty() { 438 // If current cluster is already SR enabled and no new site being added ,fail. 
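// (i.e. the deployment-ID set in the request is identical to the set already
// participating, so there is nothing new to add)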
439 if currDeploymentIDsSet.Equals(deploymentIDsSet) { 440 return madmin.ReplicateAddStatus{}, errSRCannotJoin 441 } 442 if len(currDeploymentIDsSet.Intersection(deploymentIDsSet)) != len(currDeploymentIDsSet) { 443 diffSlc := getMissingSiteNames(currDeploymentIDsSet, deploymentIDsSet, currSites.Sites) 444 return madmin.ReplicateAddStatus{}, errSRInvalidRequest(fmt.Errorf("all existing replicated sites must be specified - missing %s", strings.Join(diffSlc, " "))) 445 } 446 } 447 448 // validate that all clusters are using the same IDP settings. 449 err = c.validateIDPSettings(ctx, sites) 450 if err != nil { 451 return madmin.ReplicateAddStatus{}, err 452 } 453 454 // For this `add` API, either all clusters must be empty or the local 455 // cluster must be the only one having some buckets. 456 if localHasBuckets && nonLocalPeerWithBuckets != "" { 457 return madmin.ReplicateAddStatus{}, errSRInvalidRequest(errors.New("only one cluster may have data when configuring site replication")) 458 } 459 460 if !localHasBuckets && nonLocalPeerWithBuckets != "" { 461 return madmin.ReplicateAddStatus{}, errSRInvalidRequest(fmt.Errorf("please send your request to the cluster containing data/buckets: %s", nonLocalPeerWithBuckets)) 462 } 463 464 // FIXME: Ideally, we also need to check if there are any global IAM 465 // policies and any (LDAP user created) service accounts on the other 466 // peer clusters, and if so, reject the cluster replicate add request. 467 // This is not yet implemented. 468 469 // VALIDATIONS COMPLETE. 470 471 // Create a common service account for all clusters, with root 472 // permissions. 473 474 // Create a local service account. 475 476 // Generate a secret key for the service account if not created already. 477 var secretKey string 478 var svcCred auth.Credentials 479 sa, _, err := globalIAMSys.getServiceAccount(ctx, siteReplicatorSvcAcc) 480 switch { 481 case err == errNoSuchServiceAccount: 482 _, secretKey, err = auth.GenerateCredentials() 483 if err != nil { 484 return madmin.ReplicateAddStatus{}, errSRServiceAccount(fmt.Errorf("unable to create local service account: %w", err)) 485 } 486 svcCred, _, err = globalIAMSys.NewServiceAccount(ctx, sites[selfIdx].AccessKey, nil, newServiceAccountOpts{ 487 accessKey: siteReplicatorSvcAcc, 488 secretKey: secretKey, 489 allowSiteReplicatorAccount: true, 490 }) 491 if err != nil { 492 return madmin.ReplicateAddStatus{}, errSRServiceAccount(fmt.Errorf("unable to create local service account: %w", err)) 493 } 494 case err == nil: 495 svcCred = sa.Credentials 496 secretKey = svcCred.SecretKey 497 default: 498 return madmin.ReplicateAddStatus{}, errSRBackendIssue(err) 499 } 500 501 currTime := time.Now() 502 joinReq := madmin.SRPeerJoinReq{ 503 SvcAcctAccessKey: svcCred.AccessKey, 504 SvcAcctSecretKey: secretKey, 505 Peers: make(map[string]madmin.PeerInfo), 506 UpdatedAt: currTime, 507 } 508 // check if few peers exist already and ILM expiry replcation is set to true 509 replicateILMExpirySet := false 510 if c.state.Peers != nil { 511 for _, pi := range c.state.Peers { 512 if pi.ReplicateILMExpiry { 513 replicateILMExpirySet = true 514 break 515 } 516 } 517 } 518 for _, v := range sites { 519 var peerReplicateILMExpiry bool 520 // if peers already exist and for one of them ReplicateILMExpiry 521 // set true, that means earlier replication of ILM expiry was set 522 // for the site replication. 
All new sites added to the setup should 523 // get this enabled as well 524 if replicateILMExpirySet { 525 peerReplicateILMExpiry = replicateILMExpirySet 526 } else { 527 peerReplicateILMExpiry = opts.ReplicateILMExpiry 528 } 529 joinReq.Peers[v.DeploymentID] = madmin.PeerInfo{ 530 Endpoint: v.Endpoint, 531 Name: v.Name, 532 DeploymentID: v.DeploymentID, 533 ReplicateILMExpiry: peerReplicateILMExpiry, 534 } 535 } 536 537 addedCount := 0 538 var ( 539 peerAddErr error 540 admClient *madmin.AdminClient 541 ) 542 543 for _, v := range sites { 544 if v.self { 545 continue 546 } 547 switch { 548 case currDeploymentIDsSet.Contains(v.DeploymentID): 549 admClient, err = c.getAdminClient(ctx, v.DeploymentID) 550 default: 551 admClient, err = getAdminClient(v.Endpoint, v.AccessKey, v.SecretKey) 552 } 553 if err != nil { 554 peerAddErr = errSRPeerResp(fmt.Errorf("unable to create admin client for %s: %w", v.Name, err)) 555 break 556 } 557 joinReq.SvcAcctParent = v.AccessKey 558 err = admClient.SRPeerJoin(ctx, joinReq) 559 if err != nil { 560 peerAddErr = errSRPeerResp(fmt.Errorf("unable to link with peer %s: %w", v.Name, err)) 561 break 562 } 563 addedCount++ 564 } 565 566 if peerAddErr != nil { 567 if addedCount == 0 { 568 return madmin.ReplicateAddStatus{}, peerAddErr 569 } 570 // In this case, it means at least one cluster was added 571 // successfully, we need to send a response to the client with 572 // some details - FIXME: the disks on this cluster would need to 573 // be cleaned to recover. 574 partial := madmin.ReplicateAddStatus{ 575 Status: madmin.ReplicateAddStatusPartial, 576 ErrDetail: peerAddErr.Error(), 577 } 578 579 return partial, nil 580 } 581 582 // Other than handling existing buckets, we can now save the cluster 583 // replication configuration state. 584 state := srState{ 585 Name: sites[selfIdx].Name, 586 Peers: joinReq.Peers, 587 ServiceAccountAccessKey: svcCred.AccessKey, 588 UpdatedAt: currTime, 589 } 590 591 if err = c.saveToDisk(ctx, state); err != nil { 592 return madmin.ReplicateAddStatus{ 593 Status: madmin.ReplicateAddStatusPartial, 594 ErrDetail: fmt.Sprintf("unable to save cluster-replication state on local: %v", err), 595 }, nil 596 } 597 598 if !globalSiteReplicatorCred.IsValid() { 599 globalSiteReplicatorCred.Set(svcCred) 600 } 601 result := madmin.ReplicateAddStatus{ 602 Success: true, 603 Status: madmin.ReplicateAddStatusSuccess, 604 } 605 606 if err := c.syncToAllPeers(ctx, opts); err != nil { 607 result.InitialSyncErrorMessage = err.Error() 608 } 609 610 return result, nil 611 } 612 613 // PeerJoinReq - internal API handler to respond to a peer cluster's request to join. 
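// The request is built by AddPeerClusters above; roughly, for each non-self
// peer the initiating site sends (only fields actually set above are shown,
// values are placeholders):
//
//	joinReq := madmin.SRPeerJoinReq{
//		SvcAcctAccessKey: svcCred.AccessKey, // the shared "site-replicator-0" account
//		SvcAcctSecretKey: secretKey,
//		Peers:            make(map[string]madmin.PeerInfo), // keyed by deployment ID
//		UpdatedAt:        currTime,
//	}
//	joinReq.SvcAcctParent = v.AccessKey // parent differs per peer
//	err := admClient.SRPeerJoin(ctx, joinReq)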
614 func (c *SiteReplicationSys) PeerJoinReq(ctx context.Context, arg madmin.SRPeerJoinReq) error { 615 var ourName string 616 for d, p := range arg.Peers { 617 if d == globalDeploymentID() { 618 ourName = p.Name 619 break 620 } 621 } 622 if ourName == "" { 623 return errSRSelfNotFound 624 } 625 626 sa, _, err := globalIAMSys.GetServiceAccount(ctx, arg.SvcAcctAccessKey) 627 if err == errNoSuchServiceAccount { 628 sa, _, err = globalIAMSys.NewServiceAccount(ctx, arg.SvcAcctParent, nil, newServiceAccountOpts{ 629 accessKey: arg.SvcAcctAccessKey, 630 secretKey: arg.SvcAcctSecretKey, 631 allowSiteReplicatorAccount: arg.SvcAcctAccessKey == siteReplicatorSvcAcc, 632 }) 633 } 634 if err != nil { 635 return errSRServiceAccount(fmt.Errorf("unable to create service account on %s: %v", ourName, err)) 636 } 637 638 peers := make(map[string]madmin.PeerInfo, len(arg.Peers)) 639 for dID, pi := range arg.Peers { 640 if c.state.Peers != nil { 641 if existingPeer, ok := c.state.Peers[dID]; ok { 642 // retain existing ReplicateILMExpiry of peer if its already set 643 // and incoming arg has it false. it could be default false 644 if !pi.ReplicateILMExpiry && existingPeer.ReplicateILMExpiry { 645 pi.ReplicateILMExpiry = existingPeer.ReplicateILMExpiry 646 } 647 } 648 } 649 peers[dID] = pi 650 } 651 state := srState{ 652 Name: ourName, 653 Peers: peers, 654 ServiceAccountAccessKey: arg.SvcAcctAccessKey, 655 UpdatedAt: arg.UpdatedAt, 656 } 657 if err = c.saveToDisk(ctx, state); err != nil { 658 return errSRBackendIssue(fmt.Errorf("unable to save cluster-replication state to drive on %s: %v", ourName, err)) 659 } 660 if !globalSiteReplicatorCred.IsValid() { 661 globalSiteReplicatorCred.Set(sa) 662 } 663 664 return nil 665 } 666 667 // GetIDPSettings returns info about the configured identity provider. It is 668 // used to validate that all peers have the same IDP. 
669 func (c *SiteReplicationSys) GetIDPSettings(ctx context.Context) madmin.IDPSettings { 670 s := madmin.IDPSettings{} 671 s.LDAP = madmin.LDAPSettings{ 672 IsLDAPEnabled: globalIAMSys.LDAPConfig.Enabled(), 673 LDAPUserDNSearchBase: globalIAMSys.LDAPConfig.LDAP.UserDNSearchBaseDistName, 674 LDAPUserDNSearchFilter: globalIAMSys.LDAPConfig.LDAP.UserDNSearchFilter, 675 LDAPGroupSearchBase: globalIAMSys.LDAPConfig.LDAP.GroupSearchBaseDistName, 676 LDAPGroupSearchFilter: globalIAMSys.LDAPConfig.LDAP.GroupSearchFilter, 677 } 678 s.OpenID = globalIAMSys.OpenIDConfig.GetSettings() 679 if s.OpenID.Enabled { 680 s.OpenID.Region = globalSite.Region 681 } 682 return s 683 } 684 685 func (c *SiteReplicationSys) validateIDPSettings(ctx context.Context, peers []PeerSiteInfo) error { 686 s := make([]madmin.IDPSettings, 0, len(peers)) 687 for _, v := range peers { 688 if v.self { 689 s = append(s, c.GetIDPSettings(ctx)) 690 continue 691 } 692 693 admClient, err := getAdminClient(v.Endpoint, v.AccessKey, v.SecretKey) 694 if err != nil { 695 return errSRPeerResp(fmt.Errorf("unable to create admin client for %s: %w", v.Name, err)) 696 } 697 698 is, err := admClient.SRPeerGetIDPSettings(ctx) 699 if err != nil { 700 return errSRPeerResp(fmt.Errorf("unable to fetch IDP settings from %s: %v", v.Name, err)) 701 } 702 s = append(s, is) 703 } 704 705 for i := 1; i < len(s); i++ { 706 if !reflect.DeepEqual(s[i], s[0]) { 707 return errSRIAMConfigMismatch(peers[0].Name, peers[i].Name, s[0], s[i]) 708 } 709 } 710 711 return nil 712 } 713 714 // Netperf for site-replication net perf 715 func (c *SiteReplicationSys) Netperf(ctx context.Context, duration time.Duration) (results madmin.SiteNetPerfResult, err error) { 716 infos, err := globalSiteReplicationSys.GetClusterInfo(ctx) 717 if err != nil { 718 return results, err 719 } 720 var wg sync.WaitGroup 721 var resultsMu sync.RWMutex 722 for _, info := range infos.Sites { 723 info := info 724 // will call siteNetperf, means call others's adminAPISiteReplicationDevNull 725 if globalDeploymentID() == info.DeploymentID { 726 wg.Add(1) 727 go func() { 728 defer wg.Done() 729 result := madmin.SiteNetPerfNodeResult{} 730 cli, err := globalSiteReplicationSys.getAdminClient(ctx, info.DeploymentID) 731 if err != nil { 732 result.Error = err.Error() 733 } else { 734 result = siteNetperf(ctx, duration) 735 result.Endpoint = cli.GetEndpointURL().String() 736 } 737 resultsMu.Lock() 738 results.NodeResults = append(results.NodeResults, result) 739 resultsMu.Unlock() 740 return 741 }() 742 continue 743 } 744 wg.Add(1) 745 go func() { 746 defer wg.Done() 747 ctx, cancel := context.WithTimeout(ctx, duration+10*time.Second) 748 defer cancel() 749 result := perfNetRequest( 750 ctx, 751 info.DeploymentID, 752 adminPathPrefix+adminAPIVersionPrefix+adminAPISiteReplicationNetPerf, 753 nil, 754 ) 755 resultsMu.Lock() 756 results.NodeResults = append(results.NodeResults, result) 757 resultsMu.Unlock() 758 return 759 }() 760 } 761 wg.Wait() 762 return 763 } 764 765 // GetClusterInfo - returns site replication information. 
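// A minimal caller-side sketch (Netperf above reads it the same way); when
// site replication is not configured the zero value is returned with Enabled
// left false:
//
//	info, err := globalSiteReplicationSys.GetClusterInfo(ctx)
//	if err == nil && info.Enabled {
//		for _, site := range info.Sites { // sorted by site name
//			fmt.Println(site.Name, site.Endpoint)
//		}
//	}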
766 func (c *SiteReplicationSys) GetClusterInfo(ctx context.Context) (info madmin.SiteReplicationInfo, err error) { 767 c.RLock() 768 defer c.RUnlock() 769 if !c.enabled { 770 return info, nil 771 } 772 773 info.Enabled = true 774 info.Name = c.state.Name 775 info.Sites = make([]madmin.PeerInfo, 0, len(c.state.Peers)) 776 for _, peer := range c.state.Peers { 777 info.Sites = append(info.Sites, peer) 778 } 779 sort.Slice(info.Sites, func(i, j int) bool { 780 return info.Sites[i].Name < info.Sites[j].Name 781 }) 782 783 info.ServiceAccountAccessKey = c.state.ServiceAccountAccessKey 784 return info, nil 785 } 786 787 const ( 788 makeBucketWithVersion = "MakeBucketWithVersioning" 789 configureReplication = "ConfigureReplication" 790 deleteBucket = "DeleteBucket" 791 replicateIAMItem = "SRPeerReplicateIAMItem" 792 replicateBucketMetadata = "SRPeerReplicateBucketMeta" 793 siteReplicationEdit = "SiteReplicationEdit" 794 ) 795 796 // MakeBucketHook - called during a regular make bucket call when cluster 797 // replication is enabled. It is responsible for the creation of the same bucket 798 // on remote clusters, and creating replication rules on local and peer 799 // clusters. 800 func (c *SiteReplicationSys) MakeBucketHook(ctx context.Context, bucket string, opts MakeBucketOptions) error { 801 // At this point, the local bucket is created. 802 803 c.RLock() 804 defer c.RUnlock() 805 if !c.enabled { 806 return nil 807 } 808 809 optsMap := make(map[string]string) 810 if opts.LockEnabled { 811 optsMap["lockEnabled"] = "true" 812 optsMap["versioningEnabled"] = "true" 813 } 814 if opts.VersioningEnabled { 815 optsMap["versioningEnabled"] = "true" 816 } 817 if opts.ForceCreate { 818 optsMap["forceCreate"] = "true" 819 } 820 createdAt, _ := globalBucketMetadataSys.CreatedAt(bucket) 821 optsMap["createdAt"] = createdAt.UTC().Format(time.RFC3339Nano) 822 opts.CreatedAt = createdAt 823 824 // Create bucket and enable versioning on all peers. 825 makeBucketConcErr := c.concDo( 826 func() error { 827 return c.annotateErr(makeBucketWithVersion, c.PeerBucketMakeWithVersioningHandler(ctx, bucket, opts)) 828 }, 829 func(deploymentID string, p madmin.PeerInfo) error { 830 admClient, err := c.getAdminClient(ctx, deploymentID) 831 if err != nil { 832 return err 833 } 834 835 return c.annotatePeerErr(p.Name, makeBucketWithVersion, admClient.SRPeerBucketOps(ctx, bucket, madmin.MakeWithVersioningBktOp, optsMap)) 836 }, 837 makeBucketWithVersion, 838 ) 839 840 // Create bucket remotes and add replication rules for the bucket on self and peers. 841 makeRemotesConcErr := c.concDo( 842 func() error { 843 return c.annotateErr(configureReplication, c.PeerBucketConfigureReplHandler(ctx, bucket)) 844 }, 845 func(deploymentID string, p madmin.PeerInfo) error { 846 admClient, err := c.getAdminClient(ctx, deploymentID) 847 if err != nil { 848 return err 849 } 850 851 return c.annotatePeerErr(p.Name, configureReplication, admClient.SRPeerBucketOps(ctx, bucket, madmin.ConfigureReplBktOp, nil)) 852 }, 853 configureReplication, 854 ) 855 856 if err := errors.Unwrap(makeBucketConcErr); err != nil { 857 return err 858 } 859 860 if err := errors.Unwrap(makeRemotesConcErr); err != nil { 861 return err 862 } 863 864 return nil 865 } 866 867 // DeleteBucketHook - called during a regular delete bucket call when cluster 868 // replication is enabled. It is responsible for the deletion of the same bucket 869 // on remote clusters. 
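// A sketch of the assumed call pattern (the real caller lives in the bucket
// deletion path elsewhere in this package): the bucket is deleted locally
// first and the hook then fans the delete out to the peers; forceDelete
// mirrors the caller's force flag.
//
//	if err := objAPI.DeleteBucket(ctx, bucket, opts); err == nil {
//		globalSiteReplicationSys.DeleteBucketHook(ctx, bucket, forceDelete)
//	}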
870 func (c *SiteReplicationSys) DeleteBucketHook(ctx context.Context, bucket string, forceDelete bool) error { 871 // At this point, the local bucket is deleted. 872 873 c.RLock() 874 defer c.RUnlock() 875 if !c.enabled { 876 return nil 877 } 878 879 op := madmin.DeleteBucketBktOp 880 if forceDelete { 881 op = madmin.ForceDeleteBucketBktOp 882 } 883 884 // Send bucket delete to other clusters. 885 cerr := c.concDo(nil, func(deploymentID string, p madmin.PeerInfo) error { 886 admClient, err := c.getAdminClient(ctx, deploymentID) 887 if err != nil { 888 return wrapSRErr(err) 889 } 890 891 return c.annotatePeerErr(p.Name, deleteBucket, admClient.SRPeerBucketOps(ctx, bucket, op, nil)) 892 }, 893 deleteBucket, 894 ) 895 return errors.Unwrap(cerr) 896 } 897 898 // PeerBucketMakeWithVersioningHandler - creates bucket and enables versioning. 899 func (c *SiteReplicationSys) PeerBucketMakeWithVersioningHandler(ctx context.Context, bucket string, opts MakeBucketOptions) error { 900 objAPI := newObjectLayerFn() 901 if objAPI == nil { 902 return errServerNotInitialized 903 } 904 905 err := objAPI.MakeBucket(ctx, bucket, opts) 906 if err != nil { 907 // Check if this is a bucket exists error. 908 _, ok1 := err.(BucketExists) 909 _, ok2 := err.(BucketAlreadyExists) 910 if !ok1 && !ok2 { 911 return wrapSRErr(c.annotateErr(makeBucketWithVersion, err)) 912 } 913 } else { 914 // Load updated bucket metadata into memory as new 915 // bucket was created. 916 globalNotificationSys.LoadBucketMetadata(GlobalContext, bucket) 917 } 918 919 meta, err := globalBucketMetadataSys.Get(bucket) 920 if err != nil { 921 return wrapSRErr(c.annotateErr(makeBucketWithVersion, err)) 922 } 923 924 meta.SetCreatedAt(opts.CreatedAt) 925 926 meta.VersioningConfigXML = enabledBucketVersioningConfig 927 if opts.LockEnabled { 928 meta.ObjectLockConfigXML = enabledBucketObjectLockConfig 929 } 930 931 if err := meta.Save(context.Background(), objAPI); err != nil { 932 return wrapSRErr(err) 933 } 934 935 globalBucketMetadataSys.Set(bucket, meta) 936 937 // Load updated bucket metadata into memory as new metadata updated. 938 globalNotificationSys.LoadBucketMetadata(GlobalContext, bucket) 939 return nil 940 } 941 942 // PeerBucketConfigureReplHandler - configures replication remote and 943 // replication rules to all other peers for the local bucket. 944 func (c *SiteReplicationSys) PeerBucketConfigureReplHandler(ctx context.Context, bucket string) error { 945 creds, err := c.getPeerCreds() 946 if err != nil { 947 return wrapSRErr(err) 948 } 949 950 // The following function, creates a bucket remote and sets up a bucket 951 // replication rule for the given peer. 952 configurePeerFn := func(d string, peer madmin.PeerInfo) error { 953 // Create bucket replication rule to this peer. 954 955 // To add the bucket replication rule, we fetch the current 956 // server configuration, and convert it to minio-go's 957 // replication configuration type (by converting to xml and 958 // parsing it back), use minio-go's add rule function, and 959 // finally convert it back to the server type (again via xml). 960 // This is needed as there is no add-rule function in the server 961 // yet. 962 963 // Though we do not check if the rule already exists, this is 964 // not a problem as we are always using the same replication 965 // rule ID - if the rule already exists, it is just replaced. 
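// Concretely, the rule created for a peer with deployment ID d is named
// "site-repl-<d>" (see ruleID below), so re-running this function for the
// same peer overwrites its rule rather than accumulating duplicates.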
966 replicationConfigS, _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, bucket) 967 if err != nil { 968 _, ok := err.(BucketReplicationConfigNotFound) 969 if !ok { 970 return err 971 } 972 } 973 var replicationConfig replication.Config 974 if replicationConfigS != nil { 975 replCfgSBytes, err := xml.Marshal(replicationConfigS) 976 if err != nil { 977 return err 978 } 979 err = xml.Unmarshal(replCfgSBytes, &replicationConfig) 980 if err != nil { 981 return err 982 } 983 } 984 var ( 985 ruleID = fmt.Sprintf("site-repl-%s", d) 986 hasRule bool 987 ) 988 var ruleARN string 989 for _, r := range replicationConfig.Rules { 990 if r.ID == ruleID { 991 hasRule = true 992 ruleARN = r.Destination.Bucket 993 } 994 } 995 996 ep, _ := url.Parse(peer.Endpoint) 997 var targets []madmin.BucketTarget 998 if targetsPtr, _ := globalBucketTargetSys.ListBucketTargets(ctx, bucket); targetsPtr != nil { 999 targets = targetsPtr.Targets 1000 } 1001 targetARN := "" 1002 var updateTgt, updateBW bool 1003 var targetToUpdate madmin.BucketTarget 1004 for _, target := range targets { 1005 if target.Arn == ruleARN { 1006 targetARN = ruleARN 1007 updateBW = peer.DefaultBandwidth.Limit != 0 && target.BandwidthLimit == 0 1008 if (target.URL().String() != peer.Endpoint) || updateBW { 1009 updateTgt = true 1010 targetToUpdate = target 1011 } 1012 break 1013 } 1014 } 1015 // replication config had a stale target ARN - update the endpoint 1016 if updateTgt { 1017 targetToUpdate.Endpoint = ep.Host 1018 targetToUpdate.Secure = ep.Scheme == "https" 1019 targetToUpdate.Credentials = &madmin.Credentials{ 1020 AccessKey: creds.AccessKey, 1021 SecretKey: creds.SecretKey, 1022 } 1023 if !peer.SyncState.Empty() { 1024 targetToUpdate.ReplicationSync = (peer.SyncState == madmin.SyncEnabled) 1025 } 1026 if updateBW { 1027 targetToUpdate.BandwidthLimit = int64(peer.DefaultBandwidth.Limit) 1028 } 1029 err := globalBucketTargetSys.SetTarget(ctx, bucket, &targetToUpdate, true) 1030 if err != nil { 1031 return c.annotatePeerErr(peer.Name, "Bucket target update error", err) 1032 } 1033 targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 1034 if err != nil { 1035 return wrapSRErr(err) 1036 } 1037 tgtBytes, err := json.Marshal(&targets) 1038 if err != nil { 1039 return wrapSRErr(err) 1040 } 1041 if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil { 1042 return wrapSRErr(err) 1043 } 1044 1045 } 1046 // no replication rule for this peer or target ARN missing in bucket targets 1047 if targetARN == "" { 1048 bucketTarget := madmin.BucketTarget{ 1049 SourceBucket: bucket, 1050 Endpoint: ep.Host, 1051 Credentials: &madmin.Credentials{ 1052 AccessKey: creds.AccessKey, 1053 SecretKey: creds.SecretKey, 1054 }, 1055 TargetBucket: bucket, 1056 Secure: ep.Scheme == "https", 1057 API: "s3v4", 1058 Type: madmin.ReplicationService, 1059 Region: "", 1060 ReplicationSync: peer.SyncState == madmin.SyncEnabled, 1061 DeploymentID: d, 1062 BandwidthLimit: int64(peer.DefaultBandwidth.Limit), 1063 } 1064 var exists bool // true if ARN already exists 1065 bucketTarget.Arn, exists = globalBucketTargetSys.getRemoteARN(bucket, &bucketTarget, peer.DeploymentID) 1066 if !exists { // persist newly generated ARN to targets and metadata on disk 1067 err := globalBucketTargetSys.SetTarget(ctx, bucket, &bucketTarget, false) 1068 if err != nil { 1069 return c.annotatePeerErr(peer.Name, "Bucket target creation error", err) 1070 } 1071 targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 1072 if 
err != nil { 1073 return err 1074 } 1075 tgtBytes, err := json.Marshal(&targets) 1076 if err != nil { 1077 return err 1078 } 1079 if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil { 1080 return err 1081 } 1082 } 1083 targetARN = bucketTarget.Arn 1084 } 1085 opts := replication.Options{ 1086 // Set the ID so we can identify the rule as being 1087 // created for site-replication and include the 1088 // destination cluster's deployment ID. 1089 ID: ruleID, 1090 1091 // Use a helper to generate unique priority numbers. 1092 Priority: fmt.Sprintf("%d", getPriorityHelper(replicationConfig)), 1093 1094 Op: replication.AddOption, 1095 RuleStatus: "enable", 1096 DestBucket: targetARN, 1097 // Replicate everything! 1098 ReplicateDeletes: "enable", 1099 ReplicateDeleteMarkers: "enable", 1100 ReplicaSync: "enable", 1101 ExistingObjectReplicate: "enable", 1102 } 1103 1104 switch { 1105 case hasRule: 1106 if ruleARN != opts.DestBucket { 1107 // remove stale replication rule and replace rule with correct target ARN 1108 if len(replicationConfig.Rules) > 1 { 1109 err = replicationConfig.RemoveRule(opts) 1110 } else { 1111 replicationConfig = replication.Config{} 1112 } 1113 if err == nil { 1114 err = replicationConfig.AddRule(opts) 1115 } 1116 } else { 1117 err = replicationConfig.EditRule(opts) 1118 } 1119 default: 1120 err = replicationConfig.AddRule(opts) 1121 } 1122 if err != nil { 1123 return c.annotatePeerErr(peer.Name, "Error adding bucket replication rule", err) 1124 } 1125 1126 // Now convert the configuration back to server's type so we can 1127 // do some validation. 1128 newReplCfgBytes, err := xml.Marshal(replicationConfig) 1129 if err != nil { 1130 return err 1131 } 1132 newReplicationConfig, err := sreplication.ParseConfig(bytes.NewReader(newReplCfgBytes)) 1133 if err != nil { 1134 return err 1135 } 1136 sameTarget, apiErr := validateReplicationDestination(ctx, bucket, newReplicationConfig, true) 1137 if apiErr != noError { 1138 return fmt.Errorf("bucket replication config validation error: %#v", apiErr) 1139 } 1140 err = newReplicationConfig.Validate(bucket, sameTarget) 1141 if err != nil { 1142 return err 1143 } 1144 // Config looks good, so we save it. 1145 replCfgData, err := xml.Marshal(newReplicationConfig) 1146 if err != nil { 1147 return err 1148 } 1149 1150 _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketReplicationConfig, replCfgData) 1151 return c.annotatePeerErr(peer.Name, "Error updating replication configuration", err) 1152 } 1153 1154 c.RLock() 1155 defer c.RUnlock() 1156 errMap := make(map[string]error, len(c.state.Peers)) 1157 for d, peer := range c.state.Peers { 1158 if d == globalDeploymentID() { 1159 continue 1160 } 1161 errMap[d] = configurePeerFn(d, peer) 1162 } 1163 return c.toErrorFromErrMap(errMap, configureReplication) 1164 } 1165 1166 // PeerBucketDeleteHandler - deletes bucket on local in response to a delete 1167 // bucket request from a peer. 
1168 func (c *SiteReplicationSys) PeerBucketDeleteHandler(ctx context.Context, bucket string, opts DeleteBucketOptions) error { 1169 c.RLock() 1170 defer c.RUnlock() 1171 if !c.enabled { 1172 return errSRNotEnabled 1173 } 1174 1175 objAPI := newObjectLayerFn() 1176 if objAPI == nil { 1177 return errServerNotInitialized 1178 } 1179 1180 if globalDNSConfig != nil { 1181 if err := globalDNSConfig.Delete(bucket); err != nil { 1182 return err 1183 } 1184 } 1185 err := objAPI.DeleteBucket(ctx, bucket, opts) 1186 if err != nil { 1187 if globalDNSConfig != nil { 1188 if err2 := globalDNSConfig.Put(bucket); err2 != nil { 1189 logger.LogIf(ctx, fmt.Errorf("Unable to restore bucket DNS entry %w, please fix it manually", err2)) 1190 } 1191 } 1192 return err 1193 } 1194 1195 globalNotificationSys.DeleteBucketMetadata(ctx, bucket) 1196 1197 return nil 1198 } 1199 1200 // IAMChangeHook - called when IAM items need to be replicated to peer clusters. 1201 // This includes named policy creation, policy mapping changes and service 1202 // account changes. 1203 // 1204 // All policies are replicated. 1205 // 1206 // Policy mappings are only replicated when they are for LDAP users or groups 1207 // (as an external IDP is always assumed when SR is used). In the case of 1208 // OpenID, such mappings are provided from the IDP directly and so are not 1209 // applicable here. 1210 // 1211 // Service accounts are replicated as long as they are not meant for the root 1212 // user. 1213 // 1214 // STS accounts are replicated, but only if the session token is verifiable 1215 // using the local cluster's root credential. 1216 func (c *SiteReplicationSys) IAMChangeHook(ctx context.Context, item madmin.SRIAMItem) error { 1217 // The IAM item has already been applied to the local cluster at this 1218 // point, and only needs to be updated on all remote peer clusters. 1219 1220 c.RLock() 1221 defer c.RUnlock() 1222 if !c.enabled { 1223 return nil 1224 } 1225 1226 cerr := c.concDo(nil, func(d string, p madmin.PeerInfo) error { 1227 admClient, err := c.getAdminClient(ctx, d) 1228 if err != nil { 1229 return wrapSRErr(err) 1230 } 1231 1232 return c.annotatePeerErr(p.Name, replicateIAMItem, admClient.SRPeerReplicateIAMItem(ctx, item)) 1233 }, 1234 replicateIAMItem, 1235 ) 1236 return errors.Unwrap(cerr) 1237 } 1238 1239 // PeerAddPolicyHandler - copies IAM policy to local. A nil policy argument, 1240 // causes the named policy to be deleted. 1241 func (c *SiteReplicationSys) PeerAddPolicyHandler(ctx context.Context, policyName string, p *policy.Policy, updatedAt time.Time) error { 1242 var err error 1243 // skip overwrite of local update if peer sent stale info 1244 if !updatedAt.IsZero() { 1245 if p, err := globalIAMSys.store.GetPolicyDoc(policyName); err == nil && p.UpdateDate.After(updatedAt) { 1246 return nil 1247 } 1248 } 1249 if p == nil { 1250 err = globalIAMSys.DeletePolicy(ctx, policyName, true) 1251 } else { 1252 _, err = globalIAMSys.SetPolicy(ctx, policyName, *p) 1253 } 1254 if err != nil { 1255 return wrapSRErr(err) 1256 } 1257 return nil 1258 } 1259 1260 // PeerIAMUserChangeHandler - copies IAM user to local. 
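// As with the other Peer* change handlers in this file, a staleness guard is
// applied first: when the peer supplies a non-zero UpdatedAt and the local
// copy of the entity is newer, the incoming change is ignored so a delayed
// replication message cannot overwrite a more recent local update.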
1261 func (c *SiteReplicationSys) PeerIAMUserChangeHandler(ctx context.Context, change *madmin.SRIAMUser, updatedAt time.Time) error { 1262 if change == nil { 1263 return errSRInvalidRequest(errInvalidArgument) 1264 } 1265 // skip overwrite of local update if peer sent stale info 1266 if !updatedAt.IsZero() { 1267 if ui, err := globalIAMSys.GetUserInfo(ctx, change.AccessKey); err == nil && ui.UpdatedAt.After(updatedAt) { 1268 return nil 1269 } 1270 } 1271 1272 var err error 1273 if change.IsDeleteReq { 1274 err = globalIAMSys.DeleteUser(ctx, change.AccessKey, true) 1275 } else { 1276 if change.UserReq == nil { 1277 return errSRInvalidRequest(errInvalidArgument) 1278 } 1279 userReq := *change.UserReq 1280 if userReq.Status != "" && userReq.SecretKey == "" { 1281 // Status is set without secretKey updates means we are 1282 // only changing the account status. 1283 _, err = globalIAMSys.SetUserStatus(ctx, change.AccessKey, userReq.Status) 1284 } else { 1285 _, err = globalIAMSys.CreateUser(ctx, change.AccessKey, userReq) 1286 } 1287 } 1288 if err != nil { 1289 return wrapSRErr(err) 1290 } 1291 return nil 1292 } 1293 1294 // PeerGroupInfoChangeHandler - copies group changes to local. 1295 func (c *SiteReplicationSys) PeerGroupInfoChangeHandler(ctx context.Context, change *madmin.SRGroupInfo, updatedAt time.Time) error { 1296 if change == nil { 1297 return errSRInvalidRequest(errInvalidArgument) 1298 } 1299 updReq := change.UpdateReq 1300 var err error 1301 1302 // skip overwrite of local update if peer sent stale info 1303 if !updatedAt.IsZero() { 1304 if gd, err := globalIAMSys.GetGroupDescription(updReq.Group); err == nil && gd.UpdatedAt.After(updatedAt) { 1305 return nil 1306 } 1307 } 1308 1309 if updReq.IsRemove { 1310 _, err = globalIAMSys.RemoveUsersFromGroup(ctx, updReq.Group, updReq.Members) 1311 } else { 1312 if updReq.Status != "" && len(updReq.Members) == 0 { 1313 _, err = globalIAMSys.SetGroupStatus(ctx, updReq.Group, updReq.Status == madmin.GroupEnabled) 1314 } else { 1315 _, err = globalIAMSys.AddUsersToGroup(ctx, updReq.Group, updReq.Members) 1316 if err == nil && updReq.Status != madmin.GroupEnabled { 1317 _, err = globalIAMSys.SetGroupStatus(ctx, updReq.Group, updReq.Status == madmin.GroupEnabled) 1318 } 1319 } 1320 } 1321 if err != nil && !errors.Is(err, errNoSuchGroup) { 1322 return wrapSRErr(err) 1323 } 1324 return nil 1325 } 1326 1327 // PeerSvcAccChangeHandler - copies service-account change to local. 
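// The madmin.SRSvcAccChange argument carries a Create, Update or Delete
// payload and the switch below applies whichever is set. Session policies
// arrive as raw JSON and are parsed with policy.ParseConfig before the
// account is created or updated locally; the same UpdatedAt staleness guard
// as above is applied in each case.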
1328 func (c *SiteReplicationSys) PeerSvcAccChangeHandler(ctx context.Context, change *madmin.SRSvcAccChange, updatedAt time.Time) error { 1329 if change == nil { 1330 return errSRInvalidRequest(errInvalidArgument) 1331 } 1332 switch { 1333 case change.Create != nil: 1334 var sp *policy.Policy 1335 var err error 1336 if len(change.Create.SessionPolicy) > 0 { 1337 sp, err = policy.ParseConfig(bytes.NewReader(change.Create.SessionPolicy)) 1338 if err != nil { 1339 return wrapSRErr(err) 1340 } 1341 } 1342 // skip overwrite of local update if peer sent stale info 1343 if !updatedAt.IsZero() && change.Create.AccessKey != "" { 1344 if sa, _, err := globalIAMSys.getServiceAccount(ctx, change.Create.AccessKey); err == nil && sa.UpdatedAt.After(updatedAt) { 1345 return nil 1346 } 1347 } 1348 opts := newServiceAccountOpts{ 1349 accessKey: change.Create.AccessKey, 1350 secretKey: change.Create.SecretKey, 1351 sessionPolicy: sp, 1352 claims: change.Create.Claims, 1353 name: change.Create.Name, 1354 description: change.Create.Description, 1355 expiration: change.Create.Expiration, 1356 } 1357 _, _, err = globalIAMSys.NewServiceAccount(ctx, change.Create.Parent, change.Create.Groups, opts) 1358 if err != nil { 1359 return wrapSRErr(err) 1360 } 1361 1362 case change.Update != nil: 1363 var sp *policy.Policy 1364 var err error 1365 if len(change.Update.SessionPolicy) > 0 { 1366 sp, err = policy.ParseConfig(bytes.NewReader(change.Update.SessionPolicy)) 1367 if err != nil { 1368 return wrapSRErr(err) 1369 } 1370 } 1371 // skip overwrite of local update if peer sent stale info 1372 if !updatedAt.IsZero() { 1373 if sa, _, err := globalIAMSys.getServiceAccount(ctx, change.Update.AccessKey); err == nil && sa.UpdatedAt.After(updatedAt) { 1374 return nil 1375 } 1376 } 1377 opts := updateServiceAccountOpts{ 1378 secretKey: change.Update.SecretKey, 1379 status: change.Update.Status, 1380 name: change.Update.Name, 1381 description: change.Update.Description, 1382 sessionPolicy: sp, 1383 expiration: change.Update.Expiration, 1384 } 1385 1386 _, err = globalIAMSys.UpdateServiceAccount(ctx, change.Update.AccessKey, opts) 1387 if err != nil { 1388 return wrapSRErr(err) 1389 } 1390 1391 case change.Delete != nil: 1392 // skip overwrite of local update if peer sent stale info 1393 if !updatedAt.IsZero() { 1394 if sa, _, err := globalIAMSys.getServiceAccount(ctx, change.Delete.AccessKey); err == nil && sa.UpdatedAt.After(updatedAt) { 1395 return nil 1396 } 1397 } 1398 if err := globalIAMSys.DeleteServiceAccount(ctx, change.Delete.AccessKey, true); err != nil { 1399 return wrapSRErr(err) 1400 } 1401 1402 } 1403 1404 return nil 1405 } 1406 1407 // PeerPolicyMappingHandler - copies policy mapping to local. 1408 func (c *SiteReplicationSys) PeerPolicyMappingHandler(ctx context.Context, mapping *madmin.SRPolicyMapping, updatedAt time.Time) error { 1409 if mapping == nil { 1410 return errSRInvalidRequest(errInvalidArgument) 1411 } 1412 // skip overwrite of local update if peer sent stale info 1413 if !updatedAt.IsZero() { 1414 mp, ok := globalIAMSys.store.GetMappedPolicy(mapping.Policy, mapping.IsGroup) 1415 if ok && mp.UpdatedAt.After(updatedAt) { 1416 return nil 1417 } 1418 } 1419 1420 _, err := globalIAMSys.PolicyDBSet(ctx, mapping.UserOrGroup, mapping.Policy, IAMUserType(mapping.UserType), mapping.IsGroup) 1421 if err != nil { 1422 return wrapSRErr(err) 1423 } 1424 return nil 1425 } 1426 1427 // PeerSTSAccHandler - replicates STS credential locally. 
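// Only credentials whose session token verifies against this cluster's own
// token signing key are accepted: the claims are extracted with
// auth.ExtractClaims, the expiry is read from the "exp" claim, and for
// LDAP-issued tokens the user's groups are re-resolved against the local
// LDAP configuration before the credential is stored via SetTempUser.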
1428 func (c *SiteReplicationSys) PeerSTSAccHandler(ctx context.Context, stsCred *madmin.SRSTSCredential, updatedAt time.Time) error { 1429 if stsCred == nil { 1430 return errSRInvalidRequest(errInvalidArgument) 1431 } 1432 // skip overwrite of local update if peer sent stale info 1433 if !updatedAt.IsZero() { 1434 if u, _, err := globalIAMSys.getTempAccount(ctx, stsCred.AccessKey); err == nil { 1435 if u.UpdatedAt.After(updatedAt) { 1436 return nil 1437 } 1438 } 1439 } 1440 secretKey, err := getTokenSigningKey() 1441 if err != nil { 1442 return errSRInvalidRequest(err) 1443 } 1444 1445 // Verify the session token of the stsCred 1446 claims, err := auth.ExtractClaims(stsCred.SessionToken, secretKey) 1447 if err != nil { 1448 return fmt.Errorf("STS credential could not be verified: %w", err) 1449 } 1450 1451 mapClaims := claims.Map() 1452 expiry, err := auth.ExpToInt64(mapClaims["exp"]) 1453 if err != nil { 1454 return fmt.Errorf("Expiry claim was not found: %v: %w", mapClaims, err) 1455 } 1456 1457 cred := auth.Credentials{ 1458 AccessKey: stsCred.AccessKey, 1459 SecretKey: stsCred.SecretKey, 1460 Expiration: time.Unix(expiry, 0).UTC(), 1461 SessionToken: stsCred.SessionToken, 1462 ParentUser: stsCred.ParentUser, 1463 Status: auth.AccountOn, 1464 } 1465 1466 // Extract the username and lookup DN and groups in LDAP. 1467 ldapUser, isLDAPSTS := claims.Lookup(ldapUserN) 1468 if isLDAPSTS { 1469 // Need to lookup the groups from LDAP. 1470 _, ldapGroups, err := globalIAMSys.LDAPConfig.LookupUserDN(ldapUser) 1471 if err != nil { 1472 return fmt.Errorf("unable to query LDAP server for %s: %w", ldapUser, err) 1473 } 1474 1475 cred.Groups = ldapGroups 1476 } 1477 1478 // Set these credentials to IAM. 1479 if _, err := globalIAMSys.SetTempUser(ctx, cred.AccessKey, cred, stsCred.ParentPolicyMapping); err != nil { 1480 return fmt.Errorf("unable to save STS credential and/or parent policy mapping: %w", err) 1481 } 1482 1483 return nil 1484 } 1485 1486 // BucketMetaHook - called when bucket meta changes happen and need to be 1487 // replicated to peer clusters. 1488 func (c *SiteReplicationSys) BucketMetaHook(ctx context.Context, item madmin.SRBucketMeta) error { 1489 // The change has already been applied to the local cluster at this 1490 // point, and only needs to be updated on all remote peer clusters. 1491 1492 c.RLock() 1493 defer c.RUnlock() 1494 if !c.enabled { 1495 return nil 1496 } 1497 1498 cerr := c.concDo(nil, func(d string, p madmin.PeerInfo) error { 1499 admClient, err := c.getAdminClient(ctx, d) 1500 if err != nil { 1501 return wrapSRErr(err) 1502 } 1503 1504 return c.annotatePeerErr(p.Name, replicateBucketMetadata, admClient.SRPeerReplicateBucketMeta(ctx, item)) 1505 }, 1506 replicateBucketMetadata, 1507 ) 1508 return errors.Unwrap(cerr) 1509 } 1510 1511 // PeerBucketVersioningHandler - updates versioning config to local cluster. 1512 func (c *SiteReplicationSys) PeerBucketVersioningHandler(ctx context.Context, bucket string, versioning *string, updatedAt time.Time) error { 1513 if versioning != nil { 1514 // skip overwrite if local update is newer than peer update. 
1515 if !updatedAt.IsZero() { 1516 if _, updateTm, err := globalBucketMetadataSys.GetVersioningConfig(bucket); err == nil && updateTm.After(updatedAt) { 1517 return nil 1518 } 1519 } 1520 configData, err := base64.StdEncoding.DecodeString(*versioning) 1521 if err != nil { 1522 return wrapSRErr(err) 1523 } 1524 _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketVersioningConfig, configData) 1525 if err != nil { 1526 return wrapSRErr(err) 1527 } 1528 return nil 1529 } 1530 1531 return nil 1532 } 1533 1534 // PeerBucketMetadataUpdateHandler - merges the bucket metadata, save and ping other nodes 1535 func (c *SiteReplicationSys) PeerBucketMetadataUpdateHandler(ctx context.Context, item madmin.SRBucketMeta) error { 1536 objectAPI := newObjectLayerFn() 1537 if objectAPI == nil { 1538 return errSRObjectLayerNotReady 1539 } 1540 1541 if item.Bucket == "" || item.UpdatedAt.IsZero() { 1542 return wrapSRErr(errInvalidArgument) 1543 } 1544 1545 meta, err := readBucketMetadata(ctx, objectAPI, item.Bucket) 1546 if err != nil { 1547 return wrapSRErr(err) 1548 } 1549 1550 if meta.Created.After(item.UpdatedAt) { 1551 return nil 1552 } 1553 1554 if item.Policy != nil { 1555 meta.PolicyConfigJSON = item.Policy 1556 meta.PolicyConfigUpdatedAt = item.UpdatedAt 1557 } 1558 1559 if item.Versioning != nil { 1560 configData, err := base64.StdEncoding.DecodeString(*item.Versioning) 1561 if err != nil { 1562 return wrapSRErr(err) 1563 } 1564 meta.VersioningConfigXML = configData 1565 meta.VersioningConfigUpdatedAt = item.UpdatedAt 1566 } 1567 1568 if item.Tags != nil { 1569 configData, err := base64.StdEncoding.DecodeString(*item.Tags) 1570 if err != nil { 1571 return wrapSRErr(err) 1572 } 1573 meta.TaggingConfigXML = configData 1574 meta.TaggingConfigUpdatedAt = item.UpdatedAt 1575 } 1576 1577 if item.ObjectLockConfig != nil { 1578 configData, err := base64.StdEncoding.DecodeString(*item.ObjectLockConfig) 1579 if err != nil { 1580 return wrapSRErr(err) 1581 } 1582 meta.ObjectLockConfigXML = configData 1583 meta.ObjectLockConfigUpdatedAt = item.UpdatedAt 1584 } 1585 1586 if item.SSEConfig != nil { 1587 configData, err := base64.StdEncoding.DecodeString(*item.SSEConfig) 1588 if err != nil { 1589 return wrapSRErr(err) 1590 } 1591 meta.EncryptionConfigXML = configData 1592 meta.EncryptionConfigUpdatedAt = item.UpdatedAt 1593 } 1594 1595 if item.Quota != nil { 1596 meta.QuotaConfigJSON = item.Quota 1597 meta.QuotaConfigUpdatedAt = item.UpdatedAt 1598 } 1599 1600 return globalBucketMetadataSys.save(ctx, meta) 1601 } 1602 1603 // PeerBucketPolicyHandler - copies/deletes policy to local cluster. 1604 func (c *SiteReplicationSys) PeerBucketPolicyHandler(ctx context.Context, bucket string, policy *policy.BucketPolicy, updatedAt time.Time) error { 1605 // skip overwrite if local update is newer than peer update. 
1606 if !updatedAt.IsZero() { 1607 if _, updateTm, err := globalBucketMetadataSys.GetPolicyConfig(bucket); err == nil && updateTm.After(updatedAt) { 1608 return nil 1609 } 1610 } 1611 1612 if policy != nil { 1613 configData, err := json.Marshal(policy) 1614 if err != nil { 1615 return wrapSRErr(err) 1616 } 1617 1618 _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketPolicyConfig, configData) 1619 if err != nil { 1620 return wrapSRErr(err) 1621 } 1622 return nil 1623 } 1624 1625 // Delete the bucket policy 1626 _, err := globalBucketMetadataSys.Delete(ctx, bucket, bucketPolicyConfig) 1627 if err != nil { 1628 return wrapSRErr(err) 1629 } 1630 1631 return nil 1632 } 1633 1634 // PeerBucketTaggingHandler - copies/deletes tags to local cluster. 1635 func (c *SiteReplicationSys) PeerBucketTaggingHandler(ctx context.Context, bucket string, tags *string, updatedAt time.Time) error { 1636 // skip overwrite if local update is newer than peer update. 1637 if !updatedAt.IsZero() { 1638 if _, updateTm, err := globalBucketMetadataSys.GetTaggingConfig(bucket); err == nil && updateTm.After(updatedAt) { 1639 return nil 1640 } 1641 } 1642 1643 if tags != nil { 1644 configData, err := base64.StdEncoding.DecodeString(*tags) 1645 if err != nil { 1646 return wrapSRErr(err) 1647 } 1648 _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTaggingConfig, configData) 1649 if err != nil { 1650 return wrapSRErr(err) 1651 } 1652 return nil 1653 } 1654 1655 // Delete the tags 1656 _, err := globalBucketMetadataSys.Delete(ctx, bucket, bucketTaggingConfig) 1657 if err != nil { 1658 return wrapSRErr(err) 1659 } 1660 1661 return nil 1662 } 1663 1664 // PeerBucketObjectLockConfigHandler - sets object lock on local bucket. 1665 func (c *SiteReplicationSys) PeerBucketObjectLockConfigHandler(ctx context.Context, bucket string, objectLockData *string, updatedAt time.Time) error { 1666 if objectLockData != nil { 1667 // skip overwrite if local update is newer than peer update. 1668 if !updatedAt.IsZero() { 1669 if _, updateTm, err := globalBucketMetadataSys.GetObjectLockConfig(bucket); err == nil && updateTm.After(updatedAt) { 1670 return nil 1671 } 1672 } 1673 1674 configData, err := base64.StdEncoding.DecodeString(*objectLockData) 1675 if err != nil { 1676 return wrapSRErr(err) 1677 } 1678 _, err = globalBucketMetadataSys.Update(ctx, bucket, objectLockConfig, configData) 1679 if err != nil { 1680 return wrapSRErr(err) 1681 } 1682 return nil 1683 } 1684 1685 return nil 1686 } 1687 1688 // PeerBucketSSEConfigHandler - copies/deletes SSE config to local cluster. 1689 func (c *SiteReplicationSys) PeerBucketSSEConfigHandler(ctx context.Context, bucket string, sseConfig *string, updatedAt time.Time) error { 1690 // skip overwrite if local update is newer than peer update. 
1691 if !updatedAt.IsZero() { 1692 if _, updateTm, err := globalBucketMetadataSys.GetSSEConfig(bucket); err == nil && updateTm.After(updatedAt) { 1693 return nil 1694 } 1695 } 1696 1697 if sseConfig != nil { 1698 configData, err := base64.StdEncoding.DecodeString(*sseConfig) 1699 if err != nil { 1700 return wrapSRErr(err) 1701 } 1702 _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketSSEConfig, configData) 1703 if err != nil { 1704 return wrapSRErr(err) 1705 } 1706 return nil 1707 } 1708 1709 // Delete sse config 1710 _, err := globalBucketMetadataSys.Delete(ctx, bucket, bucketSSEConfig) 1711 if err != nil { 1712 return wrapSRErr(err) 1713 } 1714 return nil 1715 } 1716 1717 // PeerBucketQuotaConfigHandler - copies/deletes policy to local cluster. 1718 func (c *SiteReplicationSys) PeerBucketQuotaConfigHandler(ctx context.Context, bucket string, quota *madmin.BucketQuota, updatedAt time.Time) error { 1719 // skip overwrite if local update is newer than peer update. 1720 if !updatedAt.IsZero() { 1721 if _, updateTm, err := globalBucketMetadataSys.GetQuotaConfig(ctx, bucket); err == nil && updateTm.After(updatedAt) { 1722 return nil 1723 } 1724 } 1725 1726 if quota != nil { 1727 quotaData, err := json.Marshal(quota) 1728 if err != nil { 1729 return wrapSRErr(err) 1730 } 1731 1732 if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketQuotaConfigFile, quotaData); err != nil { 1733 return wrapSRErr(err) 1734 } 1735 1736 return nil 1737 } 1738 1739 // Delete the bucket policy 1740 _, err := globalBucketMetadataSys.Delete(ctx, bucket, bucketQuotaConfigFile) 1741 if err != nil { 1742 return wrapSRErr(err) 1743 } 1744 1745 return nil 1746 } 1747 1748 // PeerBucketLCConfigHandler - copies/deletes lifecycle config to local cluster 1749 func (c *SiteReplicationSys) PeerBucketLCConfigHandler(ctx context.Context, bucket string, expLCConfig *string, updatedAt time.Time) error { 1750 // skip overwrite if local update is newer than peer update. 1751 if !updatedAt.IsZero() { 1752 if cfg, _, err := globalBucketMetadataSys.GetLifecycleConfig(bucket); err == nil && (cfg.ExpiryUpdatedAt != nil && cfg.ExpiryUpdatedAt.After(updatedAt)) { 1753 return nil 1754 } 1755 } 1756 1757 if expLCConfig != nil { 1758 configData, err := mergeWithCurrentLCConfig(ctx, bucket, expLCConfig, updatedAt) 1759 if err != nil { 1760 return wrapSRErr(err) 1761 } 1762 _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketLifecycleConfig, configData) 1763 if err != nil { 1764 return wrapSRErr(err) 1765 } 1766 return nil 1767 } 1768 1769 // Delete ILM config 1770 _, err := globalBucketMetadataSys.Delete(ctx, bucket, bucketLifecycleConfig) 1771 if err != nil { 1772 return wrapSRErr(err) 1773 } 1774 return nil 1775 } 1776 1777 // getAdminClient - NOTE: ensure to take at least a read lock on SiteReplicationSys 1778 // before calling this. 1779 func (c *SiteReplicationSys) getAdminClient(ctx context.Context, deploymentID string) (*madmin.AdminClient, error) { 1780 creds, err := c.getPeerCreds() 1781 if err != nil { 1782 return nil, err 1783 } 1784 1785 peer, ok := c.state.Peers[deploymentID] 1786 if !ok { 1787 return nil, errSRPeerNotFound 1788 } 1789 1790 return getAdminClient(peer.Endpoint, creds.AccessKey, creds.SecretKey) 1791 } 1792 1793 // getAdminClientWithEndpoint - NOTE: ensure to take at least a read lock on SiteReplicationSys 1794 // before calling this. 
1795 func (c *SiteReplicationSys) getAdminClientWithEndpoint(ctx context.Context, deploymentID, endpoint string) (*madmin.AdminClient, error) { 1796 creds, err := c.getPeerCreds() 1797 if err != nil { 1798 return nil, err 1799 } 1800 1801 if _, ok := c.state.Peers[deploymentID]; !ok { 1802 return nil, errSRPeerNotFound 1803 } 1804 return getAdminClient(endpoint, creds.AccessKey, creds.SecretKey) 1805 } 1806 1807 func (c *SiteReplicationSys) getPeerCreds() (*auth.Credentials, error) { 1808 u, ok := globalIAMSys.store.GetUser(c.state.ServiceAccountAccessKey) 1809 if !ok { 1810 return nil, errors.New("site replication service account not found") 1811 } 1812 return &u.Credentials, nil 1813 } 1814 1815 // listBuckets returns a consistent common view of latest unique buckets across 1816 // sites, this is used for replication. 1817 func (c *SiteReplicationSys) listBuckets(ctx context.Context) ([]BucketInfo, error) { 1818 // If local has buckets, enable versioning on them, create them on peers 1819 // and setup replication rules. 1820 objAPI := newObjectLayerFn() 1821 if objAPI == nil { 1822 return nil, errSRObjectLayerNotReady 1823 } 1824 return objAPI.ListBuckets(ctx, BucketOptions{Deleted: true}) 1825 } 1826 1827 // syncToAllPeers is used for syncing local data to all remote peers, it is 1828 // called once during initial "AddPeerClusters" request. 1829 func (c *SiteReplicationSys) syncToAllPeers(ctx context.Context, addOpts madmin.SRAddOptions) error { 1830 objAPI := newObjectLayerFn() 1831 if objAPI == nil { 1832 return errSRObjectLayerNotReady 1833 } 1834 1835 buckets, err := objAPI.ListBuckets(ctx, BucketOptions{}) 1836 if err != nil { 1837 return err 1838 } 1839 1840 for _, bucketInfo := range buckets { 1841 bucket := bucketInfo.Name 1842 1843 meta, err := globalBucketMetadataSys.GetConfigFromDisk(ctx, bucket) 1844 if err != nil && !errors.Is(err, errConfigNotFound) { 1845 return errSRBackendIssue(err) 1846 } 1847 1848 opts := MakeBucketOptions{ 1849 LockEnabled: meta.ObjectLocking(), 1850 CreatedAt: bucketInfo.Created.UTC(), 1851 } 1852 1853 // Now call the MakeBucketHook on existing bucket - this will 1854 // create buckets and replication rules on peer clusters. 1855 if err = c.MakeBucketHook(ctx, bucket, opts); err != nil { 1856 return errSRBucketConfigError(err) 1857 } 1858 1859 // Replicate bucket policy if present. 1860 policyJSON, tm := meta.PolicyConfigJSON, meta.PolicyConfigUpdatedAt 1861 if len(policyJSON) > 0 { 1862 err = c.BucketMetaHook(ctx, madmin.SRBucketMeta{ 1863 Type: madmin.SRBucketMetaTypePolicy, 1864 Bucket: bucket, 1865 Policy: policyJSON, 1866 UpdatedAt: tm, 1867 }) 1868 if err != nil { 1869 return errSRBucketMetaError(err) 1870 } 1871 } 1872 1873 // Replicate bucket tags if present. 1874 tagCfg, tm := meta.TaggingConfigXML, meta.TaggingConfigUpdatedAt 1875 if len(tagCfg) > 0 { 1876 tagCfgStr := base64.StdEncoding.EncodeToString(tagCfg) 1877 err = c.BucketMetaHook(ctx, madmin.SRBucketMeta{ 1878 Type: madmin.SRBucketMetaTypeTags, 1879 Bucket: bucket, 1880 Tags: &tagCfgStr, 1881 UpdatedAt: tm, 1882 }) 1883 if err != nil { 1884 return errSRBucketMetaError(err) 1885 } 1886 } 1887 1888 // Replicate object-lock config if present. 
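		// Note on encoding: as with the tagging config above, XML bucket
		// configs are carried as base64-encoded strings on madmin.SRBucketMeta
		// and decoded again by the PeerBucket*Handler functions, e.g. (sketch
		// mirroring PeerBucketObjectLockConfigHandler above):
		//
		//	configData, err := base64.StdEncoding.DecodeString(*objectLockData)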
1889 		objLockCfgData, tm := meta.ObjectLockConfigXML, meta.ObjectLockConfigUpdatedAt
1890 		if len(objLockCfgData) > 0 {
1891 			objLockStr := base64.StdEncoding.EncodeToString(objLockCfgData)
1892 			err = c.BucketMetaHook(ctx, madmin.SRBucketMeta{
1893 				Type: madmin.SRBucketMetaTypeObjectLockConfig,
1894 				Bucket: bucket,
1895 				Tags: &objLockStr,
1896 				UpdatedAt: tm,
1897 			})
1898 			if err != nil {
1899 				return errSRBucketMetaError(err)
1900 			}
1901 		}
1902
1903 		// Replicate existing bucket encryption settings
1904 		sseConfigData, tm := meta.EncryptionConfigXML, meta.EncryptionConfigUpdatedAt
1905 		if len(sseConfigData) > 0 {
1906 			sseConfigStr := base64.StdEncoding.EncodeToString(sseConfigData)
1907 			err = c.BucketMetaHook(ctx, madmin.SRBucketMeta{
1908 				Type: madmin.SRBucketMetaTypeSSEConfig,
1909 				Bucket: bucket,
1910 				SSEConfig: &sseConfigStr,
1911 				UpdatedAt: tm,
1912 			})
1913 			if err != nil {
1914 				return errSRBucketMetaError(err)
1915 			}
1916 		}
1917
1918 		// Replicate existing bucket quota settings
1919 		quotaConfigJSON, tm := meta.QuotaConfigJSON, meta.QuotaConfigUpdatedAt
1920 		if len(quotaConfigJSON) > 0 {
1921 			err = c.BucketMetaHook(ctx, madmin.SRBucketMeta{
1922 				Type: madmin.SRBucketMetaTypeQuotaConfig,
1923 				Bucket: bucket,
1924 				Quota: quotaConfigJSON,
1925 				UpdatedAt: tm,
1926 			})
1927 			if err != nil {
1928 				return errSRBucketMetaError(err)
1929 			}
1930 		}
1931
1932 		// Replicate ILM expiry rules if needed
1933 		if addOpts.ReplicateILMExpiry && (meta.lifecycleConfig != nil && meta.lifecycleConfig.HasExpiry()) {
1934 			var expLclCfg lifecycle.Lifecycle
1935 			expLclCfg.XMLName = meta.lifecycleConfig.XMLName
1936 			for _, rule := range meta.lifecycleConfig.Rules {
1937 				if !rule.Expiration.IsNull() || !rule.NoncurrentVersionExpiration.IsNull() {
1938 					// copy the non-transition details of the rule
1939 					expLclCfg.Rules = append(expLclCfg.Rules, rule.CloneNonTransition())
1940 				}
1941 			}
1942 			currtime := time.Now()
1943 			expLclCfg.ExpiryUpdatedAt = &currtime
1944 			ilmConfigData, err := xml.Marshal(expLclCfg)
1945 			if err != nil {
1946 				return errSRBucketMetaError(err)
1947 			}
1948 			if len(ilmConfigData) > 0 {
1949 				configStr := base64.StdEncoding.EncodeToString(ilmConfigData)
1950 				err = c.BucketMetaHook(ctx, madmin.SRBucketMeta{
1951 					Type: madmin.SRBucketMetaLCConfig,
1952 					Bucket: bucket,
1953 					ExpiryLCConfig: &configStr,
1954 					UpdatedAt: time.Now(),
1955 				})
1956 				if err != nil {
1957 					return errSRBucketMetaError(err)
1958 				}
1959 			}
1960 		}
1961 	}
1962
1963 	// From here on, the order in which the information is
1964 	// synced to remote sites matters.
1965
1966 	// Policies should be synced first.
1967 	{
1968 		// Replicate IAM policies on local to all peers.
1969 		allPolicyDocs, err := globalIAMSys.ListPolicyDocs(ctx, "")
1970 		if err != nil {
1971 			return errSRBackendIssue(err)
1972 		}
1973
1974 		for pname, pdoc := range allPolicyDocs {
1975 			policyJSON, err := json.Marshal(pdoc.Policy)
1976 			if err != nil {
1977 				return wrapSRErr(err)
1978 			}
1979 			err = c.IAMChangeHook(ctx, madmin.SRIAMItem{
1980 				Type: madmin.SRIAMItemPolicy,
1981 				Name: pname,
1982 				Policy: policyJSON,
1983 				UpdatedAt: pdoc.UpdateDate,
1984 			})
1985 			if err != nil {
1986 				return errSRIAMError(err)
1987 			}
1988 		}
1989 	}
1990
1991 	// Next should be userAccounts - these are local users; OIDC and LDAP
1992 	// setups may not have any local users.
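	// Each item below is pushed through IAMChangeHook, which propagates it to
	// the peer sites. A minimal sketch of the payload for a single local user
	// (access key and status are placeholders; the fields mirror the loop
	// that follows):
	//
	//	_ = c.IAMChangeHook(ctx, madmin.SRIAMItem{
	//		Type: madmin.SRIAMItemIAMUser,
	//		IAMUser: &madmin.SRIAMUser{
	//			AccessKey: "user1",
	//			UserReq: &madmin.AddOrUpdateUserReq{
	//				Status: madmin.AccountStatus("enabled"),
	//			},
	//		},
	//	})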
1993 { 1994 userAccounts := make(map[string]UserIdentity) 1995 err := globalIAMSys.store.loadUsers(ctx, regUser, userAccounts) 1996 if err != nil { 1997 return errSRBackendIssue(err) 1998 } 1999 2000 for _, acc := range userAccounts { 2001 if err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 2002 Type: madmin.SRIAMItemIAMUser, 2003 IAMUser: &madmin.SRIAMUser{ 2004 AccessKey: acc.Credentials.AccessKey, 2005 IsDeleteReq: false, 2006 UserReq: &madmin.AddOrUpdateUserReq{ 2007 SecretKey: acc.Credentials.SecretKey, 2008 Status: madmin.AccountStatus(acc.Credentials.Status), 2009 }, 2010 }, 2011 UpdatedAt: acc.UpdatedAt, 2012 }); err != nil { 2013 return errSRIAMError(err) 2014 } 2015 } 2016 } 2017 2018 // Next should be Groups for some of these users, LDAP might have some Group 2019 // DNs here 2020 { 2021 groups := make(map[string]GroupInfo) 2022 err := globalIAMSys.store.loadGroups(ctx, groups) 2023 if err != nil { 2024 return errSRBackendIssue(err) 2025 } 2026 2027 for gname, group := range groups { 2028 if err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 2029 Type: madmin.SRIAMItemGroupInfo, 2030 GroupInfo: &madmin.SRGroupInfo{ 2031 UpdateReq: madmin.GroupAddRemove{ 2032 Group: gname, 2033 Members: group.Members, 2034 Status: madmin.GroupStatus(group.Status), 2035 IsRemove: false, 2036 }, 2037 }, 2038 UpdatedAt: group.UpdatedAt, 2039 }); err != nil { 2040 return errSRIAMError(err) 2041 } 2042 } 2043 } 2044 2045 // Followed by group policy mapping 2046 { 2047 // Replicate policy mappings on local to all peers. 2048 groupPolicyMap := xsync.NewMapOf[string, MappedPolicy]() 2049 errG := globalIAMSys.store.loadMappedPolicies(ctx, unknownIAMUserType, true, groupPolicyMap) 2050 if errG != nil { 2051 return errSRBackendIssue(errG) 2052 } 2053 2054 var err error 2055 groupPolicyMap.Range(func(k string, mp MappedPolicy) bool { 2056 err = c.IAMChangeHook(ctx, madmin.SRIAMItem{ 2057 Type: madmin.SRIAMItemPolicyMapping, 2058 PolicyMapping: &madmin.SRPolicyMapping{ 2059 UserOrGroup: k, 2060 UserType: int(unknownIAMUserType), 2061 IsGroup: true, 2062 Policy: mp.Policies, 2063 }, 2064 UpdatedAt: mp.UpdatedAt, 2065 }) 2066 return err == nil 2067 }) 2068 if err != nil { 2069 return errSRIAMError(err) 2070 } 2071 } 2072 2073 // Service accounts are the static accounts that should be synced with 2074 // valid claims. 2075 { 2076 serviceAccounts := make(map[string]UserIdentity) 2077 err := globalIAMSys.store.loadUsers(ctx, svcUser, serviceAccounts) 2078 if err != nil { 2079 return errSRBackendIssue(err) 2080 } 2081 2082 for user, acc := range serviceAccounts { 2083 if user == siteReplicatorSvcAcc { 2084 // skip the site replicate svc account as it is 2085 // already replicated. 
2086 continue 2087 } 2088 2089 claims, err := globalIAMSys.GetClaimsForSvcAcc(ctx, acc.Credentials.AccessKey) 2090 if err != nil { 2091 return errSRBackendIssue(err) 2092 } 2093 2094 _, policy, err := globalIAMSys.GetServiceAccount(ctx, acc.Credentials.AccessKey) 2095 if err != nil { 2096 return errSRBackendIssue(err) 2097 } 2098 2099 var policyJSON []byte 2100 if policy != nil { 2101 policyJSON, err = json.Marshal(policy) 2102 if err != nil { 2103 return wrapSRErr(err) 2104 } 2105 } 2106 2107 err = c.IAMChangeHook(ctx, madmin.SRIAMItem{ 2108 Type: madmin.SRIAMItemSvcAcc, 2109 SvcAccChange: &madmin.SRSvcAccChange{ 2110 Create: &madmin.SRSvcAccCreate{ 2111 Parent: acc.Credentials.ParentUser, 2112 AccessKey: user, 2113 SecretKey: acc.Credentials.SecretKey, 2114 Groups: acc.Credentials.Groups, 2115 Claims: claims, 2116 SessionPolicy: json.RawMessage(policyJSON), 2117 Status: acc.Credentials.Status, 2118 Name: acc.Credentials.Name, 2119 Description: acc.Credentials.Description, 2120 Expiration: &acc.Credentials.Expiration, 2121 }, 2122 }, 2123 UpdatedAt: acc.UpdatedAt, 2124 }) 2125 if err != nil { 2126 return errSRIAMError(err) 2127 } 2128 } 2129 } 2130 2131 // Followed by policy mapping for the userAccounts we previously synced. 2132 { 2133 // Replicate policy mappings on local to all peers. 2134 userPolicyMap := xsync.NewMapOf[string, MappedPolicy]() 2135 errU := globalIAMSys.store.loadMappedPolicies(ctx, regUser, false, userPolicyMap) 2136 if errU != nil { 2137 return errSRBackendIssue(errU) 2138 } 2139 var err error 2140 userPolicyMap.Range(func(user string, mp MappedPolicy) bool { 2141 err = c.IAMChangeHook(ctx, madmin.SRIAMItem{ 2142 Type: madmin.SRIAMItemPolicyMapping, 2143 PolicyMapping: &madmin.SRPolicyMapping{ 2144 UserOrGroup: user, 2145 UserType: int(regUser), 2146 IsGroup: false, 2147 Policy: mp.Policies, 2148 }, 2149 UpdatedAt: mp.UpdatedAt, 2150 }) 2151 return err == nil 2152 }) 2153 if err != nil { 2154 return errSRIAMError(err) 2155 } 2156 } 2157 2158 // and finally followed by policy mappings for for STS users. 2159 { 2160 // Replicate policy mappings on local to all peers. 
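		// The Range callback below returns false on the first failed
		// IAMChangeHook call, which stops the iteration; the error is then
		// inspected once after Range returns. Sketch of the pattern
		// (doSomething is a hypothetical per-entry action):
		//
		//	var err error
		//	m.Range(func(k string, v MappedPolicy) bool {
		//		err = doSomething(k, v)
		//		return err == nil // keep iterating only while it succeeds
		//	})
		//	if err != nil {
		//		// handle the first failure
		//	}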
2161 stsPolicyMap := xsync.NewMapOf[string, MappedPolicy]() 2162 errU := globalIAMSys.store.loadMappedPolicies(ctx, stsUser, false, stsPolicyMap) 2163 if errU != nil { 2164 return errSRBackendIssue(errU) 2165 } 2166 2167 var err error 2168 stsPolicyMap.Range(func(user string, mp MappedPolicy) bool { 2169 err = c.IAMChangeHook(ctx, madmin.SRIAMItem{ 2170 Type: madmin.SRIAMItemPolicyMapping, 2171 PolicyMapping: &madmin.SRPolicyMapping{ 2172 UserOrGroup: user, 2173 UserType: int(stsUser), 2174 IsGroup: false, 2175 Policy: mp.Policies, 2176 }, 2177 UpdatedAt: mp.UpdatedAt, 2178 }) 2179 return err == nil 2180 }) 2181 if err != nil { 2182 return errSRIAMError(err) 2183 } 2184 } 2185 2186 return nil 2187 } 2188 2189 // Concurrency helpers 2190 2191 type concErr struct { 2192 errMap map[string]error 2193 summaryErr error 2194 } 2195 2196 func (c concErr) Error() string { 2197 if c.summaryErr != nil { 2198 return c.summaryErr.Error() 2199 } 2200 return "<nil>" 2201 } 2202 2203 func (c concErr) Unwrap() error { 2204 return c.summaryErr 2205 } 2206 2207 func (c *SiteReplicationSys) toErrorFromErrMap(errMap map[string]error, actionName string) error { 2208 if len(errMap) == 0 { 2209 return nil 2210 } 2211 2212 var success int 2213 msgs := []string{} 2214 for d, err := range errMap { 2215 name := c.state.Peers[d].Name 2216 if err == nil { 2217 msgs = append(msgs, fmt.Sprintf("'%s' on site %s (%s): succeeded", actionName, name, d)) 2218 success++ 2219 } else { 2220 msgs = append(msgs, fmt.Sprintf("'%s' on site %s (%s): failed(%v)", actionName, name, d, err)) 2221 } 2222 } 2223 if success == len(errMap) { 2224 return nil 2225 } 2226 return fmt.Errorf("Site replication error(s): \n%s", strings.Join(msgs, "\n")) 2227 } 2228 2229 func (c *SiteReplicationSys) newConcErr(errMap map[string]error, actionName string) error { 2230 return concErr{ 2231 errMap: errMap, 2232 summaryErr: c.toErrorFromErrMap(errMap, actionName), 2233 } 2234 } 2235 2236 // concDo calls actions concurrently. selfActionFn is run for the current 2237 // cluster and peerActionFn is run for each peer replication cluster. 
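// A typical invocation looks like the sketch below (applyLocally and
// applyOnPeer are hypothetical actions, not functions in this file):
//
//	err := c.concDo(
//		func() error { return applyLocally() },
//		func(dID string, p madmin.PeerInfo) error { return applyOnPeer(dID, p) },
//		"ConfigApply",
//	)
//
// The returned error is a concErr; errors.Unwrap on it yields the aggregated
// summary, which is nil when every site succeeded.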
2238 func (c *SiteReplicationSys) concDo(selfActionFn func() error, peerActionFn func(deploymentID string, p madmin.PeerInfo) error, actionName string) error { 2239 depIDs := make([]string, 0, len(c.state.Peers)) 2240 for d := range c.state.Peers { 2241 depIDs = append(depIDs, d) 2242 } 2243 errs := make([]error, len(c.state.Peers)) 2244 var wg sync.WaitGroup 2245 wg.Add(len(depIDs)) 2246 for i := range depIDs { 2247 go func(i int) { 2248 defer wg.Done() 2249 if depIDs[i] == globalDeploymentID() { 2250 if selfActionFn != nil { 2251 errs[i] = selfActionFn() 2252 } 2253 } else { 2254 errs[i] = peerActionFn(depIDs[i], c.state.Peers[depIDs[i]]) 2255 } 2256 }(i) 2257 } 2258 wg.Wait() 2259 errMap := make(map[string]error, len(c.state.Peers)) 2260 for i, depID := range depIDs { 2261 errMap[depID] = errs[i] 2262 if errs[i] != nil && minio.IsNetworkOrHostDown(errs[i], true) { 2263 ep := c.state.Peers[depID].Endpoint 2264 epURL, _ := url.Parse(ep) 2265 if !globalBucketTargetSys.isOffline(epURL) { 2266 globalBucketTargetSys.markOffline(epURL) 2267 } 2268 } 2269 } 2270 return c.newConcErr(errMap, actionName) 2271 } 2272 2273 func (c *SiteReplicationSys) annotateErr(annotation string, err error) error { 2274 if err == nil { 2275 return nil 2276 } 2277 return fmt.Errorf("%s: %s: %w", c.state.Name, annotation, err) 2278 } 2279 2280 func (c *SiteReplicationSys) annotatePeerErr(dstPeer string, annotation string, err error) error { 2281 if err == nil { 2282 return nil 2283 } 2284 return fmt.Errorf("%s->%s: %s: %w", c.state.Name, dstPeer, annotation, err) 2285 } 2286 2287 // isEnabled returns true if site replication is enabled 2288 func (c *SiteReplicationSys) isEnabled() bool { 2289 c.RLock() 2290 defer c.RUnlock() 2291 return c.enabled 2292 } 2293 2294 var errMissingSRConfig = fmt.Errorf("unable to find site replication configuration") 2295 2296 // RemovePeerCluster - removes one or more clusters from site replication configuration. 
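// Sketch of a request this handler accepts (values are placeholders):
//
//	st, err := globalSiteReplicationSys.RemovePeerCluster(ctx, objAPI, madmin.SRRemoveReq{
//		SiteNames: []string{"site-b"}, // or RemoveAll: true to unlink every site
//	})
//
// A partial status is returned when some peers could not be updated.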
2297 func (c *SiteReplicationSys) RemovePeerCluster(ctx context.Context, objectAPI ObjectLayer, rreq madmin.SRRemoveReq) (st madmin.ReplicateRemoveStatus, err error) { 2298 if !c.isEnabled() { 2299 return st, errSRNotEnabled 2300 } 2301 info, err := c.GetClusterInfo(ctx) 2302 if err != nil { 2303 return st, errSRBackendIssue(err) 2304 } 2305 peerMap := make(map[string]madmin.PeerInfo) 2306 var rmvEndpoints []string 2307 siteNames := rreq.SiteNames 2308 updatedPeers := make(map[string]madmin.PeerInfo) 2309 2310 for _, pi := range info.Sites { 2311 updatedPeers[pi.DeploymentID] = pi 2312 peerMap[pi.Name] = pi 2313 if rreq.RemoveAll { 2314 siteNames = append(siteNames, pi.Name) 2315 } 2316 } 2317 for _, s := range siteNames { 2318 pinfo, ok := peerMap[s] 2319 if !ok { 2320 return st, errSRConfigMissingError(errMissingSRConfig) 2321 } 2322 rmvEndpoints = append(rmvEndpoints, pinfo.Endpoint) 2323 delete(updatedPeers, pinfo.DeploymentID) 2324 } 2325 var wg sync.WaitGroup 2326 errs := make(map[string]error, len(c.state.Peers)) 2327 2328 for _, v := range info.Sites { 2329 wg.Add(1) 2330 if v.DeploymentID == globalDeploymentID() { 2331 go func() { 2332 defer wg.Done() 2333 err := c.RemoveRemoteTargetsForEndpoint(ctx, objectAPI, rmvEndpoints, false) 2334 errs[globalDeploymentID()] = err 2335 }() 2336 continue 2337 } 2338 go func(pi madmin.PeerInfo) { 2339 defer wg.Done() 2340 admClient, err := c.getAdminClient(ctx, pi.DeploymentID) 2341 if err != nil { 2342 errs[pi.DeploymentID] = errSRPeerResp(fmt.Errorf("unable to create admin client for %s: %w", pi.Name, err)) 2343 return 2344 } 2345 // set the requesting site's deploymentID for verification of peer request 2346 rreq.RequestingDepID = globalDeploymentID() 2347 if _, err = admClient.SRPeerRemove(ctx, rreq); err != nil { 2348 if errors.Is(err, errMissingSRConfig) { 2349 // ignore if peer is already removed. 
2350 return 2351 } 2352 errs[pi.DeploymentID] = errSRPeerResp(fmt.Errorf("unable to update peer %s: %w", pi.Name, err)) 2353 return 2354 } 2355 }(v) 2356 } 2357 wg.Wait() 2358 2359 errdID := "" 2360 selfTgtsDeleted := errs[globalDeploymentID()] == nil // true if all remote targets and replication config cleared successfully on local cluster 2361 2362 for dID, err := range errs { 2363 if err != nil { 2364 if !rreq.RemoveAll && !selfTgtsDeleted { 2365 return madmin.ReplicateRemoveStatus{ 2366 ErrDetail: err.Error(), 2367 Status: madmin.ReplicateRemoveStatusPartial, 2368 }, errSRPeerResp(fmt.Errorf("unable to update peer %s: %w", c.state.Peers[dID].Name, err)) 2369 } 2370 errdID = dID 2371 } 2372 } 2373 2374 // force local config to be cleared even if peers failed since the remote targets are deleted 2375 // by now from the replication config and user intended to forcibly clear all site replication 2376 if rreq.RemoveAll { 2377 if err = c.removeFromDisk(ctx); err != nil { 2378 return madmin.ReplicateRemoveStatus{ 2379 Status: madmin.ReplicateRemoveStatusPartial, 2380 ErrDetail: fmt.Sprintf("unable to remove cluster-replication state on local: %v", err), 2381 }, nil 2382 } 2383 if errdID != "" { 2384 err := errs[errdID] 2385 return madmin.ReplicateRemoveStatus{ 2386 Status: madmin.ReplicateRemoveStatusPartial, 2387 ErrDetail: err.Error(), 2388 }, nil 2389 } 2390 return madmin.ReplicateRemoveStatus{ 2391 Status: madmin.ReplicateRemoveStatusSuccess, 2392 }, nil 2393 } 2394 2395 // Update cluster state 2396 var state srState 2397 if len(updatedPeers) > 1 { 2398 state = srState{ 2399 Name: info.Name, 2400 Peers: updatedPeers, 2401 ServiceAccountAccessKey: info.ServiceAccountAccessKey, 2402 } 2403 } 2404 if err = c.saveToDisk(ctx, state); err != nil { 2405 return madmin.ReplicateRemoveStatus{ 2406 Status: madmin.ReplicateRemoveStatusPartial, 2407 ErrDetail: fmt.Sprintf("unable to save cluster-replication state on local: %v", err), 2408 }, err 2409 } 2410 2411 st = madmin.ReplicateRemoveStatus{ 2412 Status: madmin.ReplicateRemoveStatusSuccess, 2413 } 2414 if errs[errdID] != nil { 2415 st.Status = madmin.ReplicateRemoveStatusPartial 2416 st.ErrDetail = errs[errdID].Error() 2417 } 2418 return st, nil 2419 } 2420 2421 // InternalRemoveReq - sends an unlink request to peer cluster to remove one or more sites 2422 // from the site replication configuration. 
2423 func (c *SiteReplicationSys) InternalRemoveReq(ctx context.Context, objectAPI ObjectLayer, rreq madmin.SRRemoveReq) error { 2424 if !c.isEnabled() { 2425 return errSRNotEnabled 2426 } 2427 if rreq.RequestingDepID != "" { 2428 // validate if requesting site is still part of site replication 2429 var foundRequestor bool 2430 for _, p := range c.state.Peers { 2431 if p.DeploymentID == rreq.RequestingDepID { 2432 foundRequestor = true 2433 break 2434 } 2435 } 2436 if !foundRequestor { 2437 return errSRRequestorNotFound 2438 } 2439 } 2440 2441 ourName := "" 2442 peerMap := make(map[string]madmin.PeerInfo) 2443 updatedPeers := make(map[string]madmin.PeerInfo) 2444 siteNames := rreq.SiteNames 2445 2446 for _, p := range c.state.Peers { 2447 peerMap[p.Name] = p 2448 if p.DeploymentID == globalDeploymentID() { 2449 ourName = p.Name 2450 } 2451 updatedPeers[p.DeploymentID] = p 2452 if rreq.RemoveAll { 2453 siteNames = append(siteNames, p.Name) 2454 } 2455 } 2456 var rmvEndpoints []string 2457 var unlinkSelf bool 2458 2459 for _, s := range siteNames { 2460 info, ok := peerMap[s] 2461 if !ok { 2462 return errMissingSRConfig 2463 } 2464 if info.DeploymentID == globalDeploymentID() { 2465 unlinkSelf = true 2466 continue 2467 } 2468 delete(updatedPeers, info.DeploymentID) 2469 rmvEndpoints = append(rmvEndpoints, info.Endpoint) 2470 } 2471 if err := c.RemoveRemoteTargetsForEndpoint(ctx, objectAPI, rmvEndpoints, unlinkSelf); err != nil { 2472 return err 2473 } 2474 var state srState 2475 if !unlinkSelf { 2476 state = srState{ 2477 Name: c.state.Name, 2478 Peers: updatedPeers, 2479 ServiceAccountAccessKey: c.state.ServiceAccountAccessKey, 2480 } 2481 } 2482 2483 if err := c.saveToDisk(ctx, state); err != nil { 2484 return errSRBackendIssue(fmt.Errorf("unable to save cluster-replication state to drive on %s: %v", ourName, err)) 2485 } 2486 return nil 2487 } 2488 2489 // RemoveRemoteTargetsForEndpoint removes replication targets corresponding to endpoint 2490 func (c *SiteReplicationSys) RemoveRemoteTargetsForEndpoint(ctx context.Context, objectAPI ObjectLayer, endpoints []string, unlinkSelf bool) (err error) { 2491 targets := globalBucketTargetSys.ListTargets(ctx, "", string(madmin.ReplicationService)) 2492 m := make(map[string]madmin.BucketTarget) 2493 for _, t := range targets { 2494 for _, endpoint := range endpoints { 2495 ep, _ := url.Parse(endpoint) 2496 if t.Endpoint == ep.Host && 2497 t.Secure == (ep.Scheme == "https") && 2498 t.Type == madmin.ReplicationService { 2499 m[t.Arn] = t 2500 } 2501 } 2502 // all remote targets from self are to be delinked 2503 if unlinkSelf { 2504 m[t.Arn] = t 2505 } 2506 } 2507 buckets, err := objectAPI.ListBuckets(ctx, BucketOptions{}) 2508 if err != nil { 2509 return errSRBackendIssue(err) 2510 } 2511 2512 for _, b := range buckets { 2513 config, _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, b.Name) 2514 if err != nil { 2515 if errors.Is(err, BucketReplicationConfigNotFound{Bucket: b.Name}) { 2516 continue 2517 } 2518 return err 2519 } 2520 var nRules []sreplication.Rule 2521 for _, r := range config.Rules { 2522 if _, ok := m[r.Destination.Bucket]; !ok { 2523 nRules = append(nRules, r) 2524 } 2525 } 2526 if len(nRules) > 0 { 2527 config.Rules = nRules 2528 configData, err := xml.Marshal(config) 2529 if err != nil { 2530 return err 2531 } 2532 if _, err = globalBucketMetadataSys.Update(ctx, b.Name, bucketReplicationConfig, configData); err != nil { 2533 return err 2534 } 2535 } else { 2536 if _, err := globalBucketMetadataSys.Delete(ctx, b.Name, 
bucketReplicationConfig); err != nil {
2537 				return err
2538 			}
2539 		}
2540 	}
2541 	for arn, t := range m {
2542 		if err := globalBucketTargetSys.RemoveTarget(ctx, t.SourceBucket, arn); err != nil {
2543 			if errors.Is(err, BucketRemoteTargetNotFound{Bucket: t.SourceBucket}) {
2544 				continue
2545 			}
2546 			return err
2547 		}
2548 		targets, terr := globalBucketTargetSys.ListBucketTargets(ctx, t.SourceBucket)
2549 		if terr != nil {
2550 			return terr
2551 		}
2552 		tgtBytes, terr := json.Marshal(&targets)
2553 		if terr != nil {
2554 			return terr
2555 		}
2556 		if _, err = globalBucketMetadataSys.Update(ctx, t.SourceBucket, bucketTargetsFile, tgtBytes); err != nil {
2557 			return err
2558 		}
2559 	}
2560 	return
2561 }
2562
2563 // Other helpers
2564
2565 func getAdminClient(endpoint, accessKey, secretKey string) (*madmin.AdminClient, error) {
2566 	epURL, err := url.Parse(endpoint)
2567 	if err != nil {
2568 		return nil, err
2569 	}
2570 	if globalBucketTargetSys.isOffline(epURL) {
2571 		return nil, RemoteTargetConnectionErr{Endpoint: epURL.String(), Err: fmt.Errorf("remote target is offline for endpoint %s", epURL.String())}
2572 	}
2573 	client, err := madmin.New(epURL.Host, accessKey, secretKey, epURL.Scheme == "https")
2574 	if err != nil {
2575 		return nil, err
2576 	}
2577 	client.SetCustomTransport(globalRemoteTargetTransport)
2578 	return client, nil
2579 }
2580
2581 func getS3Client(pc madmin.PeerSite) (*minioClient.Client, error) {
2582 	ep, err := url.Parse(pc.Endpoint)
2583 	if err != nil {
2584 		return nil, err
2585 	}
2586 	if globalBucketTargetSys.isOffline(ep) {
2587 		return nil, RemoteTargetConnectionErr{Endpoint: ep.String(), Err: fmt.Errorf("remote target is offline for endpoint %s", ep.String())}
2588 	}
2589
2590 	return minioClient.New(ep.Host, &minioClient.Options{
2591 		Creds: credentials.NewStaticV4(pc.AccessKey, pc.SecretKey, ""),
2592 		Secure: ep.Scheme == "https",
2593 		Transport: globalRemoteTargetTransport,
2594 	})
2595 }
2596
2597 func getPriorityHelper(replicationConfig replication.Config) int {
2598 	maxPrio := 0
2599 	for _, rule := range replicationConfig.Rules {
2600 		if rule.Priority > maxPrio {
2601 			maxPrio = rule.Priority
2602 		}
2603 	}
2604
2605 	// leave some gaps in priority numbers for flexibility
2606 	return maxPrio + 10
2607 }
2608
2609 // returns a slice of site names participating in site replication but left
2610 // unspecified while adding a new site.
2611 func getMissingSiteNames(oldDeps, newDeps set.StringSet, currSites []madmin.PeerInfo) []string {
2612 	diff := oldDeps.Difference(newDeps)
2613 	var diffSlc []string
2614 	for _, v := range currSites {
2615 		if diff.Contains(v.DeploymentID) {
2616 			diffSlc = append(diffSlc, v.Name)
2617 		}
2618 	}
2619 	return diffSlc
2620 }
2621
2622 type srBucketMetaInfo struct {
2623 	madmin.SRBucketInfo
2624 	DeploymentID string
2625 }
2626
2627 type srPolicy struct {
2628 	madmin.SRIAMPolicy
2629 	DeploymentID string
2630 }
2631
2632 type srPolicyMapping struct {
2633 	madmin.SRPolicyMapping
2634 	DeploymentID string
2635 }
2636
2637 type srUserInfo struct {
2638 	madmin.UserInfo
2639 	DeploymentID string
2640 }
2641
2642 type srGroupDesc struct {
2643 	madmin.GroupDesc
2644 	DeploymentID string
2645 }
2646
2647 type srILMExpiryRule struct {
2648 	madmin.ILMExpiryRule
2649 	DeploymentID string
2650 }
2651
2652 // SiteReplicationStatus returns the site replication status across clusters participating in site replication.
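// Sketch of a status query scoped to a single bucket (values are
// placeholders; the fields mirror those consulted below):
//
//	info, err := globalSiteReplicationSys.SiteReplicationStatus(ctx, objAPI, madmin.SRStatusOptions{
//		Buckets:     true,
//		Entity:      madmin.SRBucketEntity,
//		EntityValue: "mybucket",
//	})
//
// Only entities that show a mismatch, or that match the requested entity
// filter, are retained in the returned per-site maps.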
2653 func (c *SiteReplicationSys) SiteReplicationStatus(ctx context.Context, objAPI ObjectLayer, opts madmin.SRStatusOptions) (info madmin.SRStatusInfo, err error) { 2654 sinfo, err := c.siteReplicationStatus(ctx, objAPI, opts) 2655 if err != nil { 2656 return info, err 2657 } 2658 info = madmin.SRStatusInfo{ 2659 Enabled: sinfo.Enabled, 2660 MaxBuckets: sinfo.MaxBuckets, 2661 MaxUsers: sinfo.MaxUsers, 2662 MaxGroups: sinfo.MaxGroups, 2663 MaxPolicies: sinfo.MaxPolicies, 2664 MaxILMExpiryRules: sinfo.MaxILMExpiryRules, 2665 Sites: sinfo.Sites, 2666 StatsSummary: sinfo.StatsSummary, 2667 Metrics: sinfo.Metrics, 2668 } 2669 info.BucketStats = make(map[string]map[string]madmin.SRBucketStatsSummary, len(sinfo.Sites)) 2670 info.PolicyStats = make(map[string]map[string]madmin.SRPolicyStatsSummary) 2671 info.UserStats = make(map[string]map[string]madmin.SRUserStatsSummary) 2672 info.GroupStats = make(map[string]map[string]madmin.SRGroupStatsSummary) 2673 info.ILMExpiryStats = make(map[string]map[string]madmin.SRILMExpiryStatsSummary) 2674 numSites := len(info.Sites) 2675 for b, stat := range sinfo.BucketStats { 2676 for dID, st := range stat { 2677 if st.TagMismatch || 2678 st.VersioningConfigMismatch || 2679 st.OLockConfigMismatch || 2680 st.SSEConfigMismatch || 2681 st.PolicyMismatch || 2682 st.ReplicationCfgMismatch || 2683 st.QuotaCfgMismatch || 2684 opts.Entity == madmin.SRBucketEntity { 2685 if _, ok := info.BucketStats[b]; !ok { 2686 info.BucketStats[b] = make(map[string]madmin.SRBucketStatsSummary, numSites) 2687 } 2688 info.BucketStats[b][dID] = st.SRBucketStatsSummary 2689 } 2690 } 2691 } 2692 for u, stat := range sinfo.UserStats { 2693 for dID, st := range stat { 2694 if st.PolicyMismatch || st.UserInfoMismatch || opts.Entity == madmin.SRUserEntity { 2695 if _, ok := info.UserStats[u]; !ok { 2696 info.UserStats[u] = make(map[string]madmin.SRUserStatsSummary, numSites) 2697 } 2698 info.UserStats[u][dID] = st.SRUserStatsSummary 2699 } 2700 } 2701 } 2702 for g, stat := range sinfo.GroupStats { 2703 for dID, st := range stat { 2704 if st.PolicyMismatch || st.GroupDescMismatch || opts.Entity == madmin.SRGroupEntity { 2705 if _, ok := info.GroupStats[g]; !ok { 2706 info.GroupStats[g] = make(map[string]madmin.SRGroupStatsSummary, numSites) 2707 } 2708 info.GroupStats[g][dID] = st.SRGroupStatsSummary 2709 } 2710 } 2711 } 2712 for p, stat := range sinfo.PolicyStats { 2713 for dID, st := range stat { 2714 if st.PolicyMismatch || opts.Entity == madmin.SRPolicyEntity { 2715 if _, ok := info.PolicyStats[p]; !ok { 2716 info.PolicyStats[p] = make(map[string]madmin.SRPolicyStatsSummary, numSites) 2717 } 2718 info.PolicyStats[p][dID] = st.SRPolicyStatsSummary 2719 } 2720 } 2721 } 2722 for p, stat := range sinfo.ILMExpiryRulesStats { 2723 for dID, st := range stat { 2724 if st.ILMExpiryRuleMismatch || opts.Entity == madmin.SRILMExpiryRuleEntity { 2725 if _, ok := info.ILMExpiryStats[p]; !ok { 2726 info.ILMExpiryStats[p] = make(map[string]madmin.SRILMExpiryStatsSummary, numSites) 2727 } 2728 info.ILMExpiryStats[p][dID] = st.SRILMExpiryStatsSummary 2729 } 2730 } 2731 } 2732 2733 return 2734 } 2735 2736 const ( 2737 replicationStatus = "ReplicationStatus" 2738 ) 2739 2740 // siteReplicationStatus returns the site replication status across clusters participating in site replication. 
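// It gathers a madmin.SRInfo snapshot from every site via concDo - the local
// snapshot directly through SiteReplicationMetaInfo, peer snapshots through
// their admin clients - and then compares each entity's configuration across
// sites to flag mismatches. A peer that is unreachable contributes an empty
// snapshot instead of failing the whole call.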
2741 func (c *SiteReplicationSys) siteReplicationStatus(ctx context.Context, objAPI ObjectLayer, opts madmin.SRStatusOptions) (info srStatusInfo, err error) { 2742 c.RLock() 2743 defer c.RUnlock() 2744 if !c.enabled { 2745 return info, err 2746 } 2747 2748 sris := make([]madmin.SRInfo, len(c.state.Peers)) 2749 depIdx := make(map[string]int, len(c.state.Peers)) 2750 i := 0 2751 for d := range c.state.Peers { 2752 depIdx[d] = i 2753 i++ 2754 } 2755 2756 metaInfoConcErr := c.concDo( 2757 func() error { 2758 srInfo, err := c.SiteReplicationMetaInfo(ctx, objAPI, opts) 2759 if err != nil { 2760 return err 2761 } 2762 sris[depIdx[globalDeploymentID()]] = srInfo 2763 return nil 2764 }, 2765 func(deploymentID string, p madmin.PeerInfo) error { 2766 admClient, err := c.getAdminClient(ctx, deploymentID) 2767 if err != nil { 2768 switch err.(type) { 2769 case RemoteTargetConnectionErr: 2770 sris[depIdx[deploymentID]] = madmin.SRInfo{} 2771 return nil 2772 default: 2773 return err 2774 } 2775 } 2776 srInfo, err := admClient.SRMetaInfo(ctx, opts) 2777 if err != nil { 2778 return err 2779 } 2780 sris[depIdx[deploymentID]] = srInfo 2781 return nil 2782 }, 2783 replicationStatus, 2784 ) 2785 if err := errors.Unwrap(metaInfoConcErr); err != nil { 2786 return info, errSRBackendIssue(err) 2787 } 2788 2789 info.Enabled = true 2790 info.Sites = make(map[string]madmin.PeerInfo, len(c.state.Peers)) 2791 for d, peer := range c.state.Peers { 2792 info.Sites[d] = peer 2793 } 2794 info.UpdatedAt = c.state.UpdatedAt 2795 2796 var maxBuckets int 2797 for _, sri := range sris { 2798 if len(sri.Buckets) > maxBuckets { 2799 maxBuckets = len(sri.Buckets) 2800 } 2801 } 2802 // mapping b/w entity and entity config across sites 2803 bucketStats := make(map[string][]srBucketMetaInfo) 2804 policyStats := make(map[string][]srPolicy) 2805 userPolicyStats := make(map[string][]srPolicyMapping) 2806 groupPolicyStats := make(map[string][]srPolicyMapping) 2807 userInfoStats := make(map[string][]srUserInfo) 2808 groupDescStats := make(map[string][]srGroupDesc) 2809 ilmExpiryRuleStats := make(map[string][]srILMExpiryRule) 2810 2811 numSites := len(sris) 2812 allBuckets := set.NewStringSet() // across sites 2813 allUsers := set.NewStringSet() 2814 allUserWPolicies := set.NewStringSet() 2815 allGroups := set.NewStringSet() 2816 allGroupWPolicies := set.NewStringSet() 2817 allILMExpiryRules := set.NewStringSet() 2818 2819 allPolicies := set.NewStringSet() 2820 for _, sri := range sris { 2821 for b := range sri.Buckets { 2822 allBuckets.Add(b) 2823 } 2824 for u := range sri.UserInfoMap { 2825 allUsers.Add(u) 2826 } 2827 for g := range sri.GroupDescMap { 2828 allGroups.Add(g) 2829 } 2830 for p := range sri.Policies { 2831 allPolicies.Add(p) 2832 } 2833 for u := range sri.UserPolicies { 2834 allUserWPolicies.Add(u) 2835 } 2836 for g := range sri.GroupPolicies { 2837 allGroupWPolicies.Add(g) 2838 } 2839 for r := range sri.ILMExpiryRules { 2840 allILMExpiryRules.Add(r) 2841 } 2842 } 2843 2844 for i, sri := range sris { 2845 for b := range allBuckets { 2846 if _, ok := bucketStats[b]; !ok { 2847 bucketStats[b] = make([]srBucketMetaInfo, numSites) 2848 } 2849 si, ok := sri.Buckets[b] 2850 if !ok { 2851 si = madmin.SRBucketInfo{Bucket: b} 2852 } 2853 bucketStats[b][i] = srBucketMetaInfo{SRBucketInfo: si, DeploymentID: sri.DeploymentID} 2854 } 2855 2856 for pname := range allPolicies { 2857 if _, ok := policyStats[pname]; !ok { 2858 policyStats[pname] = make([]srPolicy, numSites) 2859 } 2860 2861 // if pname is not present in the map, the zero 
value 2862 // will be returned. 2863 pi := sri.Policies[pname] 2864 policyStats[pname][i] = srPolicy{SRIAMPolicy: pi, DeploymentID: sri.DeploymentID} 2865 } 2866 for user := range allUserWPolicies { 2867 if _, ok := userPolicyStats[user]; !ok { 2868 userPolicyStats[user] = make([]srPolicyMapping, numSites) 2869 } 2870 up := sri.UserPolicies[user] 2871 userPolicyStats[user][i] = srPolicyMapping{SRPolicyMapping: up, DeploymentID: sri.DeploymentID} 2872 } 2873 for group := range allGroupWPolicies { 2874 if _, ok := groupPolicyStats[group]; !ok { 2875 groupPolicyStats[group] = make([]srPolicyMapping, numSites) 2876 } 2877 up := sri.GroupPolicies[group] 2878 groupPolicyStats[group][i] = srPolicyMapping{SRPolicyMapping: up, DeploymentID: sri.DeploymentID} 2879 } 2880 for u := range allUsers { 2881 if _, ok := userInfoStats[u]; !ok { 2882 userInfoStats[u] = make([]srUserInfo, numSites) 2883 } 2884 ui := sri.UserInfoMap[u] 2885 userInfoStats[u][i] = srUserInfo{UserInfo: ui, DeploymentID: sri.DeploymentID} 2886 } 2887 for g := range allGroups { 2888 if _, ok := groupDescStats[g]; !ok { 2889 groupDescStats[g] = make([]srGroupDesc, numSites) 2890 } 2891 gd := sri.GroupDescMap[g] 2892 groupDescStats[g][i] = srGroupDesc{GroupDesc: gd, DeploymentID: sri.DeploymentID} 2893 } 2894 for r := range allILMExpiryRules { 2895 if _, ok := ilmExpiryRuleStats[r]; !ok { 2896 ilmExpiryRuleStats[r] = make([]srILMExpiryRule, numSites) 2897 } 2898 rl := sri.ILMExpiryRules[r] 2899 ilmExpiryRuleStats[r][i] = srILMExpiryRule{ILMExpiryRule: rl, DeploymentID: sri.DeploymentID} 2900 } 2901 } 2902 2903 info.StatsSummary = make(map[string]madmin.SRSiteSummary, len(c.state.Peers)) 2904 info.BucketStats = make(map[string]map[string]srBucketStatsSummary) 2905 info.PolicyStats = make(map[string]map[string]srPolicyStatsSummary) 2906 info.UserStats = make(map[string]map[string]srUserStatsSummary) 2907 info.GroupStats = make(map[string]map[string]srGroupStatsSummary) 2908 info.ILMExpiryRulesStats = make(map[string]map[string]srILMExpiryRuleStatsSummary) 2909 // collect user policy mapping replication status across sites 2910 if opts.Users || opts.Entity == madmin.SRUserEntity { 2911 for u, pslc := range userPolicyStats { 2912 if len(info.UserStats[u]) == 0 { 2913 info.UserStats[u] = make(map[string]srUserStatsSummary) 2914 } 2915 var policyMappings []madmin.SRPolicyMapping 2916 uPolicyCount := 0 2917 for _, ps := range pslc { 2918 policyMappings = append(policyMappings, ps.SRPolicyMapping) 2919 uPolicyCount++ 2920 sum := info.StatsSummary[ps.DeploymentID] 2921 sum.TotalUserPolicyMappingCount++ 2922 info.StatsSummary[ps.DeploymentID] = sum 2923 } 2924 userPolicyMismatch := !isPolicyMappingReplicated(uPolicyCount, numSites, policyMappings) 2925 for _, ps := range pslc { 2926 dID := depIdx[ps.DeploymentID] 2927 _, hasUser := sris[dID].UserPolicies[u] 2928 info.UserStats[u][ps.DeploymentID] = srUserStatsSummary{ 2929 SRUserStatsSummary: madmin.SRUserStatsSummary{ 2930 PolicyMismatch: userPolicyMismatch, 2931 HasUser: hasUser, 2932 HasPolicyMapping: ps.Policy != "", 2933 }, 2934 userPolicy: ps, 2935 } 2936 if !userPolicyMismatch || opts.Entity != madmin.SRUserEntity { 2937 sum := info.StatsSummary[ps.DeploymentID] 2938 if !ps.IsGroup { 2939 sum.ReplicatedUserPolicyMappings++ 2940 } 2941 info.StatsSummary[ps.DeploymentID] = sum 2942 } 2943 } 2944 } 2945 2946 // collect user info replication status across sites 2947 for u, pslc := range userInfoStats { 2948 var uiSlc []madmin.UserInfo 2949 userCount := 0 2950 for _, ps := range pslc { 2951 
uiSlc = append(uiSlc, ps.UserInfo) 2952 userCount++ 2953 sum := info.StatsSummary[ps.DeploymentID] 2954 sum.TotalUsersCount++ 2955 info.StatsSummary[ps.DeploymentID] = sum 2956 } 2957 userInfoMismatch := !isUserInfoReplicated(userCount, numSites, uiSlc) 2958 for _, ps := range pslc { 2959 dID := depIdx[ps.DeploymentID] 2960 _, hasUser := sris[dID].UserInfoMap[u] 2961 if len(info.UserStats[u]) == 0 { 2962 info.UserStats[u] = make(map[string]srUserStatsSummary) 2963 } 2964 umis, ok := info.UserStats[u][ps.DeploymentID] 2965 if !ok { 2966 umis = srUserStatsSummary{ 2967 SRUserStatsSummary: madmin.SRUserStatsSummary{ 2968 HasUser: hasUser, 2969 }, 2970 } 2971 } 2972 umis.UserInfoMismatch = userInfoMismatch 2973 umis.userInfo = ps 2974 info.UserStats[u][ps.DeploymentID] = umis 2975 if !userInfoMismatch || opts.Entity != madmin.SRUserEntity { 2976 sum := info.StatsSummary[ps.DeploymentID] 2977 sum.ReplicatedUsers++ 2978 info.StatsSummary[ps.DeploymentID] = sum 2979 } 2980 } 2981 } 2982 } 2983 if opts.Groups || opts.Entity == madmin.SRGroupEntity { 2984 // collect group policy mapping replication status across sites 2985 for g, pslc := range groupPolicyStats { 2986 var policyMappings []madmin.SRPolicyMapping 2987 gPolicyCount := 0 2988 for _, ps := range pslc { 2989 policyMappings = append(policyMappings, ps.SRPolicyMapping) 2990 gPolicyCount++ 2991 sum := info.StatsSummary[ps.DeploymentID] 2992 sum.TotalGroupPolicyMappingCount++ 2993 info.StatsSummary[ps.DeploymentID] = sum 2994 } 2995 groupPolicyMismatch := !isPolicyMappingReplicated(gPolicyCount, numSites, policyMappings) 2996 if len(info.GroupStats[g]) == 0 { 2997 info.GroupStats[g] = make(map[string]srGroupStatsSummary) 2998 } 2999 for _, ps := range pslc { 3000 dID := depIdx[ps.DeploymentID] 3001 _, hasGroup := sris[dID].GroupPolicies[g] 3002 info.GroupStats[g][ps.DeploymentID] = srGroupStatsSummary{ 3003 SRGroupStatsSummary: madmin.SRGroupStatsSummary{ 3004 PolicyMismatch: groupPolicyMismatch, 3005 HasGroup: hasGroup, 3006 HasPolicyMapping: ps.Policy != "", 3007 DeploymentID: ps.DeploymentID, 3008 }, 3009 groupPolicy: ps, 3010 } 3011 if !groupPolicyMismatch && opts.Entity != madmin.SRGroupEntity { 3012 sum := info.StatsSummary[ps.DeploymentID] 3013 sum.ReplicatedGroupPolicyMappings++ 3014 info.StatsSummary[ps.DeploymentID] = sum 3015 } 3016 3017 } 3018 } 3019 3020 // collect group desc replication status across sites 3021 for g, pslc := range groupDescStats { 3022 var gds []madmin.GroupDesc 3023 groupCount := 0 3024 for _, ps := range pslc { 3025 groupCount++ 3026 sum := info.StatsSummary[ps.DeploymentID] 3027 sum.TotalGroupsCount++ 3028 info.StatsSummary[ps.DeploymentID] = sum 3029 gds = append(gds, ps.GroupDesc) 3030 } 3031 gdMismatch := !isGroupDescReplicated(groupCount, numSites, gds) 3032 for _, ps := range pslc { 3033 dID := depIdx[ps.DeploymentID] 3034 _, hasGroup := sris[dID].GroupDescMap[g] 3035 if len(info.GroupStats[g]) == 0 { 3036 info.GroupStats[g] = make(map[string]srGroupStatsSummary) 3037 } 3038 gmis, ok := info.GroupStats[g][ps.DeploymentID] 3039 if !ok { 3040 gmis = srGroupStatsSummary{ 3041 SRGroupStatsSummary: madmin.SRGroupStatsSummary{ 3042 HasGroup: hasGroup, 3043 }, 3044 } 3045 } 3046 gmis.GroupDescMismatch = gdMismatch 3047 gmis.groupDesc = ps 3048 info.GroupStats[g][ps.DeploymentID] = gmis 3049 if !gdMismatch && opts.Entity != madmin.SRGroupEntity { 3050 sum := info.StatsSummary[ps.DeploymentID] 3051 sum.ReplicatedGroups++ 3052 info.StatsSummary[ps.DeploymentID] = sum 3053 } 3054 } 3055 } 3056 } 3057 if 
opts.Policies || opts.Entity == madmin.SRPolicyEntity { 3058 // collect IAM policy replication status across sites 3059 for p, pslc := range policyStats { 3060 var policies []*policy.Policy 3061 uPolicyCount := 0 3062 for _, ps := range pslc { 3063 plcy, err := policy.ParseConfig(bytes.NewReader([]byte(ps.SRIAMPolicy.Policy))) 3064 if err != nil { 3065 continue 3066 } 3067 policies = append(policies, plcy) 3068 uPolicyCount++ 3069 sum := info.StatsSummary[ps.DeploymentID] 3070 sum.TotalIAMPoliciesCount++ 3071 info.StatsSummary[ps.DeploymentID] = sum 3072 } 3073 if len(info.PolicyStats[p]) == 0 { 3074 info.PolicyStats[p] = make(map[string]srPolicyStatsSummary) 3075 } 3076 policyMismatch := !isIAMPolicyReplicated(uPolicyCount, numSites, policies) 3077 for _, ps := range pslc { 3078 dID := depIdx[ps.DeploymentID] 3079 _, hasPolicy := sris[dID].Policies[p] 3080 info.PolicyStats[p][ps.DeploymentID] = srPolicyStatsSummary{ 3081 SRPolicyStatsSummary: madmin.SRPolicyStatsSummary{ 3082 PolicyMismatch: policyMismatch, 3083 HasPolicy: hasPolicy, 3084 }, 3085 policy: ps, 3086 } 3087 switch { 3088 case policyMismatch, opts.Entity == madmin.SRPolicyEntity: 3089 default: 3090 sum := info.StatsSummary[ps.DeploymentID] 3091 if !policyMismatch { 3092 sum.ReplicatedIAMPolicies++ 3093 } 3094 info.StatsSummary[ps.DeploymentID] = sum 3095 } 3096 } 3097 } 3098 } 3099 if opts.Buckets || opts.Entity == madmin.SRBucketEntity { 3100 // collect bucket metadata replication stats across sites 3101 for b, slc := range bucketStats { 3102 tagSet := set.NewStringSet() 3103 olockConfigSet := set.NewStringSet() 3104 policies := make([]*policy.BucketPolicy, numSites) 3105 replCfgs := make([]*sreplication.Config, numSites) 3106 quotaCfgs := make([]*madmin.BucketQuota, numSites) 3107 sseCfgSet := set.NewStringSet() 3108 versionCfgSet := set.NewStringSet() 3109 var tagCount, olockCfgCount, sseCfgCount, versionCfgCount int 3110 for i, s := range slc { 3111 if s.ReplicationConfig != nil { 3112 cfgBytes, err := base64.StdEncoding.DecodeString(*s.ReplicationConfig) 3113 if err != nil { 3114 continue 3115 } 3116 cfg, err := sreplication.ParseConfig(bytes.NewReader(cfgBytes)) 3117 if err != nil { 3118 continue 3119 } 3120 replCfgs[i] = cfg 3121 } 3122 if s.Versioning != nil { 3123 configData, err := base64.StdEncoding.DecodeString(*s.Versioning) 3124 if err != nil { 3125 continue 3126 } 3127 versionCfgCount++ 3128 if !versionCfgSet.Contains(string(configData)) { 3129 versionCfgSet.Add(string(configData)) 3130 } 3131 } 3132 if s.QuotaConfig != nil { 3133 cfgBytes, err := base64.StdEncoding.DecodeString(*s.QuotaConfig) 3134 if err != nil { 3135 continue 3136 } 3137 cfg, err := parseBucketQuota(b, cfgBytes) 3138 if err != nil { 3139 continue 3140 } 3141 quotaCfgs[i] = cfg 3142 } 3143 if s.Tags != nil { 3144 tagBytes, err := base64.StdEncoding.DecodeString(*s.Tags) 3145 if err != nil { 3146 continue 3147 } 3148 tagCount++ 3149 if !tagSet.Contains(string(tagBytes)) { 3150 tagSet.Add(string(tagBytes)) 3151 } 3152 } 3153 if len(s.Policy) > 0 { 3154 plcy, err := policy.ParseBucketPolicyConfig(bytes.NewReader(s.Policy), b) 3155 if err != nil { 3156 continue 3157 } 3158 policies[i] = plcy 3159 } 3160 if s.ObjectLockConfig != nil { 3161 configData, err := base64.StdEncoding.DecodeString(*s.ObjectLockConfig) 3162 if err != nil { 3163 continue 3164 } 3165 olockCfgCount++ 3166 if !olockConfigSet.Contains(string(configData)) { 3167 olockConfigSet.Add(string(configData)) 3168 } 3169 } 3170 if s.SSEConfig != nil { 3171 configData, err := 
base64.StdEncoding.DecodeString(*s.SSEConfig) 3172 if err != nil { 3173 continue 3174 } 3175 sseCfgCount++ 3176 if !sseCfgSet.Contains(string(configData)) { 3177 sseCfgSet.Add(string(configData)) 3178 } 3179 } 3180 ss, ok := info.StatsSummary[s.DeploymentID] 3181 if !ok { 3182 ss = madmin.SRSiteSummary{} 3183 } 3184 // increment total number of replicated buckets 3185 if len(slc) == numSites { 3186 ss.ReplicatedBuckets++ 3187 } 3188 ss.TotalBucketsCount++ 3189 if tagCount > 0 { 3190 ss.TotalTagsCount++ 3191 } 3192 if olockCfgCount > 0 { 3193 ss.TotalLockConfigCount++ 3194 } 3195 if sseCfgCount > 0 { 3196 ss.TotalSSEConfigCount++ 3197 } 3198 if versionCfgCount > 0 { 3199 ss.TotalVersioningConfigCount++ 3200 } 3201 if len(policies) > 0 { 3202 ss.TotalBucketPoliciesCount++ 3203 } 3204 info.StatsSummary[s.DeploymentID] = ss 3205 } 3206 tagMismatch := !isReplicated(tagCount, numSites, tagSet) 3207 olockCfgMismatch := !isReplicated(olockCfgCount, numSites, olockConfigSet) 3208 sseCfgMismatch := !isReplicated(sseCfgCount, numSites, sseCfgSet) 3209 versionCfgMismatch := !isReplicated(versionCfgCount, numSites, versionCfgSet) 3210 policyMismatch := !isBktPolicyReplicated(numSites, policies) 3211 replCfgMismatch := !isBktReplCfgReplicated(numSites, replCfgs) 3212 quotaCfgMismatch := !isBktQuotaCfgReplicated(numSites, quotaCfgs) 3213 info.BucketStats[b] = make(map[string]srBucketStatsSummary, numSites) 3214 for i, s := range slc { 3215 dIdx := depIdx[s.DeploymentID] 3216 var hasBucket, isBucketMarkedDeleted bool 3217 3218 bi, ok := sris[dIdx].Buckets[s.Bucket] 3219 if ok { 3220 isBucketMarkedDeleted = !bi.DeletedAt.IsZero() && (bi.CreatedAt.IsZero() || bi.DeletedAt.After(bi.CreatedAt)) 3221 hasBucket = !bi.CreatedAt.IsZero() 3222 } 3223 quotaCfgSet := hasBucket && quotaCfgs[i] != nil && *quotaCfgs[i] != madmin.BucketQuota{} 3224 ss := madmin.SRBucketStatsSummary{ 3225 DeploymentID: s.DeploymentID, 3226 HasBucket: hasBucket, 3227 BucketMarkedDeleted: isBucketMarkedDeleted, 3228 TagMismatch: tagMismatch, 3229 OLockConfigMismatch: olockCfgMismatch, 3230 SSEConfigMismatch: sseCfgMismatch, 3231 VersioningConfigMismatch: versionCfgMismatch, 3232 PolicyMismatch: policyMismatch, 3233 ReplicationCfgMismatch: replCfgMismatch, 3234 QuotaCfgMismatch: quotaCfgMismatch, 3235 HasReplicationCfg: s.ReplicationConfig != nil, 3236 HasTagsSet: s.Tags != nil, 3237 HasOLockConfigSet: s.ObjectLockConfig != nil, 3238 HasPolicySet: s.Policy != nil, 3239 HasQuotaCfgSet: quotaCfgSet, 3240 HasSSECfgSet: s.SSEConfig != nil, 3241 } 3242 var m srBucketMetaInfo 3243 if len(bucketStats[s.Bucket]) > dIdx { 3244 m = bucketStats[s.Bucket][dIdx] 3245 } 3246 info.BucketStats[b][s.DeploymentID] = srBucketStatsSummary{ 3247 SRBucketStatsSummary: ss, 3248 meta: m, 3249 } 3250 } 3251 // no mismatch 3252 for _, s := range slc { 3253 sum := info.StatsSummary[s.DeploymentID] 3254 if !olockCfgMismatch && olockCfgCount == numSites { 3255 sum.ReplicatedLockConfig++ 3256 } 3257 if !versionCfgMismatch && versionCfgCount == numSites { 3258 sum.ReplicatedVersioningConfig++ 3259 } 3260 if !sseCfgMismatch && sseCfgCount == numSites { 3261 sum.ReplicatedSSEConfig++ 3262 } 3263 if !policyMismatch && len(policies) == numSites { 3264 sum.ReplicatedBucketPolicies++ 3265 } 3266 if !tagMismatch && tagCount == numSites { 3267 sum.ReplicatedTags++ 3268 } 3269 info.StatsSummary[s.DeploymentID] = sum 3270 } 3271 } 3272 } 3273 if opts.ILMExpiryRules || opts.Entity == madmin.SRILMExpiryRuleEntity { 3274 // collect ILM expiry rules replication status across sites 
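		// Expiry rules are compared across sites by re-marshalling each rule
		// to XML and checking the resulting strings for equality - see
		// isILMExpRuleReplicated further below.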
3275 for id, ilmExpRules := range ilmExpiryRuleStats { 3276 var rules []*lifecycle.Rule 3277 uRuleCount := 0 3278 for _, rl := range ilmExpRules { 3279 var rule lifecycle.Rule 3280 if err := xml.Unmarshal([]byte(rl.ILMExpiryRule.ILMRule), &rule); err != nil { 3281 continue 3282 } 3283 rules = append(rules, &rule) 3284 uRuleCount++ 3285 sum := info.StatsSummary[rl.DeploymentID] 3286 sum.TotalILMExpiryRulesCount++ 3287 info.StatsSummary[rl.DeploymentID] = sum 3288 } 3289 if len(info.ILMExpiryRulesStats[id]) == 0 { 3290 info.ILMExpiryRulesStats[id] = make(map[string]srILMExpiryRuleStatsSummary) 3291 } 3292 ilmExpRuleMismatch := !isILMExpRuleReplicated(uRuleCount, numSites, rules) 3293 for _, rl := range ilmExpRules { 3294 dID := depIdx[rl.DeploymentID] 3295 _, hasILMExpRule := sris[dID].ILMExpiryRules[id] 3296 info.ILMExpiryRulesStats[id][rl.DeploymentID] = srILMExpiryRuleStatsSummary{ 3297 SRILMExpiryStatsSummary: madmin.SRILMExpiryStatsSummary{ 3298 ILMExpiryRuleMismatch: ilmExpRuleMismatch, 3299 HasILMExpiryRules: hasILMExpRule, 3300 }, 3301 ilmExpiryRule: rl, 3302 } 3303 switch { 3304 case ilmExpRuleMismatch, opts.Entity == madmin.SRILMExpiryRuleEntity: 3305 default: 3306 sum := info.StatsSummary[rl.DeploymentID] 3307 if !ilmExpRuleMismatch { 3308 sum.ReplicatedILMExpiryRules++ 3309 } 3310 info.StatsSummary[rl.DeploymentID] = sum 3311 } 3312 } 3313 } 3314 } 3315 if opts.PeerState { 3316 info.PeerStates = make(map[string]madmin.SRStateInfo, numSites) 3317 for _, sri := range sris { 3318 info.PeerStates[sri.DeploymentID] = sri.State 3319 } 3320 } 3321 3322 if opts.Metrics { 3323 m, err := globalSiteReplicationSys.getSiteMetrics(ctx) 3324 if err != nil { 3325 return info, err 3326 } 3327 info.Metrics = m 3328 } 3329 3330 // maximum buckets users etc seen across sites 3331 info.MaxBuckets = len(bucketStats) 3332 info.MaxUsers = len(userInfoStats) 3333 info.MaxGroups = len(groupDescStats) 3334 info.MaxPolicies = len(policyStats) 3335 info.MaxILMExpiryRules = len(ilmExpiryRuleStats) 3336 return 3337 } 3338 3339 // isReplicated returns true if count of replicated matches the number of 3340 // sites and there is atmost one unique entry in the set. 3341 func isReplicated(cntReplicated, total int, valSet set.StringSet) bool { 3342 if cntReplicated > 0 && cntReplicated < total { 3343 return false 3344 } 3345 if len(valSet) > 1 { 3346 // mismatch - one or more sites has differing tags/policy 3347 return false 3348 } 3349 return true 3350 } 3351 3352 // isIAMPolicyReplicated returns true if count of replicated IAM policies matches total 3353 // number of sites and IAM policies are identical. 3354 func isIAMPolicyReplicated(cntReplicated, total int, policies []*policy.Policy) bool { 3355 if cntReplicated > 0 && cntReplicated != total { 3356 return false 3357 } 3358 // check if policies match between sites 3359 var prev *policy.Policy 3360 for i, p := range policies { 3361 if i == 0 { 3362 prev = p 3363 continue 3364 } 3365 if !prev.Equals(*p) { 3366 return false 3367 } 3368 } 3369 return true 3370 } 3371 3372 // isPolicyMappingReplicated returns true if count of replicated IAM policy mappings matches total 3373 // number of sites and IAM policy mappings are identical. 
3374 func isPolicyMappingReplicated(cntReplicated, total int, policies []madmin.SRPolicyMapping) bool { 3375 if cntReplicated > 0 && cntReplicated != total { 3376 return false 3377 } 3378 // check if policies match between sites 3379 var prev madmin.SRPolicyMapping 3380 for i, p := range policies { 3381 if i == 0 { 3382 prev = p 3383 continue 3384 } 3385 if prev.IsGroup != p.IsGroup || 3386 prev.Policy != p.Policy || 3387 prev.UserOrGroup != p.UserOrGroup { 3388 return false 3389 } 3390 } 3391 return true 3392 } 3393 3394 func isUserInfoReplicated(cntReplicated, total int, uis []madmin.UserInfo) bool { 3395 if cntReplicated > 0 && cntReplicated != total { 3396 return false 3397 } 3398 // check if policies match between sites 3399 var prev madmin.UserInfo 3400 for i, ui := range uis { 3401 if i == 0 { 3402 prev = ui 3403 continue 3404 } 3405 if !isUserInfoEqual(prev, ui) { 3406 return false 3407 } 3408 } 3409 return true 3410 } 3411 3412 func isGroupDescReplicated(cntReplicated, total int, gds []madmin.GroupDesc) bool { 3413 if cntReplicated > 0 && cntReplicated != total { 3414 return false 3415 } 3416 // check if policies match between sites 3417 var prev madmin.GroupDesc 3418 for i, gd := range gds { 3419 if i == 0 { 3420 prev = gd 3421 continue 3422 } 3423 if !isGroupDescEqual(prev, gd) { 3424 return false 3425 } 3426 } 3427 return true 3428 } 3429 3430 func isBktQuotaCfgReplicated(total int, quotaCfgs []*madmin.BucketQuota) bool { 3431 numquotaCfgs := 0 3432 for _, q := range quotaCfgs { 3433 if q == nil { 3434 continue 3435 } 3436 numquotaCfgs++ 3437 } 3438 if numquotaCfgs == 0 { 3439 return true 3440 } 3441 if numquotaCfgs > 0 && numquotaCfgs != total { 3442 return false 3443 } 3444 var prev *madmin.BucketQuota 3445 for i, q := range quotaCfgs { 3446 if q == nil { 3447 return false 3448 } 3449 if i == 0 { 3450 prev = q 3451 continue 3452 } 3453 if prev.Quota != q.Quota || prev.Type != q.Type { 3454 return false 3455 } 3456 } 3457 return true 3458 } 3459 3460 // isBktPolicyReplicated returns true if count of replicated bucket policies matches total 3461 // number of sites and bucket policies are identical. 3462 func isBktPolicyReplicated(total int, policies []*policy.BucketPolicy) bool { 3463 numPolicies := 0 3464 for _, p := range policies { 3465 if p == nil { 3466 continue 3467 } 3468 numPolicies++ 3469 } 3470 if numPolicies > 0 && numPolicies != total { 3471 return false 3472 } 3473 // check if policies match between sites 3474 var prev *policy.BucketPolicy 3475 for i, p := range policies { 3476 if p == nil { 3477 continue 3478 } 3479 if i == 0 { 3480 prev = p 3481 continue 3482 } 3483 if !prev.Equals(*p) { 3484 return false 3485 } 3486 } 3487 return true 3488 } 3489 3490 // isBktReplCfgReplicated returns true if all the sites have same number 3491 // of replication rules with all replication features enabled. 
3492 func isBktReplCfgReplicated(total int, cfgs []*sreplication.Config) bool { 3493 cntReplicated := 0 3494 for _, c := range cfgs { 3495 if c == nil { 3496 continue 3497 } 3498 cntReplicated++ 3499 } 3500 3501 if cntReplicated > 0 && cntReplicated != total { 3502 return false 3503 } 3504 // check if policies match between sites 3505 var prev *sreplication.Config 3506 for i, c := range cfgs { 3507 if c == nil { 3508 continue 3509 } 3510 if i == 0 { 3511 prev = c 3512 continue 3513 } 3514 if len(prev.Rules) != len(c.Rules) { 3515 return false 3516 } 3517 if len(c.Rules) != total-1 { 3518 return false 3519 } 3520 for _, r := range c.Rules { 3521 if !strings.HasPrefix(r.ID, "site-repl-") { 3522 return false 3523 } 3524 if r.DeleteMarkerReplication.Status == sreplication.Disabled || 3525 r.DeleteReplication.Status == sreplication.Disabled || 3526 r.ExistingObjectReplication.Status == sreplication.Disabled || 3527 r.SourceSelectionCriteria.ReplicaModifications.Status == sreplication.Disabled { 3528 return false 3529 } 3530 } 3531 } 3532 return true 3533 } 3534 3535 // isILMExpRuleReplicated returns true if count of replicated ILM Expiry rules matches total 3536 // number of sites and ILM expiry rules are identical. 3537 func isILMExpRuleReplicated(cntReplicated, total int, rules []*lifecycle.Rule) bool { 3538 if cntReplicated > 0 && cntReplicated != total { 3539 return false 3540 } 3541 // check if policies match between sites 3542 var prev *lifecycle.Rule 3543 for i, r := range rules { 3544 if i == 0 { 3545 prev = r 3546 continue 3547 } 3548 // Check equality of rules 3549 prevRData, err := xml.Marshal(prev) 3550 if err != nil { 3551 return false 3552 } 3553 rData, err := xml.Marshal(*r) 3554 if err != nil { 3555 return false 3556 } 3557 if !(string(prevRData) == string(rData)) { 3558 return false 3559 } 3560 } 3561 return true 3562 } 3563 3564 // cache of IAM info fetched in last SiteReplicationMetaInfo call 3565 type srIAMCache struct { 3566 sync.RWMutex 3567 lastUpdate time.Time 3568 srIAMInfo madmin.SRInfo // caches IAM info 3569 } 3570 3571 func (c *SiteReplicationSys) getSRCachedIAMInfo() (info madmin.SRInfo, ok bool) { 3572 c.iamMetaCache.RLock() 3573 defer c.iamMetaCache.RUnlock() 3574 if c.iamMetaCache.lastUpdate.IsZero() { 3575 return info, false 3576 } 3577 if time.Since(c.iamMetaCache.lastUpdate) < siteHealTimeInterval { 3578 return c.iamMetaCache.srIAMInfo, true 3579 } 3580 return info, false 3581 } 3582 3583 func (c *SiteReplicationSys) srCacheIAMInfo(info madmin.SRInfo) { 3584 c.iamMetaCache.Lock() 3585 defer c.iamMetaCache.Unlock() 3586 c.iamMetaCache.srIAMInfo = info 3587 c.iamMetaCache.lastUpdate = time.Now() 3588 } 3589 3590 // SiteReplicationMetaInfo returns the metadata info on buckets, policies etc for the replicated site 3591 func (c *SiteReplicationSys) SiteReplicationMetaInfo(ctx context.Context, objAPI ObjectLayer, opts madmin.SRStatusOptions) (info madmin.SRInfo, err error) { 3592 if objAPI == nil { 3593 return info, errSRObjectLayerNotReady 3594 } 3595 c.RLock() 3596 defer c.RUnlock() 3597 if !c.enabled { 3598 return info, nil 3599 } 3600 info.DeploymentID = globalDeploymentID() 3601 if opts.Buckets || opts.Entity == madmin.SRBucketEntity { 3602 var ( 3603 buckets []BucketInfo 3604 err error 3605 ) 3606 if opts.Entity == madmin.SRBucketEntity { 3607 bi, err := objAPI.GetBucketInfo(ctx, opts.EntityValue, BucketOptions{Deleted: opts.ShowDeleted}) 3608 if err != nil { 3609 if isErrBucketNotFound(err) { 3610 return info, nil 3611 } 3612 return info, 
errSRBackendIssue(err)
3613 }
3614 buckets = append(buckets, bi)
3615 } else {
3616 buckets, err = objAPI.ListBuckets(ctx, BucketOptions{Deleted: opts.ShowDeleted})
3617 if err != nil {
3618 return info, errSRBackendIssue(err)
3619 }
3620 }
3621 info.Buckets = make(map[string]madmin.SRBucketInfo, len(buckets))
3622 for _, bucketInfo := range buckets {
3623 bucket := bucketInfo.Name
3624 bucketExists := bucketInfo.Deleted.IsZero() || (!bucketInfo.Created.IsZero() && bucketInfo.Created.After(bucketInfo.Deleted))
3625 bms := madmin.SRBucketInfo{
3626 Bucket: bucket,
3627 CreatedAt: bucketInfo.Created.UTC(),
3628 DeletedAt: bucketInfo.Deleted.UTC(),
3629 }
3630 if !bucketExists {
3631 info.Buckets[bucket] = bms
3632 continue
3633 }
3634
3635 meta, err := globalBucketMetadataSys.GetConfigFromDisk(ctx, bucket)
3636 if err != nil && !errors.Is(err, errConfigNotFound) {
3637 return info, errSRBackendIssue(err)
3638 }
3639
3640 bms.Policy = meta.PolicyConfigJSON
3641 bms.PolicyUpdatedAt = meta.PolicyConfigUpdatedAt
3642
3643 if len(meta.TaggingConfigXML) > 0 {
3644 tagCfgStr := base64.StdEncoding.EncodeToString(meta.TaggingConfigXML)
3645 bms.Tags = &tagCfgStr
3646 bms.TagConfigUpdatedAt = meta.TaggingConfigUpdatedAt
3647 }
3648
3649 if len(meta.VersioningConfigXML) > 0 {
3650 versioningCfgStr := base64.StdEncoding.EncodeToString(meta.VersioningConfigXML)
3651 bms.Versioning = &versioningCfgStr
3652 bms.VersioningConfigUpdatedAt = meta.VersioningConfigUpdatedAt
3653 }
3654
3655 if len(meta.ObjectLockConfigXML) > 0 {
3656 objLockStr := base64.StdEncoding.EncodeToString(meta.ObjectLockConfigXML)
3657 bms.ObjectLockConfig = &objLockStr
3658 bms.ObjectLockConfigUpdatedAt = meta.ObjectLockConfigUpdatedAt
3659 }
3660
3661 if len(meta.QuotaConfigJSON) > 0 {
3662 quotaConfigStr := base64.StdEncoding.EncodeToString(meta.QuotaConfigJSON)
3663 bms.QuotaConfig = &quotaConfigStr
3664 bms.QuotaConfigUpdatedAt = meta.QuotaConfigUpdatedAt
3665 }
3666
3667 if len(meta.EncryptionConfigXML) > 0 {
3668 sseConfigStr := base64.StdEncoding.EncodeToString(meta.EncryptionConfigXML)
3669 bms.SSEConfig = &sseConfigStr
3670 bms.SSEConfigUpdatedAt = meta.EncryptionConfigUpdatedAt
3671 }
3672
3673 if len(meta.ReplicationConfigXML) > 0 {
3674 rcfgXMLStr := base64.StdEncoding.EncodeToString(meta.ReplicationConfigXML)
3675 bms.ReplicationConfig = &rcfgXMLStr
3676 bms.ReplicationConfigUpdatedAt = meta.ReplicationConfigUpdatedAt
3677 }
3678
3679 if meta.lifecycleConfig != nil {
3680 var expLclCfg lifecycle.Lifecycle
3681 expLclCfg.XMLName = meta.lifecycleConfig.XMLName
3682 for _, rule := range meta.lifecycleConfig.Rules {
3683 if !rule.Expiration.IsNull() || !rule.NoncurrentVersionExpiration.IsNull() {
3684 // copy the non-transition details of the rule
3685 expLclCfg.Rules = append(expLclCfg.Rules, rule.CloneNonTransition())
3686 }
3687 }
3688 expLclCfg.ExpiryUpdatedAt = meta.lifecycleConfig.ExpiryUpdatedAt
3689 ilmConfigData, err := xml.Marshal(expLclCfg)
3690 if err != nil {
3691 return info, errSRBackendIssue(err)
3692 }
3693
3694 expLclCfgStr := base64.StdEncoding.EncodeToString(ilmConfigData)
3695 bms.ExpiryLCConfig = &expLclCfgStr
3696 // if the config has only non-expiry rules, ExpiryUpdatedAt would be nil
3697 if meta.lifecycleConfig.ExpiryUpdatedAt != nil {
3698 bms.ExpiryLCConfigUpdatedAt = *(meta.lifecycleConfig.ExpiryUpdatedAt)
3699 }
3700 }
3701
3702 info.Buckets[bucket] = bms
3703 }
3704 }
3705
3706 if opts.Users && opts.Groups && opts.Policies && !opts.Buckets {
3707 // serialize SiteReplicationMetaInfo calls - if data in cache is
within 3708 // healing interval, avoid fetching IAM data again from disk. 3709 if metaInfo, ok := c.getSRCachedIAMInfo(); ok { 3710 return metaInfo, nil 3711 } 3712 } 3713 if opts.Policies || opts.Entity == madmin.SRPolicyEntity { 3714 var allPolicies map[string]PolicyDoc 3715 if opts.Entity == madmin.SRPolicyEntity { 3716 if p, err := globalIAMSys.store.GetPolicyDoc(opts.EntityValue); err == nil { 3717 allPolicies = map[string]PolicyDoc{opts.EntityValue: p} 3718 } 3719 } else { 3720 // Replicate IAM policies on local to all peers. 3721 allPolicies, err = globalIAMSys.store.listPolicyDocs(ctx, "") 3722 if err != nil { 3723 return info, errSRBackendIssue(err) 3724 } 3725 } 3726 info.Policies = make(map[string]madmin.SRIAMPolicy, len(allPolicies)) 3727 for pname, policyDoc := range allPolicies { 3728 policyJSON, err := json.Marshal(policyDoc.Policy) 3729 if err != nil { 3730 return info, wrapSRErr(err) 3731 } 3732 info.Policies[pname] = madmin.SRIAMPolicy{Policy: json.RawMessage(policyJSON), UpdatedAt: policyDoc.UpdateDate} 3733 } 3734 } 3735 if opts.ILMExpiryRules || opts.Entity == madmin.SRILMExpiryRuleEntity { 3736 info.ILMExpiryRules = make(map[string]madmin.ILMExpiryRule) 3737 buckets, err := objAPI.ListBuckets(ctx, BucketOptions{Deleted: opts.ShowDeleted}) 3738 if err != nil { 3739 return info, errSRBackendIssue(err) 3740 } 3741 3742 allRules := make(map[string]madmin.ILMExpiryRule) 3743 for _, bucketInfo := range buckets { 3744 bucket := bucketInfo.Name 3745 bucketExists := bucketInfo.Deleted.IsZero() || (!bucketInfo.Created.IsZero() && bucketInfo.Created.After(bucketInfo.Deleted)) 3746 if !bucketExists { 3747 continue 3748 } 3749 3750 meta, err := globalBucketMetadataSys.GetConfigFromDisk(ctx, bucket) 3751 if err != nil && !errors.Is(err, errConfigNotFound) { 3752 return info, errSRBackendIssue(err) 3753 } 3754 3755 if meta.lifecycleConfig != nil && meta.lifecycleConfig.HasExpiry() { 3756 var updatedAt time.Time 3757 if meta.lifecycleConfig.ExpiryUpdatedAt != nil { 3758 updatedAt = *meta.lifecycleConfig.ExpiryUpdatedAt 3759 } 3760 for _, rule := range meta.lifecycleConfig.Rules { 3761 if !rule.Expiration.IsNull() || !rule.NoncurrentVersionExpiration.IsNull() { 3762 // copy the non transition details of the rule 3763 ruleData, err := xml.Marshal(rule.CloneNonTransition()) 3764 if err != nil { 3765 return info, errSRBackendIssue(err) 3766 } 3767 allRules[rule.ID] = madmin.ILMExpiryRule{ILMRule: string(ruleData), Bucket: bucket, UpdatedAt: updatedAt} 3768 } 3769 } 3770 } 3771 } 3772 if opts.Entity == madmin.SRILMExpiryRuleEntity { 3773 if rule, ok := allRules[opts.EntityValue]; ok { 3774 info.ILMExpiryRules[opts.EntityValue] = rule 3775 } 3776 } else { 3777 for id, rule := range allRules { 3778 info.ILMExpiryRules[id] = rule 3779 } 3780 } 3781 } 3782 if opts.PeerState { 3783 info.State = madmin.SRStateInfo{ 3784 Name: c.state.Name, 3785 Peers: c.state.Peers, 3786 UpdatedAt: c.state.UpdatedAt, 3787 } 3788 } 3789 3790 if opts.Users || opts.Entity == madmin.SRUserEntity { 3791 // Replicate policy mappings on local to all peers. 
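// The policy mappings below are collected into three separate maps -- regular
// users (regUser), temporary STS credentials (stsUser) and service accounts
// (svcUser) -- and then flattened into info.UserPolicies keyed by user name,
// with the originating IAM user type recorded on each madmin.SRPolicyMapping.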
3792 userPolicyMap := xsync.NewMapOf[string, MappedPolicy]() 3793 stsPolicyMap := xsync.NewMapOf[string, MappedPolicy]() 3794 svcPolicyMap := xsync.NewMapOf[string, MappedPolicy]() 3795 if opts.Entity == madmin.SRUserEntity { 3796 if mp, ok := globalIAMSys.store.GetMappedPolicy(opts.EntityValue, false); ok { 3797 userPolicyMap.Store(opts.EntityValue, mp) 3798 } 3799 } else { 3800 stsErr := globalIAMSys.store.loadMappedPolicies(ctx, stsUser, false, stsPolicyMap) 3801 if stsErr != nil { 3802 return info, errSRBackendIssue(stsErr) 3803 } 3804 usrErr := globalIAMSys.store.loadMappedPolicies(ctx, regUser, false, userPolicyMap) 3805 if usrErr != nil { 3806 return info, errSRBackendIssue(usrErr) 3807 } 3808 svcErr := globalIAMSys.store.loadMappedPolicies(ctx, svcUser, false, svcPolicyMap) 3809 if svcErr != nil { 3810 return info, errSRBackendIssue(svcErr) 3811 } 3812 } 3813 info.UserPolicies = make(map[string]madmin.SRPolicyMapping, userPolicyMap.Size()) 3814 addPolicy := func(t IAMUserType, mp *xsync.MapOf[string, MappedPolicy]) { 3815 mp.Range(func(k string, mp MappedPolicy) bool { 3816 info.UserPolicies[k] = madmin.SRPolicyMapping{ 3817 IsGroup: false, 3818 UserOrGroup: k, 3819 UserType: int(t), 3820 Policy: mp.Policies, 3821 UpdatedAt: mp.UpdatedAt, 3822 } 3823 return true 3824 }) 3825 } 3826 addPolicy(regUser, userPolicyMap) 3827 addPolicy(stsUser, stsPolicyMap) 3828 addPolicy(svcUser, svcPolicyMap) 3829 3830 info.UserInfoMap = make(map[string]madmin.UserInfo) 3831 if opts.Entity == madmin.SRUserEntity { 3832 if ui, err := globalIAMSys.GetUserInfo(ctx, opts.EntityValue); err == nil { 3833 info.UserInfoMap[opts.EntityValue] = ui 3834 } 3835 } else { 3836 userAccounts := make(map[string]UserIdentity) 3837 uerr := globalIAMSys.store.loadUsers(ctx, regUser, userAccounts) 3838 if uerr != nil { 3839 return info, errSRBackendIssue(uerr) 3840 } 3841 3842 serr := globalIAMSys.store.loadUsers(ctx, svcUser, userAccounts) 3843 if serr != nil { 3844 return info, errSRBackendIssue(serr) 3845 } 3846 3847 terr := globalIAMSys.store.loadUsers(ctx, stsUser, userAccounts) 3848 if terr != nil { 3849 return info, errSRBackendIssue(terr) 3850 } 3851 3852 for k, v := range userAccounts { 3853 if k == siteReplicatorSvcAcc { 3854 // skip the site replicate svc account as it is 3855 // already replicated. 3856 continue 3857 } 3858 3859 if v.Credentials.ParentUser != "" && v.Credentials.ParentUser == globalActiveCred.AccessKey { 3860 // skip all root user service accounts. 3861 continue 3862 } 3863 3864 info.UserInfoMap[k] = madmin.UserInfo{ 3865 Status: madmin.AccountStatus(v.Credentials.Status), 3866 } 3867 } 3868 } 3869 } 3870 3871 if opts.Groups || opts.Entity == madmin.SRGroupEntity { 3872 // Replicate policy mappings on local to all peers. 
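// Group policy mappings are gathered the same way, except that the STS and
// regular-user stores are merged into a single map and each resulting
// madmin.SRPolicyMapping is marked with IsGroup set to true.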
3873 groupPolicyMap := xsync.NewMapOf[string, MappedPolicy]() 3874 if opts.Entity == madmin.SRGroupEntity { 3875 if mp, ok := globalIAMSys.store.GetMappedPolicy(opts.EntityValue, true); ok { 3876 groupPolicyMap.Store(opts.EntityValue, mp) 3877 } 3878 } else { 3879 stsErr := globalIAMSys.store.loadMappedPolicies(ctx, stsUser, true, groupPolicyMap) 3880 if stsErr != nil { 3881 return info, errSRBackendIssue(stsErr) 3882 } 3883 userErr := globalIAMSys.store.loadMappedPolicies(ctx, regUser, true, groupPolicyMap) 3884 if userErr != nil { 3885 return info, errSRBackendIssue(userErr) 3886 } 3887 } 3888 3889 info.GroupPolicies = make(map[string]madmin.SRPolicyMapping, groupPolicyMap.Size()) 3890 groupPolicyMap.Range(func(group string, mp MappedPolicy) bool { 3891 info.GroupPolicies[group] = madmin.SRPolicyMapping{ 3892 IsGroup: true, 3893 UserOrGroup: group, 3894 Policy: mp.Policies, 3895 UpdatedAt: mp.UpdatedAt, 3896 } 3897 return true 3898 }) 3899 info.GroupDescMap = make(map[string]madmin.GroupDesc) 3900 if opts.Entity == madmin.SRGroupEntity { 3901 if gd, err := globalIAMSys.GetGroupDescription(opts.EntityValue); err == nil { 3902 info.GroupDescMap[opts.EntityValue] = gd 3903 } 3904 } else { 3905 // get users/group info on local. 3906 groups, errG := globalIAMSys.store.listGroups(ctx) 3907 if errG != nil { 3908 return info, errSRBackendIssue(errG) 3909 } 3910 groupDescMap := make(map[string]madmin.GroupDesc, len(groups)) 3911 for _, g := range groups { 3912 groupDescMap[g], errG = globalIAMSys.GetGroupDescription(g) 3913 if errG != nil { 3914 return info, errSRBackendIssue(errG) 3915 } 3916 } 3917 for group, d := range groupDescMap { 3918 info.GroupDescMap[group] = d 3919 } 3920 } 3921 } 3922 // cache SR metadata info for IAM 3923 if opts.Users && opts.Groups && opts.Policies && !opts.Buckets { 3924 c.srCacheIAMInfo(info) 3925 } 3926 3927 return info, nil 3928 } 3929 3930 // EditPeerCluster - edits replication configuration and updates peer endpoint. 
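// A minimal sketch of a caller, for illustration only (the endpoint value, the
// placeholder deployment ID and the package-level globalSiteReplicationSys
// instance are assumptions here; the real entry point is the admin API handler
// that decodes madmin.PeerInfo and madmin.SREditOptions from the request):
//
//	peer := madmin.PeerInfo{
//		DeploymentID: "<peer-deployment-id>",        // must identify a peer, not the local site
//		Endpoint:     "https://minio2.example.com", // new endpoint for that peer
//	}
//	status, err := globalSiteReplicationSys.EditPeerCluster(ctx, peer, madmin.SREditOptions{})
//	if err == nil {
//		fmt.Println(status.Status) // human-readable summary of what was updated
//	}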
3931 func (c *SiteReplicationSys) EditPeerCluster(ctx context.Context, peer madmin.PeerInfo, opts madmin.SREditOptions) (madmin.ReplicateEditStatus, error) { 3932 sites, err := c.GetClusterInfo(ctx) 3933 if err != nil { 3934 return madmin.ReplicateEditStatus{}, errSRBackendIssue(err) 3935 } 3936 if !sites.Enabled { 3937 return madmin.ReplicateEditStatus{}, errSRNotEnabled 3938 } 3939 3940 var ( 3941 found bool 3942 admClient *madmin.AdminClient 3943 ) 3944 3945 if globalDeploymentID() == peer.DeploymentID && !peer.SyncState.Empty() && !peer.DefaultBandwidth.IsSet { 3946 return madmin.ReplicateEditStatus{}, errSRInvalidRequest(fmt.Errorf("a peer cluster, rather than the local cluster (endpoint=%s, deployment-id=%s) needs to be specified while setting a 'sync' replication mode", peer.Endpoint, peer.DeploymentID)) 3947 } 3948 3949 for _, v := range sites.Sites { 3950 if peer.DeploymentID == v.DeploymentID { 3951 found = true 3952 if (!peer.SyncState.Empty() || peer.DefaultBandwidth.IsSet) && peer.Endpoint == "" { // peer.Endpoint may be "" if only sync state/bandwidth is being updated 3953 break 3954 } 3955 if peer.Endpoint == v.Endpoint && peer.SyncState.Empty() && !peer.DefaultBandwidth.IsSet { 3956 return madmin.ReplicateEditStatus{}, errSRInvalidRequest(fmt.Errorf("Endpoint %s entered for deployment id %s already configured in site replication", v.Endpoint, v.DeploymentID)) 3957 } 3958 admClient, err = c.getAdminClientWithEndpoint(ctx, v.DeploymentID, peer.Endpoint) 3959 if err != nil { 3960 return madmin.ReplicateEditStatus{}, errSRPeerResp(fmt.Errorf("unable to create admin client for %s: %w", v.Name, err)) 3961 } 3962 // check if endpoint is reachable 3963 info, err := admClient.ServerInfo(ctx) 3964 if err != nil { 3965 return madmin.ReplicateEditStatus{}, errSRInvalidRequest(fmt.Errorf("Endpoint %s not reachable: %w", peer.Endpoint, err)) 3966 } 3967 if info.DeploymentID != v.DeploymentID { 3968 return madmin.ReplicateEditStatus{}, errSRInvalidRequest(fmt.Errorf("Endpoint %s does not belong to deployment expected: %s (found %s) ", peer.Endpoint, v.DeploymentID, info.DeploymentID)) 3969 } 3970 } 3971 } 3972 3973 // if disable/enable ILM expiry replication, deployment id not needed. 3974 // check for below error only if other options being updated (e.g. 
endpoint, sync, bandwidth) 3975 if !opts.DisableILMExpiryReplication && !opts.EnableILMExpiryReplication && !found { 3976 return madmin.ReplicateEditStatus{}, errSRInvalidRequest(fmt.Errorf("%s not found in existing replicated sites", peer.DeploymentID)) 3977 } 3978 successMsg := "Cluster replication configuration updated successfully with:" 3979 var state srState 3980 c.RLock() 3981 state = c.state 3982 c.RUnlock() 3983 3984 // in case of --disable-ilm-expiry-replication and --enable-ilm-expiry-replication 3985 // --deployment-id is not passed 3986 var ( 3987 prevPeerInfo, pi madmin.PeerInfo 3988 ) 3989 if peer.DeploymentID != "" { 3990 pi = c.state.Peers[peer.DeploymentID] 3991 prevPeerInfo = pi 3992 if !peer.SyncState.Empty() { // update replication to peer to be sync/async 3993 pi.SyncState = peer.SyncState 3994 successMsg = fmt.Sprintf("%s\n- sync state %s for peer %s", successMsg, peer.SyncState, peer.Name) 3995 } 3996 if peer.Endpoint != "" { // `admin replicate update` requested an endpoint change 3997 pi.Endpoint = peer.Endpoint 3998 successMsg = fmt.Sprintf("%s\n- endpoint %s for peer %s", successMsg, peer.Endpoint, peer.Name) 3999 } 4000 4001 if peer.DefaultBandwidth.IsSet { 4002 if peer.DeploymentID == globalDeploymentID() { 4003 return madmin.ReplicateEditStatus{}, errSRInvalidRequest(fmt.Errorf("invalid deployment id specified: expecting a peer deployment-id to be specified for restricting bandwidth from %s, found self %s", peer.Name, globalDeploymentID())) 4004 } 4005 pi.DefaultBandwidth = peer.DefaultBandwidth 4006 pi.DefaultBandwidth.UpdatedAt = UTCNow() 4007 successMsg = fmt.Sprintf("%s\n- default bandwidth %v for peer %s", successMsg, peer.DefaultBandwidth.Limit, peer.Name) 4008 } 4009 state.Peers[peer.DeploymentID] = pi 4010 } 4011 4012 // If ILM expiry replications enabled/disabled, set accordingly 4013 if opts.DisableILMExpiryReplication { 4014 for dID, pi := range state.Peers { 4015 if !pi.ReplicateILMExpiry { 4016 return madmin.ReplicateEditStatus{ 4017 Status: madmin.ReplicateAddStatusPartial, 4018 ErrDetail: "ILM expiry already set to false", 4019 }, nil 4020 } 4021 pi.ReplicateILMExpiry = false 4022 state.Peers[dID] = pi 4023 } 4024 successMsg = fmt.Sprintf("%s\n- replicate-ilm-expiry: false", successMsg) 4025 } 4026 if opts.EnableILMExpiryReplication { 4027 for dID, pi := range state.Peers { 4028 if pi.ReplicateILMExpiry { 4029 return madmin.ReplicateEditStatus{ 4030 Status: madmin.ReplicateAddStatusPartial, 4031 ErrDetail: "ILM expiry already set to true", 4032 }, nil 4033 } 4034 pi.ReplicateILMExpiry = true 4035 state.Peers[dID] = pi 4036 } 4037 successMsg = fmt.Sprintf("%s\n- replicate-ilm-expiry: true", successMsg) 4038 } 4039 state.UpdatedAt = time.Now() 4040 4041 errs := make(map[string]error, len(state.Peers)) 4042 var wg sync.WaitGroup 4043 4044 for dID, v := range state.Peers { 4045 if v.DeploymentID == globalDeploymentID() { 4046 continue 4047 } 4048 // if individual deployment change like mode, endpoint, default bandwidth 4049 // send it to all sites. 
Else send the current node details to all sites 4050 // for ILM expiry flag update 4051 var p madmin.PeerInfo 4052 if peer.DeploymentID != "" { 4053 p = pi 4054 } else { 4055 p = v 4056 } 4057 wg.Add(1) 4058 go func(pi madmin.PeerInfo, dID string) { 4059 defer wg.Done() 4060 admClient, err := c.getAdminClient(ctx, dID) 4061 if dID == peer.DeploymentID { 4062 admClient, err = c.getAdminClientWithEndpoint(ctx, dID, pi.Endpoint) 4063 } 4064 if err != nil { 4065 errs[dID] = errSRPeerResp(fmt.Errorf("unable to create admin client for %s: %w", pi.Name, err)) 4066 return 4067 } 4068 if err = admClient.SRPeerEdit(ctx, pi); err != nil { 4069 errs[dID] = errSRPeerResp(fmt.Errorf("unable to update peer %s: %w", pi.Name, err)) 4070 return 4071 } 4072 }(p, dID) 4073 } 4074 4075 wg.Wait() 4076 for dID, err := range errs { 4077 logger.LogOnceIf(ctx, fmt.Errorf("unable to update peer %s: %w", state.Peers[dID].Name, err), "site-relication-edit") 4078 } 4079 4080 // we can now save the cluster replication configuration state. 4081 if err = c.saveToDisk(ctx, state); err != nil { 4082 return madmin.ReplicateEditStatus{ 4083 Status: madmin.ReplicateAddStatusPartial, 4084 ErrDetail: fmt.Sprintf("unable to save cluster-replication state on local: %v", err), 4085 }, nil 4086 } 4087 4088 if peer.DeploymentID != "" { 4089 if err = c.updateTargetEndpoints(ctx, prevPeerInfo, pi); err != nil { 4090 return madmin.ReplicateEditStatus{ 4091 Status: madmin.ReplicateAddStatusPartial, 4092 ErrDetail: fmt.Sprintf("unable to update peer targets on local: %v", err), 4093 }, nil 4094 } 4095 } 4096 4097 // set partial error message if remote site updates failed for few cases 4098 if len(errs) > 0 { 4099 successMsg = fmt.Sprintf("%s\n- partially failed for few remote sites as they could be down/unreachable at the moment", successMsg) 4100 } 4101 result := madmin.ReplicateEditStatus{ 4102 Success: true, 4103 Status: successMsg, 4104 } 4105 return result, nil 4106 } 4107 4108 func (c *SiteReplicationSys) updateTargetEndpoints(ctx context.Context, prevInfo, peer madmin.PeerInfo) error { 4109 objAPI := newObjectLayerFn() 4110 if objAPI == nil { 4111 return errSRObjectLayerNotReady 4112 } 4113 4114 buckets, err := objAPI.ListBuckets(ctx, BucketOptions{}) 4115 if err != nil { 4116 return err 4117 } 4118 4119 for _, bucketInfo := range buckets { 4120 bucket := bucketInfo.Name 4121 ep, _ := url.Parse(peer.Endpoint) 4122 prevEp, _ := url.Parse(prevInfo.Endpoint) 4123 targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 4124 if err != nil { 4125 continue // site healing will take care of configuring new targets 4126 } 4127 for _, target := range targets.Targets { 4128 if target.SourceBucket == bucket && 4129 target.TargetBucket == bucket && 4130 target.Endpoint == prevEp.Host && 4131 target.Secure == (prevEp.Scheme == "https") && 4132 target.Type == madmin.ReplicationService { 4133 bucketTarget := target 4134 bucketTarget.Secure = ep.Scheme == "https" 4135 bucketTarget.Endpoint = ep.Host 4136 if peer.DefaultBandwidth.IsSet && target.BandwidthLimit == 0 { 4137 bucketTarget.BandwidthLimit = int64(peer.DefaultBandwidth.Limit) 4138 } 4139 if !peer.SyncState.Empty() { 4140 bucketTarget.ReplicationSync = (peer.SyncState == madmin.SyncEnabled) 4141 } 4142 err := globalBucketTargetSys.SetTarget(ctx, bucket, &bucketTarget, true) 4143 if err != nil { 4144 logger.LogIf(ctx, c.annotatePeerErr(peer.Name, "Bucket target creation error", err)) 4145 continue 4146 } 4147 targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 
4148 if err != nil { 4149 logger.LogIf(ctx, err) 4150 continue 4151 } 4152 tgtBytes, err := json.Marshal(&targets) 4153 if err != nil { 4154 logger.LogIf(ctx, err) 4155 continue 4156 } 4157 if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil { 4158 logger.LogIf(ctx, err) 4159 continue 4160 } 4161 } 4162 } 4163 } 4164 return nil 4165 } 4166 4167 // PeerEditReq - internal API handler to respond to a peer cluster's request 4168 // to edit endpoint. 4169 func (c *SiteReplicationSys) PeerEditReq(ctx context.Context, arg madmin.PeerInfo) error { 4170 ourName := "" 4171 4172 // Set ReplicateILMExpiry for all peers 4173 currTime := time.Now() 4174 for i := range c.state.Peers { 4175 p := c.state.Peers[i] 4176 if p.ReplicateILMExpiry == arg.ReplicateILMExpiry { 4177 // its already set due to previous edit req 4178 break 4179 } 4180 p.ReplicateILMExpiry = arg.ReplicateILMExpiry 4181 c.state.UpdatedAt = currTime 4182 c.state.Peers[i] = p 4183 } 4184 4185 for i := range c.state.Peers { 4186 p := c.state.Peers[i] 4187 if p.DeploymentID == arg.DeploymentID { 4188 p.Endpoint = arg.Endpoint 4189 c.state.Peers[arg.DeploymentID] = p 4190 } 4191 if p.DeploymentID == globalDeploymentID() { 4192 ourName = p.Name 4193 } 4194 } 4195 if err := c.saveToDisk(ctx, c.state); err != nil { 4196 return errSRBackendIssue(fmt.Errorf("unable to save cluster-replication state to drive on %s: %v", ourName, err)) 4197 } 4198 return nil 4199 } 4200 4201 // PeerStateEditReq - internal API handler to respond to a peer cluster's request 4202 // to edit state. 4203 func (c *SiteReplicationSys) PeerStateEditReq(ctx context.Context, arg madmin.SRStateEditReq) error { 4204 if arg.UpdatedAt.After(c.state.UpdatedAt) { 4205 state := c.state 4206 // update only the ReplicateILMExpiry flag for the peers from incoming request 4207 for _, peer := range arg.Peers { 4208 currPeer := c.state.Peers[peer.DeploymentID] 4209 currPeer.ReplicateILMExpiry = peer.ReplicateILMExpiry 4210 state.Peers[peer.DeploymentID] = currPeer 4211 } 4212 state.UpdatedAt = arg.UpdatedAt 4213 if err := c.saveToDisk(ctx, state); err != nil { 4214 return errSRBackendIssue(fmt.Errorf("unable to save cluster-replication state to drive on %s: %v", state.Name, err)) 4215 } 4216 } 4217 return nil 4218 } 4219 4220 const siteHealTimeInterval = 30 * time.Second 4221 4222 func (c *SiteReplicationSys) startHealRoutine(ctx context.Context, objAPI ObjectLayer) { 4223 ctx, cancel := globalLeaderLock.GetLock(ctx) 4224 defer cancel() 4225 4226 healTimer := time.NewTimer(siteHealTimeInterval) 4227 defer healTimer.Stop() 4228 4229 var maxRefreshDurationSecondsForLog float64 = 10 // 10 seconds.. 4230 4231 for { 4232 select { 4233 case <-healTimer.C: 4234 c.RLock() 4235 enabled := c.enabled 4236 c.RUnlock() 4237 if enabled { 4238 refreshStart := time.Now() 4239 c.healIAMSystem(ctx, objAPI) // heal IAM system first 4240 c.healBuckets(ctx, objAPI) // heal buckets subsequently 4241 4242 took := time.Since(refreshStart).Seconds() 4243 if took > maxRefreshDurationSecondsForLog { 4244 // Log if we took a lot of time. 
4245 logger.Info("Site replication healing refresh took %.2fs", took) 4246 } 4247 4248 // wait for 200 millisecond, if we are experience lot of I/O 4249 waitForLowIO(runtime.GOMAXPROCS(0), 200*time.Millisecond, currentHTTPIO) 4250 } 4251 healTimer.Reset(siteHealTimeInterval) 4252 4253 case <-ctx.Done(): 4254 return 4255 } 4256 } 4257 } 4258 4259 type srBucketStatsSummary struct { 4260 madmin.SRBucketStatsSummary 4261 meta srBucketMetaInfo 4262 } 4263 4264 type srPolicyStatsSummary struct { 4265 madmin.SRPolicyStatsSummary 4266 policy srPolicy 4267 } 4268 4269 type srUserStatsSummary struct { 4270 madmin.SRUserStatsSummary 4271 userInfo srUserInfo 4272 userPolicy srPolicyMapping 4273 } 4274 4275 type srGroupStatsSummary struct { 4276 madmin.SRGroupStatsSummary 4277 groupDesc srGroupDesc 4278 groupPolicy srPolicyMapping 4279 } 4280 4281 type srILMExpiryRuleStatsSummary struct { 4282 madmin.SRILMExpiryStatsSummary 4283 ilmExpiryRule srILMExpiryRule 4284 } 4285 4286 type srStatusInfo struct { 4287 // SRStatusInfo returns detailed status on site replication status 4288 Enabled bool 4289 MaxBuckets int // maximum buckets seen across sites 4290 MaxUsers int // maximum users seen across sites 4291 MaxGroups int // maximum groups seen across sites 4292 MaxPolicies int // maximum policies across sites 4293 MaxILMExpiryRules int // maximum ILM expiry rules across sites 4294 Sites map[string]madmin.PeerInfo // deployment->sitename 4295 StatsSummary map[string]madmin.SRSiteSummary // map of deployment id -> site stat 4296 // BucketStats map of bucket to slice of deployment IDs with stats. This is populated only if there are 4297 // mismatches or if a specific bucket's stats are requested 4298 BucketStats map[string]map[string]srBucketStatsSummary 4299 // PolicyStats map of policy to slice of deployment IDs with stats. This is populated only if there are 4300 // mismatches or if a specific bucket's stats are requested 4301 PolicyStats map[string]map[string]srPolicyStatsSummary 4302 // UserStats map of user to slice of deployment IDs with stats. This is populated only if there are 4303 // mismatches or if a specific bucket's stats are requested 4304 UserStats map[string]map[string]srUserStatsSummary 4305 // GroupStats map of group to slice of deployment IDs with stats. This is populated only if there are 4306 // mismatches or if a specific bucket's stats are requested 4307 GroupStats map[string]map[string]srGroupStatsSummary 4308 // ILMExpiryRulesStats map of ILM expiry rules to slice of deployment IDs with stats. This is populated only if there are 4309 // mismatches or if a specific ILM expiry rule's stats are requested 4310 ILMExpiryRulesStats map[string]map[string]srILMExpiryRuleStatsSummary 4311 // PeerStates map of site replication sites to their site replication states 4312 PeerStates map[string]madmin.SRStateInfo 4313 Metrics madmin.SRMetricsSummary 4314 UpdatedAt time.Time 4315 } 4316 4317 // SRBucketDeleteOp - type of delete op 4318 type SRBucketDeleteOp string 4319 4320 const ( 4321 // MarkDelete creates .minio.sys/buckets/.deleted/<bucket> vol entry to hold onto deleted bucket's state 4322 // until peers are synced in site replication setup. 
4323 MarkDelete SRBucketDeleteOp = "MarkDelete" 4324 4325 // Purge deletes the .minio.sys/buckets/.deleted/<bucket> vol entry 4326 Purge SRBucketDeleteOp = "Purge" 4327 // NoOp no action needed 4328 NoOp SRBucketDeleteOp = "NoOp" 4329 ) 4330 4331 // Empty returns true if this Op is not set 4332 func (s SRBucketDeleteOp) Empty() bool { 4333 return string(s) == "" || string(s) == string(NoOp) 4334 } 4335 4336 func getSRBucketDeleteOp(isSiteReplicated bool) SRBucketDeleteOp { 4337 if !isSiteReplicated { 4338 return NoOp 4339 } 4340 return MarkDelete 4341 } 4342 4343 func (c *SiteReplicationSys) healILMExpiryConfig(ctx context.Context, objAPI ObjectLayer, info srStatusInfo) error { 4344 c.RLock() 4345 defer c.RUnlock() 4346 if !c.enabled { 4347 return nil 4348 } 4349 var ( 4350 latestID, latestPeerName string 4351 lastUpdate time.Time 4352 latestPeers map[string]madmin.PeerInfo 4353 ) 4354 4355 for dID, ps := range info.PeerStates { 4356 if lastUpdate.IsZero() { 4357 lastUpdate = ps.UpdatedAt 4358 latestID = dID 4359 latestPeers = ps.Peers 4360 } 4361 if ps.UpdatedAt.After(lastUpdate) { 4362 lastUpdate = ps.UpdatedAt 4363 latestID = dID 4364 latestPeers = ps.Peers 4365 } 4366 } 4367 latestPeerName = info.Sites[latestID].Name 4368 4369 for dID, ps := range info.PeerStates { 4370 // If latest peers ILM expiry flags are equal to current peer, no need to heal 4371 flagEqual := true 4372 for id, peer := range latestPeers { 4373 if !(ps.Peers[id].ReplicateILMExpiry == peer.ReplicateILMExpiry) { 4374 flagEqual = false 4375 break 4376 } 4377 } 4378 if flagEqual { 4379 continue 4380 } 4381 4382 // Dont apply the self state to self 4383 if dID == globalDeploymentID() { 4384 continue 4385 } 4386 4387 // Send details to other sites for healing 4388 admClient, err := c.getAdminClient(ctx, dID) 4389 if err != nil { 4390 return wrapSRErr(err) 4391 } 4392 if err = admClient.SRStateEdit(ctx, madmin.SRStateEditReq{Peers: latestPeers, UpdatedAt: lastUpdate}); err != nil { 4393 logger.LogIf(ctx, c.annotatePeerErr(ps.Name, siteReplicationEdit, 4394 fmt.Errorf("Unable to heal site replication state for peer %s from peer %s : %w", 4395 ps.Name, latestPeerName, err))) 4396 } 4397 } 4398 return nil 4399 } 4400 4401 func (c *SiteReplicationSys) healBuckets(ctx context.Context, objAPI ObjectLayer) error { 4402 buckets, err := c.listBuckets(ctx) 4403 if err != nil { 4404 return err 4405 } 4406 ilmExpiryCfgHealed := false 4407 for _, bi := range buckets { 4408 bucket := bi.Name 4409 info, err := c.siteReplicationStatus(ctx, objAPI, madmin.SRStatusOptions{ 4410 Entity: madmin.SRBucketEntity, 4411 EntityValue: bucket, 4412 ShowDeleted: true, 4413 ILMExpiryRules: true, 4414 PeerState: true, 4415 }) 4416 if err != nil { 4417 return err 4418 } 4419 4420 c.healBucket(ctx, objAPI, bucket, info) 4421 4422 if bi.Deleted.IsZero() || (!bi.Created.IsZero() && bi.Deleted.Before(bi.Created)) { 4423 c.healVersioningMetadata(ctx, objAPI, bucket, info) 4424 c.healOLockConfigMetadata(ctx, objAPI, bucket, info) 4425 c.healSSEMetadata(ctx, objAPI, bucket, info) 4426 c.healBucketReplicationConfig(ctx, objAPI, bucket, info) 4427 c.healBucketPolicies(ctx, objAPI, bucket, info) 4428 c.healTagMetadata(ctx, objAPI, bucket, info) 4429 c.healBucketQuotaConfig(ctx, objAPI, bucket, info) 4430 if !ilmExpiryCfgHealed { 4431 c.healILMExpiryConfig(ctx, objAPI, info) 4432 ilmExpiryCfgHealed = true 4433 } 4434 if ilmExpiryReplicationEnabled(c.state.Peers) { 4435 c.healBucketILMExpiry(ctx, objAPI, bucket, info) 4436 } 4437 } 4438 // Notification and ILM are 
site specific settings. 4439 } 4440 return nil 4441 } 4442 4443 func (c *SiteReplicationSys) healBucketILMExpiry(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4444 bs := info.BucketStats[bucket] 4445 4446 c.RLock() 4447 defer c.RUnlock() 4448 if !c.enabled { 4449 return nil 4450 } 4451 4452 var ( 4453 latestID, latestPeerName string 4454 lastUpdate time.Time 4455 latestExpLCConfig *string 4456 ) 4457 4458 for dID, ss := range bs { 4459 if lastUpdate.IsZero() { 4460 lastUpdate = ss.meta.ExpiryLCConfigUpdatedAt 4461 latestID = dID 4462 latestExpLCConfig = ss.meta.ExpiryLCConfig 4463 } 4464 // avoid considering just created buckets as latest. Perhaps this site 4465 // just joined cluster replication and yet to be sync'd 4466 if ss.meta.CreatedAt.Equal(ss.meta.ExpiryLCConfigUpdatedAt) { 4467 continue 4468 } 4469 if ss.meta.ExpiryLCConfigUpdatedAt.After(lastUpdate) { 4470 lastUpdate = ss.meta.ExpiryLCConfigUpdatedAt 4471 latestID = dID 4472 latestExpLCConfig = ss.meta.ExpiryLCConfig 4473 } 4474 } 4475 latestPeerName = info.Sites[latestID].Name 4476 var err error 4477 if latestExpLCConfig != nil { 4478 _, err = base64.StdEncoding.DecodeString(*latestExpLCConfig) 4479 if err != nil { 4480 return err 4481 } 4482 } 4483 4484 for dID, bStatus := range bs { 4485 if latestExpLCConfig != nil && bStatus.meta.ExpiryLCConfig != nil && strings.EqualFold(*latestExpLCConfig, *bStatus.meta.ExpiryLCConfig) { 4486 continue 4487 } 4488 4489 finalConfigData, err := mergeWithCurrentLCConfig(ctx, bucket, latestExpLCConfig, lastUpdate) 4490 if err != nil { 4491 return wrapSRErr(err) 4492 } 4493 4494 if dID == globalDeploymentID() { 4495 if _, err := globalBucketMetadataSys.Update(ctx, bucket, bucketLifecycleConfig, finalConfigData); err != nil { 4496 logger.LogIf(ctx, fmt.Errorf("Unable to heal bucket ILM expiry data from peer site %s : %w", latestPeerName, err)) 4497 } 4498 continue 4499 } 4500 4501 admClient, err := c.getAdminClient(ctx, dID) 4502 if err != nil { 4503 return wrapSRErr(err) 4504 } 4505 peerName := info.Sites[dID].Name 4506 if err = admClient.SRPeerReplicateBucketMeta(ctx, madmin.SRBucketMeta{ 4507 Type: madmin.SRBucketMetaLCConfig, 4508 Bucket: bucket, 4509 ExpiryLCConfig: latestExpLCConfig, 4510 UpdatedAt: lastUpdate, 4511 }); err != nil { 4512 logger.LogIf(ctx, c.annotatePeerErr(peerName, replicateBucketMetadata, 4513 fmt.Errorf("Unable to heal bucket ILM expiry data for peer %s from peer %s : %w", 4514 peerName, latestPeerName, err))) 4515 } 4516 } 4517 return nil 4518 } 4519 4520 func (c *SiteReplicationSys) healTagMetadata(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4521 bs := info.BucketStats[bucket] 4522 4523 c.RLock() 4524 defer c.RUnlock() 4525 if !c.enabled { 4526 return nil 4527 } 4528 var ( 4529 latestID, latestPeerName string 4530 lastUpdate time.Time 4531 latestTaggingConfig *string 4532 ) 4533 4534 for dID, ss := range bs { 4535 if lastUpdate.IsZero() { 4536 lastUpdate = ss.meta.TagConfigUpdatedAt 4537 latestID = dID 4538 latestTaggingConfig = ss.meta.Tags 4539 } 4540 // avoid considering just created buckets as latest. 
Perhaps this site 4541 // just joined cluster replication and yet to be sync'd 4542 if ss.meta.CreatedAt.Equal(ss.meta.TagConfigUpdatedAt) { 4543 continue 4544 } 4545 if ss.meta.TagConfigUpdatedAt.After(lastUpdate) { 4546 lastUpdate = ss.meta.TagConfigUpdatedAt 4547 latestID = dID 4548 latestTaggingConfig = ss.meta.Tags 4549 } 4550 } 4551 latestPeerName = info.Sites[latestID].Name 4552 var latestTaggingConfigBytes []byte 4553 var err error 4554 if latestTaggingConfig != nil { 4555 latestTaggingConfigBytes, err = base64.StdEncoding.DecodeString(*latestTaggingConfig) 4556 if err != nil { 4557 return err 4558 } 4559 } 4560 for dID, bStatus := range bs { 4561 if !bStatus.TagMismatch { 4562 continue 4563 } 4564 if isBucketMetadataEqual(latestTaggingConfig, bStatus.meta.Tags) { 4565 continue 4566 } 4567 if dID == globalDeploymentID() { 4568 if _, err := globalBucketMetadataSys.Update(ctx, bucket, bucketTaggingConfig, latestTaggingConfigBytes); err != nil { 4569 logger.LogIf(ctx, fmt.Errorf("Unable to heal tagging metadata from peer site %s : %w", latestPeerName, err)) 4570 } 4571 continue 4572 } 4573 4574 admClient, err := c.getAdminClient(ctx, dID) 4575 if err != nil { 4576 return wrapSRErr(err) 4577 } 4578 peerName := info.Sites[dID].Name 4579 err = admClient.SRPeerReplicateBucketMeta(ctx, madmin.SRBucketMeta{ 4580 Type: madmin.SRBucketMetaTypeTags, 4581 Bucket: bucket, 4582 Tags: latestTaggingConfig, 4583 }) 4584 if err != nil { 4585 logger.LogIf(ctx, c.annotatePeerErr(peerName, replicateBucketMetadata, 4586 fmt.Errorf("Unable to heal tagging metadata for peer %s from peer %s : %w", peerName, latestPeerName, err))) 4587 } 4588 } 4589 return nil 4590 } 4591 4592 func (c *SiteReplicationSys) healBucketPolicies(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4593 bs := info.BucketStats[bucket] 4594 4595 c.RLock() 4596 defer c.RUnlock() 4597 if !c.enabled { 4598 return nil 4599 } 4600 var ( 4601 latestID, latestPeerName string 4602 lastUpdate time.Time 4603 latestIAMPolicy json.RawMessage 4604 ) 4605 4606 for dID, ss := range bs { 4607 if lastUpdate.IsZero() { 4608 lastUpdate = ss.meta.PolicyUpdatedAt 4609 latestID = dID 4610 latestIAMPolicy = ss.meta.Policy 4611 } 4612 // avoid considering just created buckets as latest. 
Perhaps this site 4613 // just joined cluster replication and yet to be sync'd 4614 if ss.meta.CreatedAt.Equal(ss.meta.PolicyUpdatedAt) { 4615 continue 4616 } 4617 if ss.meta.PolicyUpdatedAt.After(lastUpdate) { 4618 lastUpdate = ss.meta.PolicyUpdatedAt 4619 latestID = dID 4620 latestIAMPolicy = ss.meta.Policy 4621 } 4622 } 4623 latestPeerName = info.Sites[latestID].Name 4624 for dID, bStatus := range bs { 4625 if !bStatus.PolicyMismatch { 4626 continue 4627 } 4628 if strings.EqualFold(string(latestIAMPolicy), string(bStatus.meta.Policy)) { 4629 continue 4630 } 4631 if dID == globalDeploymentID() { 4632 if _, err := globalBucketMetadataSys.Update(ctx, bucket, bucketPolicyConfig, latestIAMPolicy); err != nil { 4633 logger.LogIf(ctx, fmt.Errorf("Unable to heal bucket policy metadata from peer site %s : %w", latestPeerName, err)) 4634 } 4635 continue 4636 } 4637 4638 admClient, err := c.getAdminClient(ctx, dID) 4639 if err != nil { 4640 return wrapSRErr(err) 4641 } 4642 peerName := info.Sites[dID].Name 4643 if err = admClient.SRPeerReplicateBucketMeta(ctx, madmin.SRBucketMeta{ 4644 Type: madmin.SRBucketMetaTypePolicy, 4645 Bucket: bucket, 4646 Policy: latestIAMPolicy, 4647 UpdatedAt: lastUpdate, 4648 }); err != nil { 4649 logger.LogIf(ctx, c.annotatePeerErr(peerName, replicateBucketMetadata, 4650 fmt.Errorf("Unable to heal bucket policy metadata for peer %s from peer %s : %w", 4651 peerName, latestPeerName, err))) 4652 } 4653 } 4654 return nil 4655 } 4656 4657 func (c *SiteReplicationSys) healBucketQuotaConfig(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4658 bs := info.BucketStats[bucket] 4659 4660 c.RLock() 4661 defer c.RUnlock() 4662 if !c.enabled { 4663 return nil 4664 } 4665 var ( 4666 latestID, latestPeerName string 4667 lastUpdate time.Time 4668 latestQuotaConfig *string 4669 latestQuotaConfigBytes []byte 4670 ) 4671 4672 for dID, ss := range bs { 4673 if lastUpdate.IsZero() { 4674 lastUpdate = ss.meta.QuotaConfigUpdatedAt 4675 latestID = dID 4676 latestQuotaConfig = ss.meta.QuotaConfig 4677 } 4678 // avoid considering just created buckets as latest. 
Perhaps this site 4679 // just joined cluster replication and yet to be sync'd 4680 if ss.meta.CreatedAt.Equal(ss.meta.QuotaConfigUpdatedAt) { 4681 continue 4682 } 4683 if ss.meta.QuotaConfigUpdatedAt.After(lastUpdate) { 4684 lastUpdate = ss.meta.QuotaConfigUpdatedAt 4685 latestID = dID 4686 latestQuotaConfig = ss.meta.QuotaConfig 4687 } 4688 } 4689 4690 var err error 4691 if latestQuotaConfig != nil { 4692 latestQuotaConfigBytes, err = base64.StdEncoding.DecodeString(*latestQuotaConfig) 4693 if err != nil { 4694 return err 4695 } 4696 } 4697 4698 latestPeerName = info.Sites[latestID].Name 4699 for dID, bStatus := range bs { 4700 if !bStatus.QuotaCfgMismatch { 4701 continue 4702 } 4703 if isBucketMetadataEqual(latestQuotaConfig, bStatus.meta.QuotaConfig) { 4704 continue 4705 } 4706 if dID == globalDeploymentID() { 4707 if _, err := globalBucketMetadataSys.Update(ctx, bucket, bucketQuotaConfigFile, latestQuotaConfigBytes); err != nil { 4708 logger.LogIf(ctx, fmt.Errorf("Unable to heal quota metadata from peer site %s : %w", latestPeerName, err)) 4709 } 4710 continue 4711 } 4712 4713 admClient, err := c.getAdminClient(ctx, dID) 4714 if err != nil { 4715 return wrapSRErr(err) 4716 } 4717 peerName := info.Sites[dID].Name 4718 4719 if err = admClient.SRPeerReplicateBucketMeta(ctx, madmin.SRBucketMeta{ 4720 Type: madmin.SRBucketMetaTypeQuotaConfig, 4721 Bucket: bucket, 4722 Quota: latestQuotaConfigBytes, 4723 UpdatedAt: lastUpdate, 4724 }); err != nil { 4725 logger.LogIf(ctx, c.annotatePeerErr(peerName, replicateBucketMetadata, 4726 fmt.Errorf("Unable to heal quota config metadata for peer %s from peer %s : %w", 4727 peerName, latestPeerName, err))) 4728 } 4729 } 4730 return nil 4731 } 4732 4733 func (c *SiteReplicationSys) healVersioningMetadata(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4734 c.RLock() 4735 defer c.RUnlock() 4736 if !c.enabled { 4737 return nil 4738 } 4739 var ( 4740 latestID, latestPeerName string 4741 lastUpdate time.Time 4742 latestVersioningConfig *string 4743 ) 4744 4745 bs := info.BucketStats[bucket] 4746 for dID, ss := range bs { 4747 if lastUpdate.IsZero() { 4748 lastUpdate = ss.meta.VersioningConfigUpdatedAt 4749 latestID = dID 4750 latestVersioningConfig = ss.meta.Versioning 4751 } 4752 // avoid considering just created buckets as latest. 
Perhaps this site 4753 // just joined cluster replication and yet to be sync'd 4754 if ss.meta.CreatedAt.Equal(ss.meta.VersioningConfigUpdatedAt) { 4755 continue 4756 } 4757 if ss.meta.VersioningConfigUpdatedAt.After(lastUpdate) { 4758 lastUpdate = ss.meta.VersioningConfigUpdatedAt 4759 latestID = dID 4760 latestVersioningConfig = ss.meta.Versioning 4761 } 4762 } 4763 4764 latestPeerName = info.Sites[latestID].Name 4765 var latestVersioningConfigBytes []byte 4766 var err error 4767 if latestVersioningConfig != nil { 4768 latestVersioningConfigBytes, err = base64.StdEncoding.DecodeString(*latestVersioningConfig) 4769 if err != nil { 4770 return err 4771 } 4772 } 4773 4774 for dID, bStatus := range bs { 4775 if !bStatus.VersioningConfigMismatch { 4776 continue 4777 } 4778 if isBucketMetadataEqual(latestVersioningConfig, bStatus.meta.Versioning) { 4779 continue 4780 } 4781 if dID == globalDeploymentID() { 4782 if _, err := globalBucketMetadataSys.Update(ctx, bucket, bucketVersioningConfig, latestVersioningConfigBytes); err != nil { 4783 logger.LogIf(ctx, fmt.Errorf("Unable to heal versioning metadata from peer site %s : %w", latestPeerName, err)) 4784 } 4785 continue 4786 } 4787 4788 admClient, err := c.getAdminClient(ctx, dID) 4789 if err != nil { 4790 return wrapSRErr(err) 4791 } 4792 peerName := info.Sites[dID].Name 4793 err = admClient.SRPeerReplicateBucketMeta(ctx, madmin.SRBucketMeta{ 4794 Type: madmin.SRBucketMetaTypeVersionConfig, 4795 Bucket: bucket, 4796 Versioning: latestVersioningConfig, 4797 UpdatedAt: lastUpdate, 4798 }) 4799 if err != nil { 4800 logger.LogIf(ctx, c.annotatePeerErr(peerName, replicateBucketMetadata, 4801 fmt.Errorf("Unable to heal versioning config metadata for peer %s from peer %s : %w", 4802 peerName, latestPeerName, err))) 4803 } 4804 } 4805 return nil 4806 } 4807 4808 func (c *SiteReplicationSys) healSSEMetadata(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4809 c.RLock() 4810 defer c.RUnlock() 4811 if !c.enabled { 4812 return nil 4813 } 4814 var ( 4815 latestID, latestPeerName string 4816 lastUpdate time.Time 4817 latestSSEConfig *string 4818 ) 4819 4820 bs := info.BucketStats[bucket] 4821 for dID, ss := range bs { 4822 if lastUpdate.IsZero() { 4823 lastUpdate = ss.meta.SSEConfigUpdatedAt 4824 latestID = dID 4825 latestSSEConfig = ss.meta.SSEConfig 4826 } 4827 // avoid considering just created buckets as latest. 
Perhaps this site 4828 // just joined cluster replication and yet to be sync'd 4829 if ss.meta.CreatedAt.Equal(ss.meta.SSEConfigUpdatedAt) { 4830 continue 4831 } 4832 if ss.meta.SSEConfigUpdatedAt.After(lastUpdate) { 4833 lastUpdate = ss.meta.SSEConfigUpdatedAt 4834 latestID = dID 4835 latestSSEConfig = ss.meta.SSEConfig 4836 } 4837 } 4838 4839 latestPeerName = info.Sites[latestID].Name 4840 var latestSSEConfigBytes []byte 4841 var err error 4842 if latestSSEConfig != nil { 4843 latestSSEConfigBytes, err = base64.StdEncoding.DecodeString(*latestSSEConfig) 4844 if err != nil { 4845 return err 4846 } 4847 } 4848 4849 for dID, bStatus := range bs { 4850 if !bStatus.SSEConfigMismatch { 4851 continue 4852 } 4853 if isBucketMetadataEqual(latestSSEConfig, bStatus.meta.SSEConfig) { 4854 continue 4855 } 4856 if dID == globalDeploymentID() { 4857 if _, err := globalBucketMetadataSys.Update(ctx, bucket, bucketSSEConfig, latestSSEConfigBytes); err != nil { 4858 logger.LogIf(ctx, fmt.Errorf("Unable to heal sse metadata from peer site %s : %w", latestPeerName, err)) 4859 } 4860 continue 4861 } 4862 4863 admClient, err := c.getAdminClient(ctx, dID) 4864 if err != nil { 4865 return wrapSRErr(err) 4866 } 4867 peerName := info.Sites[dID].Name 4868 err = admClient.SRPeerReplicateBucketMeta(ctx, madmin.SRBucketMeta{ 4869 Type: madmin.SRBucketMetaTypeSSEConfig, 4870 Bucket: bucket, 4871 SSEConfig: latestSSEConfig, 4872 UpdatedAt: lastUpdate, 4873 }) 4874 if err != nil { 4875 logger.LogIf(ctx, c.annotatePeerErr(peerName, replicateBucketMetadata, 4876 fmt.Errorf("Unable to heal SSE config metadata for peer %s from peer %s : %w", 4877 peerName, latestPeerName, err))) 4878 } 4879 } 4880 return nil 4881 } 4882 4883 func (c *SiteReplicationSys) healOLockConfigMetadata(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4884 bs := info.BucketStats[bucket] 4885 4886 c.RLock() 4887 defer c.RUnlock() 4888 if !c.enabled { 4889 return nil 4890 } 4891 var ( 4892 latestID, latestPeerName string 4893 lastUpdate time.Time 4894 latestObjLockConfig *string 4895 ) 4896 4897 for dID, ss := range bs { 4898 if lastUpdate.IsZero() { 4899 lastUpdate = ss.meta.ObjectLockConfigUpdatedAt 4900 latestID = dID 4901 latestObjLockConfig = ss.meta.ObjectLockConfig 4902 } 4903 // avoid considering just created buckets as latest. 
Perhaps this site 4904 // just joined cluster replication and yet to be sync'd 4905 if ss.meta.CreatedAt.Equal(ss.meta.ObjectLockConfigUpdatedAt) { 4906 continue 4907 } 4908 if ss.meta.ObjectLockConfig != nil && ss.meta.ObjectLockConfigUpdatedAt.After(lastUpdate) { 4909 lastUpdate = ss.meta.ObjectLockConfigUpdatedAt 4910 latestID = dID 4911 latestObjLockConfig = ss.meta.ObjectLockConfig 4912 } 4913 } 4914 latestPeerName = info.Sites[latestID].Name 4915 var latestObjLockConfigBytes []byte 4916 var err error 4917 if latestObjLockConfig != nil { 4918 latestObjLockConfigBytes, err = base64.StdEncoding.DecodeString(*latestObjLockConfig) 4919 if err != nil { 4920 return err 4921 } 4922 } 4923 4924 for dID, bStatus := range bs { 4925 if !bStatus.OLockConfigMismatch { 4926 continue 4927 } 4928 if isBucketMetadataEqual(latestObjLockConfig, bStatus.meta.ObjectLockConfig) { 4929 continue 4930 } 4931 if dID == globalDeploymentID() { 4932 if _, err := globalBucketMetadataSys.Update(ctx, bucket, objectLockConfig, latestObjLockConfigBytes); err != nil { 4933 logger.LogIf(ctx, fmt.Errorf("Unable to heal objectlock config metadata from peer site %s : %w", latestPeerName, err)) 4934 } 4935 continue 4936 } 4937 4938 admClient, err := c.getAdminClient(ctx, dID) 4939 if err != nil { 4940 return wrapSRErr(err) 4941 } 4942 peerName := info.Sites[dID].Name 4943 err = admClient.SRPeerReplicateBucketMeta(ctx, madmin.SRBucketMeta{ 4944 Type: madmin.SRBucketMetaTypeObjectLockConfig, 4945 Bucket: bucket, 4946 Tags: latestObjLockConfig, 4947 UpdatedAt: lastUpdate, 4948 }) 4949 if err != nil { 4950 logger.LogIf(ctx, c.annotatePeerErr(peerName, replicateBucketMetadata, 4951 fmt.Errorf("Unable to heal object lock config metadata for peer %s from peer %s : %w", 4952 peerName, latestPeerName, err))) 4953 } 4954 } 4955 return nil 4956 } 4957 4958 func (c *SiteReplicationSys) purgeDeletedBucket(ctx context.Context, objAPI ObjectLayer, bucket string) { 4959 z, ok := objAPI.(*erasureServerPools) 4960 if !ok { 4961 return 4962 } 4963 z.s3Peer.DeleteBucket(context.Background(), pathJoin(minioMetaBucket, bucketMetaPrefix, deletedBucketsPrefix, bucket), DeleteBucketOptions{}) 4964 } 4965 4966 // healBucket creates/deletes the bucket according to latest state across clusters participating in site replication. 
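// Summarizing the logic below: the site whose bucket metadata carries the most
// recent create/delete timestamp is treated as authoritative, and healing is
// performed only when that site is the local deployment. If the latest event is
// a create and some peers are missing the bucket, the bucket is created there
// (with versioning, and object locking when configured) and replication targets
// are reconfigured. If the latest event is a delete, the op is NoOp when every
// site still holds the bucket marked deleted, Purge of the
// .minio.sys/buckets/.deleted/<bucket> entry when no site has the bucket
// anymore, and MarkDelete on the sites that still have it otherwise.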
4967 func (c *SiteReplicationSys) healBucket(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 4968 bs := info.BucketStats[bucket] 4969 c.RLock() 4970 defer c.RUnlock() 4971 if !c.enabled { 4972 return nil 4973 } 4974 numSites := len(c.state.Peers) 4975 mostRecent := func(d1, d2 time.Time) time.Time { 4976 if d1.IsZero() { 4977 return d2 4978 } 4979 if d2.IsZero() { 4980 return d1 4981 } 4982 if d1.After(d2) { 4983 return d1 4984 } 4985 return d2 4986 } 4987 4988 var ( 4989 latestID string 4990 lastUpdate time.Time 4991 withB []string 4992 missingB []string 4993 deletedCnt int 4994 ) 4995 for dID, ss := range bs { 4996 if lastUpdate.IsZero() { 4997 lastUpdate = mostRecent(ss.meta.CreatedAt, ss.meta.DeletedAt) 4998 latestID = dID 4999 } 5000 recentUpdt := mostRecent(ss.meta.CreatedAt, ss.meta.DeletedAt) 5001 if recentUpdt.After(lastUpdate) { 5002 lastUpdate = recentUpdt 5003 latestID = dID 5004 } 5005 if ss.BucketMarkedDeleted { 5006 deletedCnt++ 5007 } 5008 if ss.HasBucket { 5009 withB = append(withB, dID) 5010 } else { 5011 missingB = append(missingB, dID) 5012 } 5013 } 5014 5015 latestPeerName := info.Sites[latestID].Name 5016 bStatus := info.BucketStats[bucket][latestID].meta 5017 isMakeBucket := len(missingB) > 0 5018 deleteOp := NoOp 5019 if latestID != globalDeploymentID() { 5020 return nil 5021 } 5022 if lastUpdate.Equal(bStatus.DeletedAt) { 5023 isMakeBucket = false 5024 switch { 5025 case len(withB) == numSites && deletedCnt == numSites: 5026 deleteOp = NoOp 5027 case len(withB) == 0 && len(missingB) == numSites: 5028 deleteOp = Purge 5029 default: 5030 deleteOp = MarkDelete 5031 } 5032 } 5033 if isMakeBucket { 5034 var opts MakeBucketOptions 5035 optsMap := make(map[string]string) 5036 optsMap["versioningEnabled"] = "true" 5037 opts.VersioningEnabled = true 5038 opts.CreatedAt = bStatus.CreatedAt 5039 optsMap["createdAt"] = bStatus.CreatedAt.UTC().Format(time.RFC3339Nano) 5040 5041 if bStatus.ObjectLockConfig != nil { 5042 config, err := base64.StdEncoding.DecodeString(*bStatus.ObjectLockConfig) 5043 if err != nil { 5044 return err 5045 } 5046 if bytes.Equal([]byte(string(config)), enabledBucketObjectLockConfig) { 5047 optsMap["lockEnabled"] = "true" 5048 opts.LockEnabled = true 5049 } 5050 } 5051 for _, dID := range missingB { 5052 peerName := info.Sites[dID].Name 5053 if dID == globalDeploymentID() { 5054 err := c.PeerBucketMakeWithVersioningHandler(ctx, bucket, opts) 5055 if err != nil { 5056 return c.annotateErr(makeBucketWithVersion, fmt.Errorf("error healing bucket for site replication %w from %s -> %s", 5057 err, latestPeerName, peerName)) 5058 } 5059 } else { 5060 admClient, err := c.getAdminClient(ctx, dID) 5061 if err != nil { 5062 return c.annotateErr(configureReplication, fmt.Errorf("unable to use admin client for %s: %w", dID, err)) 5063 } 5064 if err = admClient.SRPeerBucketOps(ctx, bucket, madmin.MakeWithVersioningBktOp, optsMap); err != nil { 5065 return c.annotatePeerErr(peerName, makeBucketWithVersion, err) 5066 } 5067 if err = admClient.SRPeerBucketOps(ctx, bucket, madmin.ConfigureReplBktOp, nil); err != nil { 5068 return c.annotatePeerErr(peerName, configureReplication, err) 5069 } 5070 } 5071 } 5072 if len(missingB) > 0 { 5073 // configure replication from current cluster to other clusters 5074 err := c.PeerBucketConfigureReplHandler(ctx, bucket) 5075 if err != nil { 5076 return c.annotateErr(configureReplication, err) 5077 } 5078 } 5079 return nil 5080 } 5081 // all buckets are marked deleted across sites at this point. 
It should be safe to purge the .minio.sys/buckets/.deleted/<bucket> entry 5082 // from disk 5083 if deleteOp == Purge { 5084 for _, dID := range missingB { 5085 peerName := info.Sites[dID].Name 5086 if dID == globalDeploymentID() { 5087 c.purgeDeletedBucket(ctx, objAPI, bucket) 5088 } else { 5089 admClient, err := c.getAdminClient(ctx, dID) 5090 if err != nil { 5091 return c.annotateErr(configureReplication, fmt.Errorf("unable to use admin client for %s: %w", dID, err)) 5092 } 5093 if err = admClient.SRPeerBucketOps(ctx, bucket, madmin.PurgeDeletedBucketOp, nil); err != nil { 5094 return c.annotatePeerErr(peerName, deleteBucket, err) 5095 } 5096 } 5097 } 5098 } 5099 // Mark buckets deleted on remaining peers 5100 if deleteOp == MarkDelete { 5101 for _, dID := range withB { 5102 peerName := info.Sites[dID].Name 5103 if dID == globalDeploymentID() { 5104 err := c.PeerBucketDeleteHandler(ctx, bucket, DeleteBucketOptions{ 5105 Force: true, 5106 }) 5107 if err != nil { 5108 return c.annotateErr(deleteBucket, fmt.Errorf("error healing bucket for site replication %w from %s -> %s", 5109 err, latestPeerName, peerName)) 5110 } 5111 } else { 5112 admClient, err := c.getAdminClient(ctx, dID) 5113 if err != nil { 5114 return c.annotateErr(configureReplication, fmt.Errorf("unable to use admin client for %s: %w", dID, err)) 5115 } 5116 if err = admClient.SRPeerBucketOps(ctx, bucket, madmin.ForceDeleteBucketBktOp, nil); err != nil { 5117 return c.annotatePeerErr(peerName, deleteBucket, err) 5118 } 5119 } 5120 } 5121 } 5122 5123 return nil 5124 } 5125 5126 func (c *SiteReplicationSys) healBucketReplicationConfig(ctx context.Context, objAPI ObjectLayer, bucket string, info srStatusInfo) error { 5127 bs := info.BucketStats[bucket] 5128 5129 c.RLock() 5130 defer c.RUnlock() 5131 if !c.enabled { 5132 return nil 5133 } 5134 5135 var replMismatch bool 5136 for _, ss := range bs { 5137 if ss.ReplicationCfgMismatch { 5138 replMismatch = true 5139 break 5140 } 5141 } 5142 rcfg, _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, bucket) 5143 if err != nil { 5144 _, ok := err.(BucketReplicationConfigNotFound) 5145 if !ok { 5146 return err 5147 } 5148 replMismatch = true 5149 } 5150 5151 var ( 5152 epDeplIDMap = make(map[string]string) 5153 arnTgtMap = make(map[string]madmin.BucketTarget) 5154 ) 5155 if targetsPtr, _ := globalBucketTargetSys.ListBucketTargets(ctx, bucket); targetsPtr != nil { 5156 for _, t := range targetsPtr.Targets { 5157 arnTgtMap[t.Arn] = t 5158 } 5159 } 5160 for _, p := range c.state.Peers { 5161 epDeplIDMap[p.Endpoint] = p.DeploymentID 5162 } 5163 // fix stale ARN's in replication config and endpoint mismatch between site config and 5164 // targets associated to this config. 
5165 if rcfg != nil { 5166 for _, rule := range rcfg.Rules { 5167 if rule.Status != sreplication.Status(replication.Disabled) { 5168 tgt, isValidARN := arnTgtMap[rule.Destination.ARN] // detect stale ARN in replication config 5169 _, epFound := epDeplIDMap[tgt.URL().String()] // detect end point change at site level 5170 if !isValidARN || !epFound { 5171 replMismatch = true 5172 break 5173 } 5174 } 5175 } 5176 } 5177 5178 if rcfg != nil && !replMismatch { 5179 // validate remote targets on current cluster for this bucket 5180 _, apiErr := validateReplicationDestination(ctx, bucket, rcfg, false) 5181 if apiErr != noError { 5182 replMismatch = true 5183 } 5184 } 5185 5186 if replMismatch { 5187 logger.LogIf(ctx, c.annotateErr(configureReplication, c.PeerBucketConfigureReplHandler(ctx, bucket))) 5188 } 5189 return nil 5190 } 5191 5192 func isBucketMetadataEqual(one, two *string) bool { 5193 switch { 5194 case one == nil && two == nil: 5195 return true 5196 case one == nil || two == nil: 5197 return false 5198 default: 5199 return strings.EqualFold(*one, *two) 5200 } 5201 } 5202 5203 func (c *SiteReplicationSys) healIAMSystem(ctx context.Context, objAPI ObjectLayer) error { 5204 info, err := c.siteReplicationStatus(ctx, objAPI, madmin.SRStatusOptions{ 5205 Users: true, 5206 Policies: true, 5207 Groups: true, 5208 }) 5209 if err != nil { 5210 return err 5211 } 5212 for policy := range info.PolicyStats { 5213 c.healPolicies(ctx, objAPI, policy, info) 5214 } 5215 for user := range info.UserStats { 5216 c.healUsers(ctx, objAPI, user, info) 5217 } 5218 for group := range info.GroupStats { 5219 c.healGroups(ctx, objAPI, group, info) 5220 } 5221 for user := range info.UserStats { 5222 c.healUserPolicies(ctx, objAPI, user, info) 5223 } 5224 for group := range info.GroupStats { 5225 c.healGroupPolicies(ctx, objAPI, group, info) 5226 } 5227 5228 return nil 5229 } 5230 5231 // heal iam policies present on this site to peers, provided current cluster has the most recent update. 5232 func (c *SiteReplicationSys) healPolicies(ctx context.Context, objAPI ObjectLayer, policy string, info srStatusInfo) error { 5233 // create IAM policy on peer cluster if missing 5234 ps := info.PolicyStats[policy] 5235 5236 c.RLock() 5237 defer c.RUnlock() 5238 if !c.enabled { 5239 return nil 5240 } 5241 5242 var ( 5243 latestID, latestPeerName string 5244 lastUpdate time.Time 5245 latestPolicyStat srPolicyStatsSummary 5246 ) 5247 for dID, ss := range ps { 5248 if lastUpdate.IsZero() { 5249 lastUpdate = ss.policy.UpdatedAt 5250 latestID = dID 5251 latestPolicyStat = ss 5252 } 5253 if !ss.policy.UpdatedAt.IsZero() && ss.policy.UpdatedAt.After(lastUpdate) { 5254 lastUpdate = ss.policy.UpdatedAt 5255 latestID = dID 5256 latestPolicyStat = ss 5257 } 5258 } 5259 if latestID != globalDeploymentID() { 5260 // heal only from the site with latest info. 5261 return nil 5262 } 5263 latestPeerName = info.Sites[latestID].Name 5264 // heal policy of peers if peer does not have it. 
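// Note that a group mapping is not tied to a particular IAM user type (group
// mappings are loaded from both the STS and regular-user stores), so the
// replicated madmin.SRPolicyMapping below carries unknownIAMUserType.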
5265 for dID, pStatus := range ps { 5266 if dID == globalDeploymentID() { 5267 continue 5268 } 5269 if !pStatus.PolicyMismatch && pStatus.HasPolicy { 5270 continue 5271 } 5272 peerName := info.Sites[dID].Name 5273 err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 5274 Type: madmin.SRIAMItemPolicy, 5275 Name: policy, 5276 Policy: latestPolicyStat.policy.Policy, 5277 UpdatedAt: lastUpdate, 5278 }) 5279 if err != nil { 5280 logger.LogIf(ctx, fmt.Errorf("Unable to heal IAM policy %s from peer site %s -> site %s : %w", policy, latestPeerName, peerName, err)) 5281 } 5282 } 5283 return nil 5284 } 5285 5286 // heal user policy mappings present on this site to peers, provided current cluster has the most recent update. 5287 func (c *SiteReplicationSys) healUserPolicies(ctx context.Context, objAPI ObjectLayer, user string, info srStatusInfo) error { 5288 // create user policy mapping on peer cluster if missing 5289 us := info.UserStats[user] 5290 5291 c.RLock() 5292 defer c.RUnlock() 5293 if !c.enabled { 5294 return nil 5295 } 5296 var ( 5297 latestID, latestPeerName string 5298 lastUpdate time.Time 5299 latestUserStat srUserStatsSummary 5300 ) 5301 for dID, ss := range us { 5302 if lastUpdate.IsZero() { 5303 lastUpdate = ss.userPolicy.UpdatedAt 5304 latestID = dID 5305 latestUserStat = ss 5306 } 5307 if !ss.userPolicy.UpdatedAt.IsZero() && ss.userPolicy.UpdatedAt.After(lastUpdate) { 5308 lastUpdate = ss.userPolicy.UpdatedAt 5309 latestID = dID 5310 latestUserStat = ss 5311 } 5312 } 5313 if latestID != globalDeploymentID() { 5314 // heal only from the site with latest info. 5315 return nil 5316 } 5317 latestPeerName = info.Sites[latestID].Name 5318 // heal policy of peers if peer does not have it. 5319 for dID, pStatus := range us { 5320 if dID == globalDeploymentID() { 5321 continue 5322 } 5323 if !pStatus.PolicyMismatch && pStatus.HasPolicyMapping { 5324 continue 5325 } 5326 if isPolicyMappingEqual(pStatus.userPolicy, latestUserStat.userPolicy) { 5327 continue 5328 } 5329 peerName := info.Sites[dID].Name 5330 err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 5331 Type: madmin.SRIAMItemPolicyMapping, 5332 PolicyMapping: &madmin.SRPolicyMapping{ 5333 UserOrGroup: user, 5334 IsGroup: false, 5335 UserType: latestUserStat.userPolicy.UserType, 5336 Policy: latestUserStat.userPolicy.Policy, 5337 }, 5338 UpdatedAt: lastUpdate, 5339 }) 5340 if err != nil { 5341 logger.LogIf(ctx, fmt.Errorf("Unable to heal IAM user policy mapping for %s from peer site %s -> site %s : %w", user, latestPeerName, peerName, err)) 5342 } 5343 } 5344 return nil 5345 } 5346 5347 // heal group policy mappings present on this site to peers, provided current cluster has the most recent update. 
5348 func (c *SiteReplicationSys) healGroupPolicies(ctx context.Context, objAPI ObjectLayer, group string, info srStatusInfo) error { 5349 // create group policy mapping on peer cluster if missing 5350 gs := info.GroupStats[group] 5351 5352 c.RLock() 5353 defer c.RUnlock() 5354 if !c.enabled { 5355 return nil 5356 } 5357 5358 var ( 5359 latestID, latestPeerName string 5360 lastUpdate time.Time 5361 latestGroupStat srGroupStatsSummary 5362 ) 5363 for dID, ss := range gs { 5364 if lastUpdate.IsZero() { 5365 lastUpdate = ss.groupPolicy.UpdatedAt 5366 latestID = dID 5367 latestGroupStat = ss 5368 } 5369 if !ss.groupPolicy.UpdatedAt.IsZero() && ss.groupPolicy.UpdatedAt.After(lastUpdate) { 5370 lastUpdate = ss.groupPolicy.UpdatedAt 5371 latestID = dID 5372 latestGroupStat = ss 5373 } 5374 } 5375 if latestID != globalDeploymentID() { 5376 // heal only from the site with latest info. 5377 return nil 5378 } 5379 latestPeerName = info.Sites[latestID].Name 5380 // heal policy of peers if peer does not have it. 5381 for dID, pStatus := range gs { 5382 if dID == globalDeploymentID() { 5383 continue 5384 } 5385 if !pStatus.PolicyMismatch && pStatus.HasPolicyMapping { 5386 continue 5387 } 5388 if isPolicyMappingEqual(pStatus.groupPolicy, latestGroupStat.groupPolicy) { 5389 continue 5390 } 5391 peerName := info.Sites[dID].Name 5392 5393 err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 5394 Type: madmin.SRIAMItemPolicyMapping, 5395 PolicyMapping: &madmin.SRPolicyMapping{ 5396 UserOrGroup: group, 5397 IsGroup: true, 5398 UserType: int(unknownIAMUserType), 5399 Policy: latestGroupStat.groupPolicy.Policy, 5400 }, 5401 UpdatedAt: lastUpdate, 5402 }) 5403 if err != nil { 5404 logger.LogIf(ctx, fmt.Errorf("Unable to heal IAM group policy mapping for %s from peer site %s -> site %s : %w", group, latestPeerName, peerName, err)) 5405 } 5406 } 5407 return nil 5408 } 5409 5410 // heal all users and their service accounts that are present on this site, 5411 // provided current cluster has the most recent update. 5412 func (c *SiteReplicationSys) healUsers(ctx context.Context, objAPI ObjectLayer, user string, info srStatusInfo) error { 5413 // create user if missing; fix user policy mapping if missing 5414 us := info.UserStats[user] 5415 5416 c.RLock() 5417 defer c.RUnlock() 5418 if !c.enabled { 5419 return nil 5420 } 5421 var ( 5422 latestID, latestPeerName string 5423 lastUpdate time.Time 5424 latestUserStat srUserStatsSummary 5425 ) 5426 for dID, ss := range us { 5427 if lastUpdate.IsZero() { 5428 lastUpdate = ss.userInfo.UserInfo.UpdatedAt 5429 latestID = dID 5430 latestUserStat = ss 5431 } 5432 if !ss.userInfo.UserInfo.UpdatedAt.IsZero() && ss.userInfo.UserInfo.UpdatedAt.After(lastUpdate) { 5433 lastUpdate = ss.userInfo.UserInfo.UpdatedAt 5434 latestID = dID 5435 latestUserStat = ss 5436 } 5437 } 5438 if latestID != globalDeploymentID() { 5439 // heal only from the site with latest info. 
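		// If another deployment holds a newer UserInfo.UpdatedAt, this site stays
		// quiet and leaves the heal to that deployment, so a stale copy of a user
		// never overwrites a fresher one on the peers.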
5440 return nil 5441 } 5442 latestPeerName = info.Sites[latestID].Name 5443 for dID, uStatus := range us { 5444 if dID == globalDeploymentID() { 5445 continue 5446 } 5447 if !uStatus.UserInfoMismatch { 5448 continue 5449 } 5450 5451 if isUserInfoEqual(latestUserStat.userInfo.UserInfo, uStatus.userInfo.UserInfo) { 5452 continue 5453 } 5454 5455 peerName := info.Sites[dID].Name 5456 5457 u, ok := globalIAMSys.GetUser(ctx, user) 5458 if !ok { 5459 continue 5460 } 5461 creds := u.Credentials 5462 if creds.IsServiceAccount() { 5463 claims, err := globalIAMSys.GetClaimsForSvcAcc(ctx, creds.AccessKey) 5464 if err != nil { 5465 logger.LogIf(ctx, fmt.Errorf("Unable to heal service account %s from peer site %s -> %s : %w", user, latestPeerName, peerName, err)) 5466 continue 5467 } 5468 5469 _, policy, err := globalIAMSys.GetServiceAccount(ctx, creds.AccessKey) 5470 if err != nil { 5471 logger.LogIf(ctx, fmt.Errorf("Unable to heal service account %s from peer site %s -> %s : %w", user, latestPeerName, peerName, err)) 5472 continue 5473 } 5474 5475 var policyJSON []byte 5476 if policy != nil { 5477 policyJSON, err = json.Marshal(policy) 5478 if err != nil { 5479 logger.LogIf(ctx, fmt.Errorf("Unable to heal service account %s from peer site %s -> %s : %w", user, latestPeerName, peerName, err)) 5480 continue 5481 } 5482 } 5483 5484 if err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 5485 Type: madmin.SRIAMItemSvcAcc, 5486 SvcAccChange: &madmin.SRSvcAccChange{ 5487 Create: &madmin.SRSvcAccCreate{ 5488 Parent: creds.ParentUser, 5489 AccessKey: creds.AccessKey, 5490 SecretKey: creds.SecretKey, 5491 Groups: creds.Groups, 5492 Claims: claims, 5493 SessionPolicy: json.RawMessage(policyJSON), 5494 Status: creds.Status, 5495 Name: creds.Name, 5496 Description: creds.Description, 5497 Expiration: &creds.Expiration, 5498 }, 5499 }, 5500 UpdatedAt: lastUpdate, 5501 }); err != nil { 5502 logger.LogIf(ctx, fmt.Errorf("Unable to heal service account %s from peer site %s -> %s : %w", user, latestPeerName, peerName, err)) 5503 } 5504 continue 5505 } 5506 if creds.IsTemp() && !creds.IsExpired() { 5507 var parentPolicy string 5508 u, err := globalIAMSys.GetUserInfo(ctx, creds.ParentUser) 5509 if err != nil { 5510 // Parent may be "virtual" (for ldap, oidc, client tls auth, 5511 // custom auth plugin), so in such cases we apply no parent 5512 // policy. The session token will contain info about policy to 5513 // be applied. 5514 if !errors.Is(err, errNoSuchUser) { 5515 logger.LogIf(ctx, fmt.Errorf("Unable to heal temporary credentials %s from peer site %s -> %s : %w", user, latestPeerName, peerName, err)) 5516 continue 5517 } 5518 } else { 5519 parentPolicy = u.PolicyName 5520 } 5521 // Call hook for site replication. 
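			// The temporary credential is replicated as-is (access key, secret key and
			// session token) together with the parent policy mapping resolved above,
			// so the same STS session remains usable on the peer.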
5522 if err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 5523 Type: madmin.SRIAMItemSTSAcc, 5524 STSCredential: &madmin.SRSTSCredential{ 5525 AccessKey: creds.AccessKey, 5526 SecretKey: creds.SecretKey, 5527 SessionToken: creds.SessionToken, 5528 ParentUser: creds.ParentUser, 5529 ParentPolicyMapping: parentPolicy, 5530 }, 5531 UpdatedAt: lastUpdate, 5532 }); err != nil { 5533 logger.LogIf(ctx, fmt.Errorf("Unable to heal temporary credentials %s from peer site %s -> %s : %w", user, latestPeerName, peerName, err)) 5534 } 5535 continue 5536 } 5537 if err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 5538 Type: madmin.SRIAMItemIAMUser, 5539 IAMUser: &madmin.SRIAMUser{ 5540 AccessKey: user, 5541 IsDeleteReq: false, 5542 UserReq: &madmin.AddOrUpdateUserReq{ 5543 SecretKey: creds.SecretKey, 5544 Status: latestUserStat.userInfo.Status, 5545 }, 5546 }, 5547 UpdatedAt: lastUpdate, 5548 }); err != nil { 5549 logger.LogIf(ctx, fmt.Errorf("Unable to heal user %s from peer site %s -> %s : %w", user, latestPeerName, peerName, err)) 5550 } 5551 } 5552 return nil 5553 } 5554 5555 func (c *SiteReplicationSys) healGroups(ctx context.Context, objAPI ObjectLayer, group string, info srStatusInfo) error { 5556 c.RLock() 5557 defer c.RUnlock() 5558 if !c.enabled { 5559 return nil 5560 } 5561 5562 var ( 5563 latestID, latestPeerName string 5564 lastUpdate time.Time 5565 latestGroupStat srGroupStatsSummary 5566 ) 5567 // create group if missing; fix group policy mapping if missing 5568 gs, ok := info.GroupStats[group] 5569 if !ok { 5570 return nil 5571 } 5572 for dID, ss := range gs { 5573 if lastUpdate.IsZero() { 5574 lastUpdate = ss.groupDesc.UpdatedAt 5575 latestID = dID 5576 latestGroupStat = ss 5577 } 5578 if !ss.groupDesc.UpdatedAt.IsZero() && ss.groupDesc.UpdatedAt.After(lastUpdate) { 5579 lastUpdate = ss.groupDesc.UpdatedAt 5580 latestID = dID 5581 latestGroupStat = ss 5582 } 5583 } 5584 if latestID != globalDeploymentID() { 5585 // heal only from the site with latest info. 
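		// Same single-writer rule as healPolicies and healUsers: a site that does
		// not hold the newest groupDesc.UpdatedAt returns here and leaves the group
		// push to the most recently updated deployment.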
5586 return nil 5587 } 5588 latestPeerName = info.Sites[latestID].Name 5589 for dID, gStatus := range gs { 5590 if dID == globalDeploymentID() { 5591 continue 5592 } 5593 if !gStatus.GroupDescMismatch { 5594 continue 5595 } 5596 5597 if isGroupDescEqual(latestGroupStat.groupDesc.GroupDesc, gStatus.groupDesc.GroupDesc) { 5598 continue 5599 } 5600 peerName := info.Sites[dID].Name 5601 if err := c.IAMChangeHook(ctx, madmin.SRIAMItem{ 5602 Type: madmin.SRIAMItemGroupInfo, 5603 GroupInfo: &madmin.SRGroupInfo{ 5604 UpdateReq: madmin.GroupAddRemove{ 5605 Group: group, 5606 Status: madmin.GroupStatus(latestGroupStat.groupDesc.Status), 5607 Members: latestGroupStat.groupDesc.Members, 5608 IsRemove: false, 5609 }, 5610 }, 5611 UpdatedAt: lastUpdate, 5612 }); err != nil { 5613 logger.LogIf(ctx, fmt.Errorf("Unable to heal group %s from peer site %s -> site %s : %w", group, latestPeerName, peerName, err)) 5614 } 5615 } 5616 return nil 5617 } 5618 5619 func isGroupDescEqual(g1, g2 madmin.GroupDesc) bool { 5620 if g1.Name != g2.Name || 5621 g1.Status != g2.Status || 5622 g1.Policy != g2.Policy { 5623 return false 5624 } 5625 if len(g1.Members) != len(g2.Members) { 5626 return false 5627 } 5628 for _, v1 := range g1.Members { 5629 var found bool 5630 for _, v2 := range g2.Members { 5631 if v1 == v2 { 5632 found = true 5633 break 5634 } 5635 } 5636 if !found { 5637 return false 5638 } 5639 } 5640 return true 5641 } 5642 5643 func isUserInfoEqual(u1, u2 madmin.UserInfo) bool { 5644 if u1.PolicyName != u2.PolicyName || 5645 u1.Status != u2.Status || 5646 u1.SecretKey != u2.SecretKey { 5647 return false 5648 } 5649 if len(u1.MemberOf) != len(u2.MemberOf) { 5650 return false 5651 } 5652 for _, v1 := range u1.MemberOf { 5653 var found bool 5654 for _, v2 := range u2.MemberOf { 5655 if v1 == v2 { 5656 found = true 5657 break 5658 } 5659 } 5660 if !found { 5661 return false 5662 } 5663 } 5664 return true 5665 } 5666 5667 func isPolicyMappingEqual(p1, p2 srPolicyMapping) bool { 5668 return p1.Policy == p2.Policy && p1.IsGroup == p2.IsGroup && p1.UserOrGroup == p2.UserOrGroup 5669 } 5670 5671 type srPeerInfo struct { 5672 madmin.PeerInfo 5673 EndpointURL *url.URL 5674 } 5675 5676 // getPeerForUpload returns the site replication peer handling this upload. Defaults to the local cluster otherwise 5677 func (c *SiteReplicationSys) getPeerForUpload(deplID string) (pi srPeerInfo, local bool) { 5678 ci, _ := c.GetClusterInfo(GlobalContext) 5679 if !ci.Enabled { 5680 return pi, true 5681 } 5682 for _, site := range ci.Sites { 5683 if deplID == site.DeploymentID { 5684 ep, _ := url.Parse(site.Endpoint) 5685 pi = srPeerInfo{ 5686 PeerInfo: site, 5687 EndpointURL: ep, 5688 } 5689 return pi, site.DeploymentID == globalDeploymentID() 5690 } 5691 } 5692 return pi, true 5693 } 5694 5695 // startResync initiates resync of data to peerSite specified.
The overall site resync status 5696 // is maintained in .minio.sys/buckets/site-replication/resync/<deployment-id.meta>, while collecting 5697 // individual bucket resync status in .minio.sys/buckets/<bucket-name>/replication/resync.bin 5698 func (c *SiteReplicationSys) startResync(ctx context.Context, objAPI ObjectLayer, peer madmin.PeerInfo) (res madmin.SRResyncOpStatus, err error) { 5699 if !c.isEnabled() { 5700 return res, errSRNotEnabled 5701 } 5702 if objAPI == nil { 5703 return res, errSRObjectLayerNotReady 5704 } 5705 5706 if peer.DeploymentID == globalDeploymentID() { 5707 return res, errSRResyncToSelf 5708 } 5709 if _, ok := c.state.Peers[peer.DeploymentID]; !ok { 5710 return res, errSRPeerNotFound 5711 } 5712 rs, err := globalSiteResyncMetrics.siteStatus(ctx, objAPI, peer.DeploymentID) 5713 if err != nil { 5714 return res, err 5715 } 5716 if rs.Status == ResyncStarted { 5717 return res, errSRResyncStarted 5718 } 5719 var buckets []BucketInfo 5720 buckets, err = objAPI.ListBuckets(ctx, BucketOptions{}) 5721 if err != nil { 5722 return res, err 5723 } 5724 rs = newSiteResyncStatus(peer.DeploymentID, buckets) 5725 defer func() { 5726 if err != nil { 5727 rs.Status = ResyncFailed 5728 saveSiteResyncMetadata(ctx, rs, objAPI) 5729 globalSiteResyncMetrics.updateState(rs) 5730 } 5731 }() 5732 5733 if err := globalSiteResyncMetrics.updateState(rs); err != nil { 5734 return res, err 5735 } 5736 5737 for _, bi := range buckets { 5738 bucket := bi.Name 5739 if _, err := getReplicationConfig(ctx, bucket); err != nil { 5740 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5741 ErrDetail: err.Error(), 5742 Bucket: bucket, 5743 Status: ResyncFailed.String(), 5744 }) 5745 continue 5746 } 5747 // mark remote target for this deployment with the new reset id 5748 tgtArn := globalBucketTargetSys.getRemoteARNForPeer(bucket, peer) 5749 if tgtArn == "" { 5750 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5751 ErrDetail: fmt.Sprintf("no valid remote target found for this peer %s (%s)", peer.Name, peer.DeploymentID), 5752 Bucket: bucket, 5753 }) 5754 continue 5755 } 5756 target := globalBucketTargetSys.GetRemoteBucketTargetByArn(ctx, bucket, tgtArn) 5757 target.ResetBeforeDate = UTCNow() 5758 target.ResetID = rs.ResyncID 5759 if err = globalBucketTargetSys.SetTarget(ctx, bucket, &target, true); err != nil { 5760 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5761 ErrDetail: err.Error(), 5762 Bucket: bucket, 5763 }) 5764 continue 5765 } 5766 targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 5767 if err != nil { 5768 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5769 ErrDetail: err.Error(), 5770 Bucket: bucket, 5771 }) 5772 continue 5773 } 5774 tgtBytes, err := json.Marshal(&targets) 5775 if err != nil { 5776 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5777 ErrDetail: err.Error(), 5778 Bucket: bucket, 5779 }) 5780 continue 5781 } 5782 if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil { 5783 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5784 ErrDetail: err.Error(), 5785 Bucket: bucket, 5786 }) 5787 continue 5788 } 5789 if err := globalReplicationPool.resyncer.start(ctx, objAPI, resyncOpts{ 5790 bucket: bucket, 5791 arn: tgtArn, 5792 resyncID: rs.ResyncID, 5793 }); err != nil { 5794 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5795 ErrDetail: err.Error(), 5796 Bucket: bucket, 5797 }) 5798 continue 5799 } 5800 } 5801 res = 
madmin.SRResyncOpStatus{ 5802 Status: ResyncStarted.String(), 5803 OpType: "start", 5804 ResyncID: rs.ResyncID, 5805 Buckets: res.Buckets, 5806 } 5807 if len(res.Buckets) > 0 { 5808 res.ErrDetail = "partial failure in starting site resync" 5809 } 5810 if len(buckets) != 0 && len(res.Buckets) == len(buckets) { 5811 return res, fmt.Errorf("all buckets resync failed") 5812 } 5813 return res, nil 5814 } 5815 5816 // cancelResync stops an ongoing site level resync for the peer specified. 5817 func (c *SiteReplicationSys) cancelResync(ctx context.Context, objAPI ObjectLayer, peer madmin.PeerInfo) (res madmin.SRResyncOpStatus, err error) { 5818 if !c.isEnabled() { 5819 return res, errSRNotEnabled 5820 } 5821 if objAPI == nil { 5822 return res, errSRObjectLayerNotReady 5823 } 5824 if peer.DeploymentID == globalDeploymentID() { 5825 return res, errSRResyncToSelf 5826 } 5827 if _, ok := c.state.Peers[peer.DeploymentID]; !ok { 5828 return res, errSRPeerNotFound 5829 } 5830 rs, err := globalSiteResyncMetrics.siteStatus(ctx, objAPI, peer.DeploymentID) 5831 if err != nil { 5832 return res, err 5833 } 5834 res = madmin.SRResyncOpStatus{ 5835 Status: rs.Status.String(), 5836 OpType: "cancel", 5837 ResyncID: rs.ResyncID, 5838 } 5839 switch rs.Status { 5840 case ResyncCanceled: 5841 return res, errSRResyncCanceled 5842 case ResyncCompleted, NoResync: 5843 return res, errSRNoResync 5844 } 5845 targets := globalBucketTargetSys.ListTargets(ctx, "", string(madmin.ReplicationService)) 5846 // clear the remote target resetID set while initiating resync to stop replication 5847 for _, t := range targets { 5848 if t.ResetID == rs.ResyncID { 5849 // get tgt with credentials 5850 tgt := globalBucketTargetSys.GetRemoteBucketTargetByArn(ctx, t.SourceBucket, t.Arn) 5851 tgt.ResetID = "" 5852 bucket := t.SourceBucket 5853 if err = globalBucketTargetSys.SetTarget(ctx, bucket, &tgt, true); err != nil { 5854 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5855 ErrDetail: err.Error(), 5856 Bucket: bucket, 5857 }) 5858 continue 5859 } 5860 targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket) 5861 if err != nil { 5862 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5863 ErrDetail: err.Error(), 5864 Bucket: bucket, 5865 }) 5866 continue 5867 } 5868 tgtBytes, err := json.Marshal(&targets) 5869 if err != nil { 5870 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5871 ErrDetail: err.Error(), 5872 Bucket: bucket, 5873 }) 5874 continue 5875 } 5876 if _, err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil { 5877 res.Buckets = append(res.Buckets, madmin.ResyncBucketStatus{ 5878 ErrDetail: err.Error(), 5879 Bucket: bucket, 5880 }) 5881 continue 5882 } 5883 // update resync state for the bucket 5884 globalReplicationPool.resyncer.Lock() 5885 m, ok := globalReplicationPool.resyncer.statusMap[bucket] 5886 if !ok { 5887 m = newBucketResyncStatus(bucket) 5888 } 5889 if st, ok := m.TargetsMap[t.Arn]; ok { 5890 st.LastUpdate = UTCNow() 5891 st.ResyncStatus = ResyncCanceled 5892 m.TargetsMap[t.Arn] = st 5893 m.LastUpdate = UTCNow() 5894 } 5895 globalReplicationPool.resyncer.statusMap[bucket] = m 5896 globalReplicationPool.resyncer.Unlock() 5897 } 5898 } 5899 5900 rs.Status = ResyncCanceled 5901 rs.LastUpdate = UTCNow() 5902 if err := saveSiteResyncMetadata(ctx, rs, objAPI); err != nil { 5903 return res, err 5904 } 5905 select { 5906 case globalReplicationPool.resyncer.resyncCancelCh <- struct{}{}: 5907 case <-ctx.Done(): 5908 } 5909 5910 
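	// The send on resyncCancelCh wakes the worker started by resyncer.start;
	// selecting on ctx.Done() as well keeps cancelResync from blocking forever
	// when no resync goroutine is currently reading from the channel.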
globalSiteResyncMetrics.updateState(rs) 5911 5912 res.Status = rs.Status.String() 5913 return res, nil 5914 } 5915 5916 const ( 5917 siteResyncMetaFormat = 1 5918 siteResyncMetaVersionV1 = 1 5919 siteResyncMetaVersion = siteResyncMetaVersionV1 5920 siteResyncSaveInterval = 10 * time.Second 5921 ) 5922 5923 func newSiteResyncStatus(dID string, buckets []BucketInfo) SiteResyncStatus { 5924 now := UTCNow() 5925 s := SiteResyncStatus{ 5926 Version: siteResyncMetaVersion, 5927 Status: ResyncStarted, 5928 DeplID: dID, 5929 TotBuckets: len(buckets), 5930 BucketStatuses: make(map[string]ResyncStatusType), 5931 } 5932 for _, bi := range buckets { 5933 s.BucketStatuses[bi.Name] = ResyncPending 5934 } 5935 s.ResyncID = mustGetUUID() 5936 s.StartTime = now 5937 s.LastUpdate = now 5938 return s 5939 } 5940 5941 // load site resync metadata from disk 5942 func loadSiteResyncMetadata(ctx context.Context, objAPI ObjectLayer, dID string) (rs SiteResyncStatus, e error) { 5943 data, err := readConfig(GlobalContext, objAPI, getSRResyncFilePath(dID)) 5944 if err != nil { 5945 return rs, err 5946 } 5947 if len(data) == 0 { 5948 // Seems to be empty. 5949 return rs, nil 5950 } 5951 if len(data) <= 4 { 5952 return rs, fmt.Errorf("site resync: no data") 5953 } 5954 // Read resync meta header 5955 switch binary.LittleEndian.Uint16(data[0:2]) { 5956 case siteResyncMetaFormat: 5957 default: 5958 return rs, fmt.Errorf("resyncMeta: unknown format: %d", binary.LittleEndian.Uint16(data[0:2])) 5959 } 5960 switch binary.LittleEndian.Uint16(data[2:4]) { 5961 case siteResyncMetaVersion: 5962 default: 5963 return rs, fmt.Errorf("resyncMeta: unknown version: %d", binary.LittleEndian.Uint16(data[2:4])) 5964 } 5965 // OK, parse data. 5966 if _, err = rs.UnmarshalMsg(data[4:]); err != nil { 5967 return rs, err 5968 } 5969 5970 switch rs.Version { 5971 case siteResyncMetaVersionV1: 5972 default: 5973 return rs, fmt.Errorf("unexpected resync meta version: %d", rs.Version) 5974 } 5975 return rs, nil 5976 } 5977 5978 // save resync status of peer to resync/depl-id.meta 5979 func saveSiteResyncMetadata(ctx context.Context, ss SiteResyncStatus, objectAPI ObjectLayer) error { 5980 if objectAPI == nil { 5981 return errSRObjectLayerNotReady 5982 } 5983 data := make([]byte, 4, ss.Msgsize()+4) 5984 5985 // Initialize the resync meta header. 
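	// On-disk layout written below (and checked by loadSiteResyncMetadata):
	//
	//	bytes 0..1  little-endian uint16  siteResyncMetaFormat
	//	bytes 2..3  little-endian uint16  siteResyncMetaVersion
	//	bytes 4..   msgp-encoded SiteResyncStatus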
5986 binary.LittleEndian.PutUint16(data[0:2], siteResyncMetaFormat) 5987 binary.LittleEndian.PutUint16(data[2:4], siteResyncMetaVersion) 5988 5989 buf, err := ss.MarshalMsg(data) 5990 if err != nil { 5991 return err 5992 } 5993 return saveConfig(ctx, objectAPI, getSRResyncFilePath(ss.DeplID), buf) 5994 } 5995 5996 func getSRResyncFilePath(dID string) string { 5997 return pathJoin(siteResyncPrefix, dID+".meta") 5998 } 5999 6000 func (c *SiteReplicationSys) getDeplIDForEndpoint(ep string) (dID string, err error) { 6001 if ep == "" { 6002 return dID, fmt.Errorf("no deployment id found for endpoint %s", ep) 6003 } 6004 c.RLock() 6005 defer c.RUnlock() 6006 if !c.enabled { 6007 return dID, errSRNotEnabled 6008 } 6009 for _, peer := range c.state.Peers { 6010 if ep == peer.Endpoint { 6011 return peer.DeploymentID, nil 6012 } 6013 } 6014 return dID, fmt.Errorf("no deployment id found for endpoint %s", ep) 6015 } 6016 6017 func (c *SiteReplicationSys) getSiteMetrics(ctx context.Context) (madmin.SRMetricsSummary, error) { 6018 if !c.isEnabled() { 6019 return madmin.SRMetricsSummary{}, errSRNotEnabled 6020 } 6021 peerSMetricsList := globalNotificationSys.GetClusterSiteMetrics(ctx) 6022 var sm madmin.SRMetricsSummary 6023 sm.Metrics = make(map[string]madmin.SRMetric) 6024 6025 for _, peer := range peerSMetricsList { 6026 sm.ActiveWorkers.Avg += peer.ActiveWorkers.Avg 6027 sm.ActiveWorkers.Curr += peer.ActiveWorkers.Curr 6028 if peer.ActiveWorkers.Max > sm.ActiveWorkers.Max { 6029 sm.ActiveWorkers.Max = peer.ActiveWorkers.Max // record the largest per-site maximum, mirroring Queued.Max below 6030 } 6031 sm.Queued.Avg.Bytes += peer.Queued.Avg.Bytes 6032 sm.Queued.Avg.Count += peer.Queued.Avg.Count 6033 sm.Queued.Curr.Bytes += peer.Queued.Curr.Bytes 6034 sm.Queued.Curr.Count += peer.Queued.Curr.Count 6035 if peer.Queued.Max.Count > sm.Queued.Max.Count { 6036 sm.Queued.Max.Bytes = peer.Queued.Max.Bytes 6037 sm.Queued.Max.Count = peer.Queued.Max.Count 6038 } 6039 sm.ReplicaCount += peer.ReplicaCount 6040 sm.ReplicaSize += peer.ReplicaSize 6041 sm.Proxied.Add(madmin.ReplProxyMetric(peer.Proxied)) 6042 for dID, v := range peer.Metrics { 6043 v2, ok := sm.Metrics[dID] 6044 if !ok { 6045 v2 = madmin.SRMetric{} 6046 v2.Failed.ErrCounts = make(map[string]int) 6047 } 6048 6049 // use target endpoint metrics from node which has been up the longest 6050 if v2.LastOnline.After(v.LastOnline) || v2.LastOnline.IsZero() { 6051 v2.Endpoint = v.Endpoint 6052 v2.LastOnline = v.LastOnline 6053 v2.Latency = v.Latency 6054 v2.Online = v.Online 6055 v2.TotalDowntime = v.TotalDowntime 6056 v2.DeploymentID = v.DeploymentID 6057 } 6058 v2.ReplicatedCount += v.ReplicatedCount 6059 v2.ReplicatedSize += v.ReplicatedSize 6060 v2.Failed = v2.Failed.Add(v.Failed) 6061 for k, v := range v.Failed.ErrCounts { 6062 v2.Failed.ErrCounts[k] += v 6063 } 6064 if v2.XferStats == nil { 6065 v2.XferStats = make(map[replication.MetricName]replication.XferStats) 6066 } 6067 for rm, x := range v.XferStats { 6068 x2, ok := v2.XferStats[replication.MetricName(rm)] 6069 if !ok { 6070 x2 = replication.XferStats{} 6071 } 6072 x2.AvgRate += x.Avg 6073 x2.CurrRate += x.Curr 6074 if x.Peak > x2.PeakRate { 6075 x2.PeakRate = x.Peak 6076 } 6077 v2.XferStats[replication.MetricName(rm)] = x2 6078 } 6079 sm.Metrics[dID] = v2 6080 } 6081 } 6082 sm.Uptime = UTCNow().Unix() - globalBootTime.Unix() 6083 return sm, nil 6084 } 6085 6086 // mergeWithCurrentLCConfig - merges the given ILM expiry configuration with the existing one for the current site and returns the merged lifecycle XML 6087 func mergeWithCurrentLCConfig(ctx context.Context, bucket string, expLCCfg
*string, updatedAt time.Time) ([]byte, error) { 6088 // Get bucket config from current site 6089 meta, e := globalBucketMetadataSys.GetConfigFromDisk(ctx, bucket) 6090 if e != nil && !errors.Is(e, errConfigNotFound) { 6091 return []byte{}, e 6092 } 6093 rMap := make(map[string]lifecycle.Rule) 6094 var xmlName xml.Name 6095 if len(meta.LifecycleConfigXML) > 0 { 6096 var lcCfg lifecycle.Lifecycle 6097 if err := xml.Unmarshal(meta.LifecycleConfigXML, &lcCfg); err != nil { 6098 return []byte{}, err 6099 } 6100 for _, rl := range lcCfg.Rules { 6101 rMap[rl.ID] = rl 6102 } 6103 xmlName = meta.lifecycleConfig.XMLName 6104 } 6105 6106 // get latest expiry rules 6107 newRMap := make(map[string]lifecycle.Rule) 6108 if expLCCfg != nil { 6109 var cfg lifecycle.Lifecycle 6110 expLcCfgData, err := base64.StdEncoding.DecodeString(*expLCCfg) 6111 if err != nil { 6112 return []byte{}, err 6113 } 6114 if err := xml.Unmarshal(expLcCfgData, &cfg); err != nil { 6115 return []byte{}, err 6116 } 6117 for _, rl := range cfg.Rules { 6118 newRMap[rl.ID] = rl 6119 } 6120 xmlName = cfg.XMLName 6121 } 6122 6123 // check if current expiry rules are there in new one. if not remove the expiration 6124 // part of rule as they may have been removed from latest updated one 6125 for id, rl := range rMap { 6126 if !rl.Expiration.IsNull() || !rl.NoncurrentVersionExpiration.IsNull() { 6127 if _, ok := newRMap[id]; !ok { 6128 // if rule getting removed was pure expiry rule (may be got to this site 6129 // as part of replication of expiry rules), remove it. Otherwise remove 6130 // only the expiry part of it 6131 if rl.Transition.IsNull() && rl.NoncurrentVersionTransition.IsNull() { 6132 delete(rMap, id) 6133 } else { 6134 rl.Expiration = lifecycle.Expiration{} 6135 rl.NoncurrentVersionExpiration = lifecycle.NoncurrentVersionExpiration{} 6136 rMap[id] = rl 6137 } 6138 } 6139 } 6140 } 6141 6142 // append now 6143 for id, rl := range newRMap { 6144 // if rule is already in original list update non tranisition details with latest 6145 // else simply add to the map. This may happen if ILM expiry replication 6146 // was disabled for sometime and rules were updated independently in different 6147 // sites. Latest changes would get applied but merge only the non transition details 6148 if existingRl, ok := rMap[id]; ok { 6149 clonedRl := rl.CloneNonTransition() 6150 clonedRl.Transition = existingRl.Transition 6151 clonedRl.NoncurrentVersionTransition = existingRl.NoncurrentVersionTransition 6152 rMap[id] = clonedRl 6153 } else { 6154 rMap[id] = rl 6155 } 6156 } 6157 6158 var rules []lifecycle.Rule 6159 for _, rule := range rMap { 6160 rules = append(rules, rule) 6161 } 6162 6163 // no rules, return 6164 if len(rules) == 0 { 6165 return []byte{}, nil 6166 } 6167 6168 // get final list for write 6169 finalLcCfg := lifecycle.Lifecycle{ 6170 XMLName: xmlName, 6171 Rules: rules, 6172 ExpiryUpdatedAt: &updatedAt, 6173 } 6174 if err := finalLcCfg.Validate(); err != nil { 6175 return []byte{}, err 6176 } 6177 finalConfigData, err := xml.Marshal(finalLcCfg) 6178 if err != nil { 6179 return []byte{}, err 6180 } 6181 6182 return finalConfigData, nil 6183 } 6184 6185 func ilmExpiryReplicationEnabled(sites map[string]madmin.PeerInfo) bool { 6186 flag := true 6187 for _, pi := range sites { 6188 flag = flag && pi.ReplicateILMExpiry 6189 } 6190 return flag 6191 } 6192 6193 type siteReplicatorCred struct { 6194 Creds auth.Credentials 6195 sync.RWMutex 6196 } 6197 6198 // Get or attempt to load site replicator credentials from disk. 
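// Get behaves as a read-through cache: a credential that is already valid in
// memory is returned under the read lock; otherwise the siteReplicatorSvcAcc
// service account is loaded from the IAM store and cached via Set for
// subsequent callers.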
6199 func (s *siteReplicatorCred) Get(ctx context.Context) (auth.Credentials, error) { 6200 s.RLock() 6201 if s.Creds.IsValid() { 6202 s.RUnlock() 6203 return s.Creds, nil 6204 } 6205 s.RUnlock() 6206 m := make(map[string]UserIdentity) 6207 if err := globalIAMSys.store.loadUser(ctx, siteReplicatorSvcAcc, svcUser, m); err != nil { 6208 return auth.Credentials{}, err 6209 } 6210 s.Set(m[siteReplicatorSvcAcc].Credentials) 6211 return m[siteReplicatorSvcAcc].Credentials, nil 6212 } 6213 6214 func (s *siteReplicatorCred) Set(c auth.Credentials) { 6215 s.Lock() 6216 defer s.Unlock() 6217 s.Creds = c 6218 } 6219 6220 func (s *siteReplicatorCred) IsValid() bool { 6221 s.RLock() 6222 defer s.RUnlock() 6223 return s.Creds.IsValid() 6224 }
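// latestDeployment is an illustrative sketch added editorially; it is not part
// of the original upstream file and the name is hypothetical. It factors out
// the "newest UpdatedAt wins" selection that healPolicies, healUserPolicies,
// healGroupPolicies, healUsers and healGroups each repeat inline over a map
// keyed by deployment ID.
func latestDeployment(updatedAt map[string]time.Time) (latestID string, lastUpdate time.Time) {
	for dID, t := range updatedAt {
		// Mirror the inline loops above: the first entry seeds the result, and
		// any non-zero timestamp that is strictly newer replaces it.
		if lastUpdate.IsZero() || (!t.IsZero() && t.After(lastUpdate)) {
			latestID, lastUpdate = dID, t
		}
	}
	return latestID, lastUpdate
}

// A caller would heal only when the returned latestID equals
// globalDeploymentID(), exactly as the heal* methods above do with their
// inline loops.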