github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/notification.go

// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/url"
	"runtime"
	"sync"
	"time"

	"github.com/cespare/xxhash/v2"
	"github.com/klauspost/compress/zip"
	"github.com/minio/madmin-go/v3"
	xioutil "github.com/minio/minio/internal/ioutil"
	xnet "github.com/minio/pkg/v2/net"
	"github.com/minio/pkg/v2/sync/errgroup"
	"github.com/minio/pkg/v2/workers"

	"github.com/minio/minio/internal/bucket/bandwidth"
	"github.com/minio/minio/internal/logger"
)

// This file contains peer related notifications. For sending notifications to
// external systems, see event-notification.go

// NotificationSys - notification system.
type NotificationSys struct {
	peerClients    []*peerRESTClient // Excludes self
	allPeerClients []*peerRESTClient // Includes nil client for self
}

// NotificationPeerErr carries the error associated with a remote peer.
type NotificationPeerErr struct {
	Host xnet.Host // Remote host on which the rpc call was initiated
	Err  error     // Error returned by the remote peer for an rpc call
}

// A NotificationGroup is a collection of goroutines working on subtasks that are part of
// the same overall task.
//
// A zero NotificationGroup is valid and does not cancel on error.
type NotificationGroup struct {
	workers    *workers.Workers
	errs       []NotificationPeerErr
	retryCount int
}

// WithNPeers returns a new NotificationGroup with an errs slice of length up to
// nerrs; upon Wait() the errors collected from all tasks are returned.
func WithNPeers(nerrs int) *NotificationGroup {
	if nerrs <= 0 {
		nerrs = 1
	}
	wk, _ := workers.New(nerrs)
	return &NotificationGroup{errs: make([]NotificationPeerErr, nerrs), workers: wk, retryCount: 3}
}

// WithNPeersThrottled returns a new NotificationGroup with an errs slice of length
// up to nerrs; upon Wait() the errors collected from all tasks are returned.
// Unlike WithNPeers, at most wks tasks run in parallel.
func WithNPeersThrottled(nerrs, wks int) *NotificationGroup {
	if nerrs <= 0 {
		nerrs = 1
	}
	wk, _ := workers.New(wks)
	return &NotificationGroup{errs: make([]NotificationPeerErr, nerrs), workers: wk, retryCount: 3}
}
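
// The IAM and bucket fan-out helpers below all follow the same
// NotificationGroup pattern; a minimal sketch of the intended usage
// (illustrative only, mirroring concrete methods such as DeletePolicy):
//
//	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
//	for idx, client := range sys.peerClients {
//		client := client // capture the loop variable
//		ng.Go(GlobalContext, func() error {
//			if client == nil {
//				return errPeerNotReachable
//			}
//			return client.DeletePolicy(policyName)
//		}, idx, *client.host)
//	}
//	errs := ng.Wait() // one NotificationPeerErr per peer, in input order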

// WithRetries sets the retry count for all function calls from the Go method.
func (g *NotificationGroup) WithRetries(retryCount int) *NotificationGroup {
	if g != nil {
		g.retryCount = retryCount
	}
	return g
}

// Wait blocks until all function calls from the Go method have returned, then
// returns the slice of errors from all function calls.
func (g *NotificationGroup) Wait() []NotificationPeerErr {
	g.workers.Wait()
	return g.errs
}

// Go calls the given function in a new goroutine.
//
// The error (if any) from the final attempt of each task is collected at the
// task's index in the errs slice and returned by Wait().
func (g *NotificationGroup) Go(ctx context.Context, f func() error, index int, addr xnet.Host) {
	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	g.workers.Take()

	go func() {
		defer g.workers.Give()

		g.errs[index] = NotificationPeerErr{
			Host: addr,
		}
		for i := 0; i < g.retryCount; i++ {
			g.errs[index].Err = nil
			if err := f(); err != nil {
				g.errs[index].Err = err
				// Log the error on the last attempt.
				if i == g.retryCount-1 {
					reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", addr.String())
					ctx := logger.SetReqInfo(ctx, reqInfo)
					logger.LogOnceIf(ctx, err, addr.String())
				}
				// Wait for a minimum of 100ms plus up to 1s of random jitter
				// before retrying.
				if i < g.retryCount-1 {
					time.Sleep(100*time.Millisecond + time.Duration(r.Float64()*float64(time.Second)))
				}
				continue
			}
			break
		}
	}()
}

// DeletePolicy - deletes policy across all peers.
func (sys *NotificationSys) DeletePolicy(policyName string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.DeletePolicy(policyName)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// LoadPolicy - reloads a specific modified policy across all peers
func (sys *NotificationSys) LoadPolicy(policyName string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.LoadPolicy(policyName)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// LoadPolicyMapping - reloads a policy mapping across all peers
func (sys *NotificationSys) LoadPolicyMapping(userOrGroup string, userType IAMUserType, isGroup bool) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.LoadPolicyMapping(userOrGroup, userType, isGroup)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// DeleteUser - deletes a specific user across all peers
func (sys *NotificationSys) DeleteUser(accessKey string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.DeleteUser(accessKey)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// LoadUser - reloads a specific user across all peers
func (sys *NotificationSys) LoadUser(accessKey string, temp bool) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.LoadUser(accessKey, temp)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// LoadGroup - loads a specific group on all peers.
func (sys *NotificationSys) LoadGroup(group string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.LoadGroup(group)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// DeleteServiceAccount - deletes a specific service account across all peers
func (sys *NotificationSys) DeleteServiceAccount(accessKey string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.DeleteServiceAccount(accessKey)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// LoadServiceAccount - reloads a specific service account across all peers
func (sys *NotificationSys) LoadServiceAccount(accessKey string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	for idx, client := range sys.peerClients {
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			return client.LoadServiceAccount(accessKey)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// BackgroundHealStatus - returns background heal status of all peers
func (sys *NotificationSys) BackgroundHealStatus() ([]madmin.BgHealState, []NotificationPeerErr) {
	ng := WithNPeers(len(sys.peerClients))
	states := make([]madmin.BgHealState, len(sys.peerClients))
	for idx, client := range sys.peerClients {
		idx := idx
		client := client
		ng.Go(GlobalContext, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			st, err := client.BackgroundHealStatus()
			if err != nil {
				return err
			}
			states[idx] = st
			return nil
		}, idx, *client.host)
	}

	return states, ng.Wait()
}

// StartProfiling - start profiling on remote peers, by initiating a remote RPC.
func (sys *NotificationSys) StartProfiling(profiler string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(GlobalContext, func() error {
			return client.StartProfiling(profiler)
		}, idx, *client.host)
	}
	return ng.Wait()
}
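
// A rough admin-side flow combining StartProfiling above with
// DownloadProfilingData below (a sketch only; the real sequence is driven by
// the admin API handlers, and "cpu" stands in for any supported profiler type):
//
//	nerrs := globalNotificationSys.StartProfiling("cpu")
//	// ... let the profiler run for the desired duration ...
//	var buf bytes.Buffer
//	found := globalNotificationSys.DownloadProfilingData(ctx, &buf)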

// DownloadProfilingData - download profiling data from all remote peers.
func (sys *NotificationSys) DownloadProfilingData(ctx context.Context, writer io.Writer) (profilingDataFound bool) {
	// Initialize a zip writer which will provide a zipped content
	// of profiling data of all nodes
	zipWriter := zip.NewWriter(writer)
	defer zipWriter.Close()

	for _, client := range sys.peerClients {
		if client == nil {
			continue
		}
		data, err := client.DownloadProfileData()
		if err != nil {
			reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", client.host.String())
			ctx := logger.SetReqInfo(ctx, reqInfo)
			logger.LogIf(ctx, err)
			continue
		}

		profilingDataFound = true

		for typ, data := range data {
			err := embedFileInZip(zipWriter, fmt.Sprintf("profile-%s-%s", client.host.String(), typ), data, 0o600)
			if err != nil {
				reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", client.host.String())
				ctx := logger.SetReqInfo(ctx, reqInfo)
				logger.LogIf(ctx, err)
			}
		}
	}

	// Local host
	thisAddr, err := xnet.ParseHost(globalLocalNodeName)
	if err != nil {
		logger.LogIf(ctx, err)
		return profilingDataFound
	}

	data, err := getProfileData()
	if err != nil {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", thisAddr.String())
		ctx := logger.SetReqInfo(ctx, reqInfo)
		logger.LogIf(ctx, err)
		return profilingDataFound
	}

	profilingDataFound = true

	// Send profiling data to zip as file
	for typ, data := range data {
		err := embedFileInZip(zipWriter, fmt.Sprintf("profile-%s-%s", thisAddr, typ), data, 0o600)
		logger.LogIf(ctx, err)
	}
	if b := getClusterMetaInfo(ctx); len(b) > 0 {
		logger.LogIf(ctx, embedFileInZip(zipWriter, "cluster.info", b, 0o600))
	}

	return
}
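
// The zip assembled above holds one entry per node and profile type, named
// "profile-<host>-<type>", plus an optional "cluster.info" entry. A sketch of
// listing its contents, assuming buf holds the zip from the previous example:
//
//	zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
//	if err == nil {
//		for _, f := range zr.File {
//			fmt.Println(f.Name) // e.g. "profile-10.0.0.1:9000-cpu"
//		}
//	}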

// VerifyBinary - asks remote peers to verify the checksum
func (sys *NotificationSys) VerifyBinary(ctx context.Context, u *url.URL, sha256Sum []byte, releaseInfo string, bin []byte) []NotificationPeerErr {
	// FIXME: network calls made in this manner, such as one goroutine per node,
	// can easily eat into the internode bandwidth. This function would be mostly
	// TX saturating, however there are situations where a RX might also saturate.
	// To avoid these problems we must split the work at scale. With a 1000-node
	// setup becoming a reality, we must try to shard the work properly: e.g. pick
	// 10 nodes that can precisely send those 100 requests, where the first node
	// in each 10-node shard coordinates with the other 9 shards to fan out the
	// rest of the `99*9` requests.
	//
	// This essentially splits the workload properly and also allows for network
	// utilization to be optimal, instead of blindly throttling the way we are
	// doing below. However the changes that are needed here are a bit involved,
	// further discussion advised. Remove this comment and remove the worker model
	// for this function in future.
	maxWorkers := runtime.GOMAXPROCS(0) / 2
	if maxWorkers > len(sys.peerClients) {
		maxWorkers = len(sys.peerClients)
	}

	ng := WithNPeersThrottled(len(sys.peerClients), maxWorkers)
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.VerifyBinary(ctx, u, sha256Sum, releaseInfo, bytes.NewReader(bin))
		}, idx, *client.host)
	}
	return ng.Wait()
}

// CommitBinary - asks remote peers to overwrite the old binary with the new one
func (sys *NotificationSys) CommitBinary(ctx context.Context) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.CommitBinary(ctx)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// SignalConfigReload reloads requested sub-system on a remote peer dynamically.
func (sys *NotificationSys) SignalConfigReload(subSys string) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(GlobalContext, func() error {
			return client.SignalService(serviceReloadDynamic, subSys, false)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// SignalService - calls signal service RPC call on all peers.
func (sys *NotificationSys) SignalService(sig serviceSignal) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(GlobalContext, func() error {
			// dryRun is passed as false to preserve the current behavior.
			return client.SignalService(sig, "", false)
		}, idx, *client.host)
	}
	return ng.Wait()
}

// SignalServiceV2 - calls signal service RPC call on all peers with v2 API
func (sys *NotificationSys) SignalServiceV2(sig serviceSignal, dryRun bool) []NotificationPeerErr {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(GlobalContext, func() error {
			return client.SignalService(sig, "", dryRun)
		}, idx, *client.host)
	}
	return ng.Wait()
}

var errPeerNotReachable = errors.New("peer is not reachable")
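
// An illustrative call site for the signal helpers above (hypothetical
// sub-system name, shown only to make the fan-out semantics concrete):
//
//	for _, nerr := range globalNotificationSys.SignalConfigReload("api") {
//		if nerr.Err != nil {
//			// nerr.Host identifies the peer that failed to reload
//		}
//	}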

// GetLocks - makes GetLocks RPC call on all peers.
func (sys *NotificationSys) GetLocks(ctx context.Context, r *http.Request) []*PeerLocks {
	locksResp := make([]*PeerLocks, len(sys.peerClients))
	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		index := index
		client := client
		g.Go(func() error {
			if client == nil {
				return errPeerNotReachable
			}
			serverLocksResp, err := sys.peerClients[index].GetLocks()
			if err != nil {
				return err
			}
			locksResp[index] = &PeerLocks{
				Addr:  sys.peerClients[index].host.String(),
				Locks: serverLocksResp,
			}
			return nil
		}, index)
	}
	for index, err := range g.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
			sys.peerClients[index].host.String())
		ctx := logger.SetReqInfo(ctx, reqInfo)
		logger.LogOnceIf(ctx, err, sys.peerClients[index].host.String())
	}
	locksResp = append(locksResp, &PeerLocks{
		Addr:  getHostName(r),
		Locks: globalLockServer.DupLockMap(),
	})
	return locksResp
}

// LoadBucketMetadata - calls LoadBucketMetadata call on all peers
func (sys *NotificationSys) LoadBucketMetadata(ctx context.Context, bucketName string) {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.LoadBucketMetadata(bucketName)
		}, idx, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}
}

// DeleteBucketMetadata - calls DeleteBucketMetadata call on all peers
func (sys *NotificationSys) DeleteBucketMetadata(ctx context.Context, bucketName string) {
	globalReplicationStats.Delete(bucketName)
	globalBucketMetadataSys.Remove(bucketName)
	globalBucketTargetSys.Delete(bucketName)
	globalEventNotifier.RemoveNotification(bucketName)
	globalBucketConnStats.delete(bucketName)
	globalBucketHTTPStats.delete(bucketName)
	if localMetacacheMgr != nil {
		localMetacacheMgr.deleteBucketCache(bucketName)
	}

	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.DeleteBucketMetadata(bucketName)
		}, idx, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}
}

// GetClusterAllBucketStats - returns bucket stats for all buckets from all remote peers.
func (sys *NotificationSys) GetClusterAllBucketStats(ctx context.Context) []BucketStatsMap {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	replicationStats := make([]BucketStatsMap, len(sys.peerClients))
	for index, client := range sys.peerClients {
		index := index
		client := client
		ng.Go(ctx, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			bsMap, err := client.GetAllBucketStats()
			if err != nil {
				return err
			}
			replicationStats[index] = bsMap
			return nil
		}, index, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}

	replicationStatsList := globalReplicationStats.GetAll()
	bucketStatsMap := BucketStatsMap{
		Stats:     make(map[string]BucketStats, len(replicationStatsList)),
		Timestamp: UTCNow(),
	}
	for k, replicationStats := range replicationStatsList {
		bucketStatsMap.Stats[k] = BucketStats{
			ReplicationStats: replicationStats,
			ProxyStats:       globalReplicationStats.getProxyStats(k),
		}
	}

	replicationStats = append(replicationStats, bucketStatsMap)
	return replicationStats
}

// GetClusterBucketStats - calls GetClusterBucketStats call on all peers for a cluster statistics view.
func (sys *NotificationSys) GetClusterBucketStats(ctx context.Context, bucketName string) []BucketStats {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	bucketStats := make([]BucketStats, len(sys.peerClients))
	for index, client := range sys.peerClients {
		index := index
		client := client
		ng.Go(ctx, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			bs, err := client.GetBucketStats(bucketName)
			if err != nil {
				return err
			}
			bucketStats[index] = bs
			return nil
		}, index, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}
	bucketStats = append(bucketStats, BucketStats{
		ReplicationStats: globalReplicationStats.Get(bucketName),
		QueueStats:       ReplicationQueueStats{Nodes: []ReplQNodeStats{globalReplicationStats.getNodeQueueStats(bucketName)}},
		ProxyStats:       globalReplicationStats.getProxyStats(bucketName),
	})
	return bucketStats
}
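
// Note on the cluster-stats helpers above: each returned slice carries one
// entry per remote peer plus this node's own stats appended last, so callers
// should expect len(sys.peerClients)+1 entries. A minimal consumer sketch:
//
//	stats := globalNotificationSys.GetClusterBucketStats(ctx, bucket)
//	local := stats[len(stats)-1] // this node's entry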

// GetClusterSiteMetrics - calls GetClusterSiteMetrics call on all peers for a cluster statistics view.
func (sys *NotificationSys) GetClusterSiteMetrics(ctx context.Context) []SRMetricsSummary {
	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
	siteStats := make([]SRMetricsSummary, len(sys.peerClients))
	for index, client := range sys.peerClients {
		index := index
		client := client
		ng.Go(ctx, func() error {
			if client == nil {
				return errPeerNotReachable
			}
			sm, err := client.GetSRMetrics()
			if err != nil {
				return err
			}
			siteStats[index] = sm
			return nil
		}, index, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}
	siteStats = append(siteStats, globalReplicationStats.getSRMetricsForNode())
	return siteStats
}

// ReloadPoolMeta reloads on-disk updates to pool metadata
func (sys *NotificationSys) ReloadPoolMeta(ctx context.Context) {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.ReloadPoolMeta(ctx)
		}, idx, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}
}

// StopRebalance notifies all MinIO nodes to signal any ongoing rebalance
// goroutine to stop.
func (sys *NotificationSys) StopRebalance(ctx context.Context) {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.StopRebalance(ctx)
		}, idx, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}

	objAPI := newObjectLayerFn()
	if objAPI == nil {
		logger.LogIf(ctx, errServerNotInitialized)
		return
	}

	if pools, ok := objAPI.(*erasureServerPools); ok {
		pools.StopRebalance()
	}
}

// LoadRebalanceMeta notifies all peers to load rebalance.bin from object layer.
// Note: Only peers participating in rebalance operation, namely the first node
// in each pool will load rebalance.bin.
func (sys *NotificationSys) LoadRebalanceMeta(ctx context.Context, startRebalance bool) {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.LoadRebalanceMeta(ctx, startRebalance)
		}, idx, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}
}

// LoadTransitionTierConfig notifies remote peers to load their remote tier
// configs from config store.
func (sys *NotificationSys) LoadTransitionTierConfig(ctx context.Context) {
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(ctx, func() error {
			return client.LoadTransitionTierConfig(ctx)
		}, idx, *client.host)
	}
	for _, nErr := range ng.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
		if nErr.Err != nil {
			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
		}
	}
}

// GetCPUs - Get all CPU information.
func (sys *NotificationSys) GetCPUs(ctx context.Context) []madmin.CPUs {
	reply := make([]madmin.CPUs, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetCPUs(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

// GetNetInfo - Network information
func (sys *NotificationSys) GetNetInfo(ctx context.Context) []madmin.NetInfo {
	reply := make([]madmin.NetInfo, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetNetInfo(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

// GetPartitions - Disk partition information
func (sys *NotificationSys) GetPartitions(ctx context.Context) []madmin.Partitions {
	reply := make([]madmin.Partitions, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetPartitions(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

// GetOSInfo - Get operating system's information
func (sys *NotificationSys) GetOSInfo(ctx context.Context) []madmin.OSInfo {
	reply := make([]madmin.OSInfo, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetOSInfo(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}
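
// The hardware/OS collectors above and below deliberately use
// errgroup.WithNErrs rather than NotificationGroup: there are no retries,
// errors are returned positionally from g.Wait() and folded into the matching
// reply entry via addNodeErr, and entries for nil (self) clients stay
// zero-valued. A minimal consumer sketch (illustrative only):
//
//	for _, info := range globalNotificationSys.GetOSInfo(ctx) {
//		// info.Addr and info.Error are populated on per-node failure
//	}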

// GetMetrics - Get metrics from all peers.
func (sys *NotificationSys) GetMetrics(ctx context.Context, t madmin.MetricType, opts collectMetricsOpts) []madmin.RealtimeMetrics {
	reply := make([]madmin.RealtimeMetrics, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		host := client.host.String()
		if len(opts.hosts) > 0 {
			if _, ok := opts.hosts[host]; !ok {
				continue
			}
		}

		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetMetrics(ctx, t, opts)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			reply[index].Errors = []string{fmt.Sprintf("%s: %s (rpc)", sys.peerClients[index].String(), err.Error())}
		}
	}
	return reply
}

// GetResourceMetrics - gets the resource metrics from all nodes excluding self.
func (sys *NotificationSys) GetResourceMetrics(ctx context.Context) <-chan MetricV2 {
	if sys == nil {
		return nil
	}
	g := errgroup.WithNErrs(len(sys.peerClients))
	peerChannels := make([]<-chan MetricV2, len(sys.peerClients))
	for index := range sys.peerClients {
		index := index
		g.Go(func() error {
			if sys.peerClients[index] == nil {
				return errPeerNotReachable
			}
			var err error
			peerChannels[index], err = sys.peerClients[index].GetResourceMetrics(ctx)
			return err
		}, index)
	}
	return sys.collectPeerMetrics(ctx, peerChannels, g)
}

// GetSysConfig - Get information about system config
// (only the config that is of concern to minio)
func (sys *NotificationSys) GetSysConfig(ctx context.Context) []madmin.SysConfig {
	reply := make([]madmin.SysConfig, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetSysConfig(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

// GetSysServices - Get information about system services
// (only the services that are of concern to minio)
func (sys *NotificationSys) GetSysServices(ctx context.Context) []madmin.SysServices {
	reply := make([]madmin.SysServices, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetSELinuxInfo(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

func (sys *NotificationSys) addNodeErr(nodeInfo madmin.NodeInfo, peerClient *peerRESTClient, err error) {
	addr := peerClient.host.String()
	reqInfo := (&logger.ReqInfo{}).AppendTags("remotePeer", addr)
	ctx := logger.SetReqInfo(GlobalContext, reqInfo)
	logger.LogOnceIf(ctx, err, "add-node-err-"+addr)
	nodeInfo.SetAddr(addr)
	nodeInfo.SetError(err.Error())
}

// GetSysErrors - Get system error information
func (sys *NotificationSys) GetSysErrors(ctx context.Context) []madmin.SysErrors {
	reply := make([]madmin.SysErrors, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetSysErrors(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

// GetMemInfo - Memory information
func (sys *NotificationSys) GetMemInfo(ctx context.Context) []madmin.MemInfo {
	reply := make([]madmin.MemInfo, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetMemInfo(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

// GetProcInfo - Process information
func (sys *NotificationSys) GetProcInfo(ctx context.Context) []madmin.ProcInfo {
	reply := make([]madmin.ProcInfo, len(sys.peerClients))

	g := errgroup.WithNErrs(len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reply[index], err = sys.peerClients[index].GetProcInfo(ctx)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		if err != nil {
			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
		}
	}
	return reply
}

// Construct a list of offline disks information for a given node.
// If offlineHost is empty, do it for the local disks.
func getOfflineDisks(offlineHost string, endpoints EndpointServerPools) []madmin.Disk {
	var offlineDisks []madmin.Disk
	for _, pool := range endpoints {
		for _, ep := range pool.Endpoints {
			if offlineHost == "" && ep.IsLocal || offlineHost == ep.Host {
				offlineDisks = append(offlineDisks, madmin.Disk{
					Endpoint:  ep.String(),
					State:     string(madmin.ItemOffline),
					PoolIndex: ep.PoolIdx,
					SetIndex:  ep.SetIdx,
					DiskIndex: ep.DiskIdx,
				})
			}
		}
	}
	return offlineDisks
}

// StorageInfo returns disk information across all peers
func (sys *NotificationSys) StorageInfo(objLayer ObjectLayer, metrics bool) StorageInfo {
	var storageInfo StorageInfo
	replies := make([]StorageInfo, len(sys.peerClients))

	var wg sync.WaitGroup
	for i, client := range sys.peerClients {
		if client == nil {
			continue
		}
		wg.Add(1)
		go func(client *peerRESTClient, idx int) {
			defer wg.Done()
			info, err := client.LocalStorageInfo(metrics)
			if err != nil {
				info.Disks = getOfflineDisks(client.host.String(), globalEndpoints)
			}
			replies[idx] = info
		}(client, i)
	}
	wg.Wait()

	// Add local to this server.
	replies = append(replies, objLayer.LocalStorageInfo(GlobalContext, metrics))

	storageInfo.Backend = objLayer.BackendInfo()
	for _, sinfo := range replies {
		storageInfo.Disks = append(storageInfo.Disks, sinfo.Disks...)
	}

	return storageInfo
}

// ServerInfo - calls ServerInfo RPC call on all peers.
func (sys *NotificationSys) ServerInfo(metrics bool) []madmin.ServerProperties {
	reply := make([]madmin.ServerProperties, len(sys.peerClients))
	var wg sync.WaitGroup
	for i, client := range sys.peerClients {
		if client == nil {
			continue
		}
		wg.Add(1)
		go func(client *peerRESTClient, idx int) {
			defer wg.Done()
			info, err := client.ServerInfo(metrics)
			if err != nil {
				info.Endpoint = client.host.String()
				info.State = string(madmin.ItemOffline)
				info.Disks = getOfflineDisks(info.Endpoint, globalEndpoints)
			}
			reply[idx] = info
		}(client, i)
	}
	wg.Wait()

	return reply
}

// returns all the peers that are currently online.
func (sys *NotificationSys) getOnlinePeers() []*peerRESTClient {
	var peerClients []*peerRESTClient
	for _, peerClient := range sys.allPeerClients {
		if peerClient != nil && peerClient.IsOnline() {
			peerClients = append(peerClients, peerClient)
		}
	}
	return peerClients
}

// restClientFromHash will return a deterministic peerRESTClient based on s.
// Will return nil when no peers are online.
func (sys *NotificationSys) restClientFromHash(s string) (client *peerRESTClient) {
	if len(sys.peerClients) == 0 {
		return nil
	}
	peerClients := sys.getOnlinePeers()
	if len(peerClients) == 0 {
		return nil
	}
	idx := xxhash.Sum64String(s) % uint64(len(peerClients))
	return peerClients[idx]
}

// GetPeerOnlineCount gets the count of online and offline nodes.
func (sys *NotificationSys) GetPeerOnlineCount() (nodesOnline, nodesOffline int) {
	nodesOnline = 1 // Self is always online.
	nodesOffline = 0
	nodesOnlineIndex := make([]bool, len(sys.peerClients))
	var wg sync.WaitGroup
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		wg.Add(1)
		go func(idx int, client *peerRESTClient) {
			defer wg.Done()
			nodesOnlineIndex[idx] = client.restClient.HealthCheckFn()
		}(idx, client)
	}
	wg.Wait()

	for _, online := range nodesOnlineIndex {
		if online {
			nodesOnline++
		} else {
			nodesOffline++
		}
	}
	return
}

// NewNotificationSys - creates new notification system object.
func NewNotificationSys(endpoints EndpointServerPools) *NotificationSys {
	remote, all := newPeerRestClients(endpoints)
	return &NotificationSys{
		peerClients:    remote,
		allPeerClients: all,
	}
}
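
// A hypothetical use of restClientFromHash above: pinning work for a given
// key to one deterministic peer, so repeated calls for the same key reach the
// same node (sketch only; the key shown is arbitrary):
//
//	if client := sys.restClientFromHash(pathJoin(bucket, object)); client != nil {
//		// forward the request to the chosen peer
//	}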

// GetBandwidthReports - gets the bandwidth report from all nodes including self.
func (sys *NotificationSys) GetBandwidthReports(ctx context.Context, buckets ...string) bandwidth.BucketBandwidthReport {
	reports := make([]*bandwidth.BucketBandwidthReport, len(sys.peerClients))
	g := errgroup.WithNErrs(len(sys.peerClients))
	for index := range sys.peerClients {
		if sys.peerClients[index] == nil {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			reports[index], err = sys.peerClients[index].MonitorBandwidth(ctx, buckets)
			return err
		}, index)
	}

	for index, err := range g.Wait() {
		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
			sys.peerClients[index].host.String())
		ctx := logger.SetReqInfo(ctx, reqInfo)
		logger.LogOnceIf(ctx, err, sys.peerClients[index].host.String())
	}
	reports = append(reports, globalBucketMonitor.GetReport(bandwidth.SelectBuckets(buckets...)))
	consolidatedReport := bandwidth.BucketBandwidthReport{
		BucketStats: make(map[bandwidth.BucketOptions]bandwidth.Details),
	}
	for _, report := range reports {
		if report == nil || report.BucketStats == nil {
			continue
		}
		for opts := range report.BucketStats {
			d, ok := consolidatedReport.BucketStats[opts]
			if !ok {
				d = bandwidth.Details{
					LimitInBytesPerSecond: report.BucketStats[opts].LimitInBytesPerSecond,
				}
			}
			dt, ok := report.BucketStats[opts]
			if ok {
				d.CurrentBandwidthInBytesPerSecond += dt.CurrentBandwidthInBytesPerSecond
			}
			consolidatedReport.BucketStats[opts] = d
		}
	}
	return consolidatedReport
}

func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels []<-chan MetricV2, g *errgroup.Group) <-chan MetricV2 {
	ch := make(chan MetricV2)
	var wg sync.WaitGroup
	for index, err := range g.Wait() {
		if err != nil {
			if sys.peerClients[index] != nil {
				reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
					sys.peerClients[index].host.String())
				logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err, sys.peerClients[index].host.String())
			} else {
				logger.LogOnceIf(ctx, err, "peer-offline")
			}
			continue
		}
		wg.Add(1)
		go func(ctx context.Context, peerChannel <-chan MetricV2, wg *sync.WaitGroup) {
			defer wg.Done()
			for {
				select {
				case m, ok := <-peerChannel:
					if !ok {
						return
					}
					select {
					case ch <- m:
					case <-ctx.Done():
						return
					}
				case <-ctx.Done():
					return
				}
			}
		}(ctx, peerChannels[index], &wg)
	}
	go func(wg *sync.WaitGroup, ch chan MetricV2) {
		wg.Wait()
		xioutil.SafeClose(ch)
	}(&wg, ch)
	return ch
}

// GetBucketMetrics - gets the cluster level bucket metrics from all nodes excluding self.
func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan MetricV2 {
	if sys == nil {
		return nil
	}
	g := errgroup.WithNErrs(len(sys.peerClients))
	peerChannels := make([]<-chan MetricV2, len(sys.peerClients))
	for index := range sys.peerClients {
		index := index
		g.Go(func() error {
			if sys.peerClients[index] == nil {
				return errPeerNotReachable
			}
			var err error
			peerChannels[index], err = sys.peerClients[index].GetPeerBucketMetrics(ctx)
			return err
		}, index)
	}
	return sys.collectPeerMetrics(ctx, peerChannels, g)
}
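
// The peer metrics collectors above return one merged channel that closes
// after every contributing peer channel drains; a minimal consumer sketch
// (illustrative only):
//
//	for metric := range globalNotificationSys.GetBucketMetrics(ctx) {
//		// process each MetricV2 as it arrives from any peer
//	}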

// GetClusterMetrics - gets the cluster metrics from all nodes excluding self.
func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan MetricV2 {
	if sys == nil {
		return nil
	}
	g := errgroup.WithNErrs(len(sys.peerClients))
	peerChannels := make([]<-chan MetricV2, len(sys.peerClients))
	for index := range sys.peerClients {
		index := index
		g.Go(func() error {
			if sys.peerClients[index] == nil {
				return errPeerNotReachable
			}
			var err error
			peerChannels[index], err = sys.peerClients[index].GetPeerMetrics(ctx)
			return err
		}, index)
	}
	return sys.collectPeerMetrics(ctx, peerChannels, g)
}

// ServiceFreeze freezes all S3 API calls when 'freeze' is true,
// 'freeze' set to 'false' resumes all S3 API calls again.
// NOTE: once a tenant is frozen, one of two things needs to
// happen before normal operations can resume:
//   - the server is restarted with 'mc admin service restart', or
//   - this call is made again with 'freeze' set to 'false'.
func (sys *NotificationSys) ServiceFreeze(ctx context.Context, freeze bool) []NotificationPeerErr {
	serviceSig := serviceUnFreeze
	if freeze {
		serviceSig = serviceFreeze
	}
	ng := WithNPeers(len(sys.peerClients))
	for idx, client := range sys.peerClients {
		if client == nil {
			continue
		}
		client := client
		ng.Go(GlobalContext, func() error {
			return client.SignalService(serviceSig, "", false)
		}, idx, *client.host)
	}
	nerrs := ng.Wait()
	if freeze {
		freezeServices()
	} else {
		unfreezeServices()
	}
	return nerrs
}

// Netperf - perform mesh style network throughput test
func (sys *NotificationSys) Netperf(ctx context.Context, duration time.Duration) []madmin.NetperfNodeResult {
	length := len(sys.allPeerClients)
	if length == 0 {
		// For single node erasure setup.
		return nil
	}
	results := make([]madmin.NetperfNodeResult, length)

	scheme := "http"
	if globalIsTLS {
		scheme = "https"
	}

	var wg sync.WaitGroup
	for index := range sys.peerClients {
		if sys.peerClients[index] == nil {
			continue
		}
		wg.Add(1)
		go func(index int) {
			defer wg.Done()
			r, err := sys.peerClients[index].Netperf(ctx, duration)
			u := &url.URL{
				Scheme: scheme,
				Host:   sys.peerClients[index].host.String(),
			}
			if err != nil {
				results[index].Error = err.Error()
			} else {
				results[index] = r
			}
			results[index].Endpoint = u.String()
		}(index)
	}

	wg.Add(1)
	go func() {
		defer wg.Done()
		r := netperf(ctx, duration)
		u := &url.URL{
			Scheme: scheme,
			Host:   globalLocalNodeName,
		}
		results[len(results)-1] = r
		results[len(results)-1].Endpoint = u.String()
	}()
	wg.Wait()

	return results
}

// SpeedTest runs GET/PUT tests at the requested concurrency and object size;
// optionally the tests can be run for a longer time.Duration.
func (sys *NotificationSys) SpeedTest(ctx context.Context, sopts speedTestOpts) []SpeedTestResult {
	length := len(sys.allPeerClients)
	if length == 0 {
		// For single node erasure setup.
		length = 1
	}
	results := make([]SpeedTestResult, length)

	scheme := "http"
	if globalIsTLS {
		scheme = "https"
	}

	var wg sync.WaitGroup
	for index := range sys.peerClients {
		if sys.peerClients[index] == nil {
			continue
		}
		wg.Add(1)
		go func(index int) {
			defer wg.Done()
			r, err := sys.peerClients[index].SpeedTest(ctx, sopts)
			u := &url.URL{
				Scheme: scheme,
				Host:   sys.peerClients[index].host.String(),
			}
			if err != nil {
				results[index].Error = err.Error()
			} else {
				results[index] = r
			}
			results[index].Endpoint = u.String()
		}(index)
	}

	wg.Add(1)
	go func() {
		defer wg.Done()
		r, err := selfSpeedTest(ctx, sopts)
		u := &url.URL{
			Scheme: scheme,
			Host:   globalLocalNodeName,
		}
		if err != nil {
			results[len(results)-1].Error = err.Error()
		} else {
			results[len(results)-1] = r
		}
		results[len(results)-1].Endpoint = u.String()
	}()
	wg.Wait()

	return results
}

// DriveSpeedTest - Drive performance information
func (sys *NotificationSys) DriveSpeedTest(ctx context.Context, opts madmin.DriveSpeedTestOpts) chan madmin.DriveSpeedTestResult {
	ch := make(chan madmin.DriveSpeedTestResult)
	var wg sync.WaitGroup
	for _, client := range sys.peerClients {
		if client == nil {
			continue
		}
		wg.Add(1)
		go func(client *peerRESTClient) {
			defer wg.Done()
			resp, err := client.DriveSpeedTest(ctx, opts)
			if err != nil {
				resp.Error = err.Error()
			}

			select {
			case <-ctx.Done():
			case ch <- resp:
			}

			reqInfo := (&logger.ReqInfo{}).AppendTags("remotePeer", client.host.String())
			ctx := logger.SetReqInfo(GlobalContext, reqInfo)
			logger.LogOnceIf(ctx, err, client.host.String())
		}(client)
	}

	wg.Add(1)
	go func() {
		defer wg.Done()
		select {
		case <-ctx.Done():
		case ch <- driveSpeedTest(ctx, opts):
		}
	}()

	go func(wg *sync.WaitGroup, ch chan madmin.DriveSpeedTestResult) {
		wg.Wait()
		xioutil.SafeClose(ch)
	}(&wg, ch)

	return ch
}
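
// DriveSpeedTest above streams one result per node on a channel that closes
// once all nodes (including this one) have reported; a minimal consumer
// sketch (illustrative only):
//
//	for res := range globalNotificationSys.DriveSpeedTest(ctx, opts) {
//		// res.Error is non-empty when a node failed the test
//	}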

// ReloadSiteReplicationConfig - tells all peer minio nodes to reload the
// site-replication configuration.
func (sys *NotificationSys) ReloadSiteReplicationConfig(ctx context.Context) []error {
	errs := make([]error, len(sys.allPeerClients))
	var wg sync.WaitGroup
	for index := range sys.peerClients {
		if sys.peerClients[index] == nil {
			continue
		}
		wg.Add(1)
		go func(index int) {
			defer wg.Done()
			errs[index] = sys.peerClients[index].ReloadSiteReplicationConfig(ctx)
		}(index)
	}

	wg.Wait()
	return errs
}

// GetLastDayTierStats fetches per-tier stats of the last 24hrs from all peers
func (sys *NotificationSys) GetLastDayTierStats(ctx context.Context) DailyAllTierStats {
	errs := make([]error, len(sys.allPeerClients))
	lastDayStats := make([]DailyAllTierStats, len(sys.allPeerClients))
	var wg sync.WaitGroup
	for index := range sys.peerClients {
		if sys.peerClients[index] == nil {
			continue
		}
		wg.Add(1)
		go func(index int) {
			defer wg.Done()
			lastDayStats[index], errs[index] = sys.peerClients[index].GetLastDayTierStats(ctx)
		}(index)
	}

	wg.Wait()
	merged := globalTransitionState.getDailyAllTierStats()
	for i, stat := range lastDayStats {
		if errs[i] != nil {
			logger.LogOnceIf(ctx, fmt.Errorf("failed to fetch last day tier stats: %w", errs[i]), sys.peerClients[i].host.String())
			continue
		}
		merged.merge(stat)
	}
	return merged
}

// GetReplicationMRF - Get replication MRF from all peers.
func (sys *NotificationSys) GetReplicationMRF(ctx context.Context, bucket, node string) (mrfCh chan madmin.ReplicationMRF, err error) {
	g := errgroup.WithNErrs(len(sys.peerClients))
	peerChannels := make([]<-chan madmin.ReplicationMRF, len(sys.peerClients))
	for index, client := range sys.peerClients {
		if client == nil {
			continue
		}
		host := client.host.String()
		if host != node && node != "all" {
			continue
		}
		index := index
		g.Go(func() error {
			var err error
			peerChannels[index], err = sys.peerClients[index].GetReplicationMRF(ctx, bucket)
			return err
		}, index)
	}
	mrfCh = make(chan madmin.ReplicationMRF, 4000)
	var wg sync.WaitGroup

	for index, err := range g.Wait() {
		if err != nil {
			if sys.peerClients[index] != nil {
				reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
					sys.peerClients[index].host.String())
				logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err, sys.peerClients[index].host.String())
			} else {
				logger.LogOnceIf(ctx, err, "peer-offline")
			}
			continue
		}
		wg.Add(1)
		go func(ctx context.Context, peerChannel <-chan madmin.ReplicationMRF, wg *sync.WaitGroup) {
			defer wg.Done()
			for {
				select {
				case m, ok := <-peerChannel:
					if !ok {
						return
					}
					select {
					case <-ctx.Done():
						return
					case mrfCh <- m:
					}
				case <-ctx.Done():
					return
				}
			}
		}(ctx, peerChannels[index], &wg)
	}
	wg.Add(1)
	go func(ch chan madmin.ReplicationMRF) error {
		defer wg.Done()
		if node != "all" && node != globalLocalNodeName {
			return nil
		}
		mCh, err := globalReplicationPool.getMRF(ctx, bucket)
		if err != nil {
			return err
		}
		for e := range mCh {
			select {
			case <-ctx.Done():
				return err
			case mrfCh <- e:
			}
		}
		return nil
	}(mrfCh)
	go func(wg *sync.WaitGroup) {
		wg.Wait()
		xioutil.SafeClose(mrfCh)
	}(&wg)
	return mrfCh, nil
}
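
// A minimal consumer sketch for GetReplicationMRF above (illustrative only;
// passing "all" requests the MRF backlog of every node, including this one):
//
//	ch, err := globalNotificationSys.GetReplicationMRF(ctx, bucket, "all")
//	if err != nil {
//		return err
//	}
//	for entry := range ch {
//		// each entry is one madmin.ReplicationMRF record from some node
//	}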