github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/notification.go

     1  // Copyright (c) 2015-2023 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"errors"
    24  	"fmt"
    25  	"io"
    26  	"math/rand"
    27  	"net/http"
    28  	"net/url"
    29  	"runtime"
    30  	"sync"
    31  	"time"
    32  
    33  	"github.com/cespare/xxhash/v2"
    34  	"github.com/klauspost/compress/zip"
    35  	"github.com/minio/madmin-go/v3"
    36  	xioutil "github.com/minio/minio/internal/ioutil"
    37  	xnet "github.com/minio/pkg/v2/net"
    38  	"github.com/minio/pkg/v2/sync/errgroup"
    39  	"github.com/minio/pkg/v2/workers"
    40  
    41  	"github.com/minio/minio/internal/bucket/bandwidth"
    42  	"github.com/minio/minio/internal/logger"
    43  )
    44  
    45  // This file contains peer-related notifications. For sending notifications to
    46  // external systems, see event-notification.go
    47  
    48  // NotificationSys - notification system.
    49  type NotificationSys struct {
    50  	peerClients    []*peerRESTClient // Excludes self
    51  	allPeerClients []*peerRESTClient // Includes nil client for self
    52  }
    53  
    54  // NotificationPeerErr holds the error associated with a remote peer.
    55  type NotificationPeerErr struct {
    56  	Host xnet.Host // Remote host on which the rpc call was initiated
    57  	Err  error     // Error returned by the remote peer for an rpc call
    58  }
    59  
    60  // A NotificationGroup is a collection of goroutines working on subtasks that are part of
    61  // the same overall task.
    62  //
    63  // A zero NotificationGroup is valid and does not cancel on error.
    64  type NotificationGroup struct {
    65  	workers    *workers.Workers
    66  	errs       []NotificationPeerErr
    67  	retryCount int
    68  }
    69  
    70  // WithNPeers returns a new NotificationGroup with the errs slice sized to nerrs;
    71  // upon Wait() the errors collected from all tasks are returned.
    72  func WithNPeers(nerrs int) *NotificationGroup {
    73  	if nerrs <= 0 {
    74  		nerrs = 1
    75  	}
    76  	wk, _ := workers.New(nerrs)
    77  	return &NotificationGroup{errs: make([]NotificationPeerErr, nerrs), workers: wk, retryCount: 3}
    78  }
    79  
    80  // WithNPeersThrottled returns a new NotificationGroup with the errs slice sized to nerrs;
    81  // upon Wait() the errors collected from all tasks are returned. It additionally limits
    82  // the number of workers running in parallel to wks.
    83  func WithNPeersThrottled(nerrs, wks int) *NotificationGroup {
    84  	if nerrs <= 0 {
    85  		nerrs = 1
    86  	}
    87  	wk, _ := workers.New(wks)
    88  	return &NotificationGroup{errs: make([]NotificationPeerErr, nerrs), workers: wk, retryCount: 3}
    89  }
    90  
    91  // WithRetries sets the retry count for all function calls from the Go method.
    92  func (g *NotificationGroup) WithRetries(retryCount int) *NotificationGroup {
    93  	if g != nil {
    94  		g.retryCount = retryCount
    95  	}
    96  	return g
    97  }
    98  
    99  // Wait blocks until all function calls from the Go method have returned, then
   100  // returns the slice of errors from all function calls.
   101  func (g *NotificationGroup) Wait() []NotificationPeerErr {
   102  	g.workers.Wait()
   103  	return g.errs
   104  }
   105  
   106  // Go calls the given function in a new goroutine.
   107  //
   108  // Any non-nil error returned by the function after all retries are
   109  // exhausted is collected in the errs slice at the given index and returned by Wait().
   110  func (g *NotificationGroup) Go(ctx context.Context, f func() error, index int, addr xnet.Host) {
   111  	r := rand.New(rand.NewSource(time.Now().UnixNano()))
   112  
   113  	g.workers.Take()
   114  
   115  	go func() {
   116  		defer g.workers.Give()
   117  
   118  		g.errs[index] = NotificationPeerErr{
   119  			Host: addr,
   120  		}
   121  		for i := 0; i < g.retryCount; i++ {
   122  			g.errs[index].Err = nil
   123  			if err := f(); err != nil {
   124  				g.errs[index].Err = err
   125  				// On the last attempt, log the error.
   126  				if i == g.retryCount-1 {
   127  					reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", addr.String())
   128  					ctx := logger.SetReqInfo(ctx, reqInfo)
   129  					logger.LogOnceIf(ctx, err, addr.String())
   130  				}
   131  				// Wait for a minimum of 100ms plus a random jitter of up to 1s before retrying.
   132  				if i < g.retryCount-1 {
   133  					time.Sleep(100*time.Millisecond + time.Duration(r.Float64()*float64(time.Second)))
   134  				}
   135  				continue
   136  			}
   137  			break
   138  		}
   139  	}()
   140  }
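
        // Typical fan-out usage of NotificationGroup (an illustrative sketch; the
        // per-peer helpers below, such as DeletePolicy, follow this exact shape, and
        // SomeRPC merely stands in for any peer RPC method):
        //
        //	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
        //	for idx, client := range sys.peerClients {
        //		client := client
        //		ng.Go(GlobalContext, func() error {
        //			if client == nil {
        //				return errPeerNotReachable
        //			}
        //			return client.SomeRPC() // placeholder for the actual peer call
        //		}, idx, *client.host)
        //	}
        //	errs := ng.Wait()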
   141  
   142  // DeletePolicy - deletes policy across all peers.
   143  func (sys *NotificationSys) DeletePolicy(policyName string) []NotificationPeerErr {
   144  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   145  	for idx, client := range sys.peerClients {
   146  		client := client
   147  		ng.Go(GlobalContext, func() error {
   148  			if client == nil {
   149  				return errPeerNotReachable
   150  			}
   151  			return client.DeletePolicy(policyName)
   152  		}, idx, *client.host)
   153  	}
   154  	return ng.Wait()
   155  }
   156  
   157  // LoadPolicy - reloads a specific modified policy across all peers
   158  func (sys *NotificationSys) LoadPolicy(policyName string) []NotificationPeerErr {
   159  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   160  	for idx, client := range sys.peerClients {
   161  		client := client
   162  		ng.Go(GlobalContext, func() error {
   163  			if client == nil {
   164  				return errPeerNotReachable
   165  			}
   166  			return client.LoadPolicy(policyName)
   167  		}, idx, *client.host)
   168  	}
   169  	return ng.Wait()
   170  }
   171  
   172  // LoadPolicyMapping - reloads a policy mapping across all peers
   173  func (sys *NotificationSys) LoadPolicyMapping(userOrGroup string, userType IAMUserType, isGroup bool) []NotificationPeerErr {
   174  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   175  	for idx, client := range sys.peerClients {
   176  		client := client
   177  		ng.Go(GlobalContext, func() error {
   178  			if client == nil {
   179  				return errPeerNotReachable
   180  			}
   181  			return client.LoadPolicyMapping(userOrGroup, userType, isGroup)
   182  		}, idx, *client.host)
   183  	}
   184  	return ng.Wait()
   185  }
   186  
   187  // DeleteUser - deletes a specific user across all peers
   188  func (sys *NotificationSys) DeleteUser(accessKey string) []NotificationPeerErr {
   189  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   190  	for idx, client := range sys.peerClients {
   191  		client := client
   192  		ng.Go(GlobalContext, func() error {
   193  			if client == nil {
   194  				return errPeerNotReachable
   195  			}
   196  			return client.DeleteUser(accessKey)
   197  		}, idx, *client.host)
   198  	}
   199  	return ng.Wait()
   200  }
   201  
   202  // LoadUser - reloads a specific user across all peers
   203  func (sys *NotificationSys) LoadUser(accessKey string, temp bool) []NotificationPeerErr {
   204  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   205  	for idx, client := range sys.peerClients {
   206  		client := client
   207  		ng.Go(GlobalContext, func() error {
   208  			if client == nil {
   209  				return errPeerNotReachable
   210  			}
   211  			return client.LoadUser(accessKey, temp)
   212  		}, idx, *client.host)
   213  	}
   214  	return ng.Wait()
   215  }
   216  
   217  // LoadGroup - loads a specific group on all peers.
   218  func (sys *NotificationSys) LoadGroup(group string) []NotificationPeerErr {
   219  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   220  	for idx, client := range sys.peerClients {
   221  		client := client
   222  		ng.Go(GlobalContext, func() error {
   223  			if client == nil {
   224  				return errPeerNotReachable
   225  			}
   226  			return client.LoadGroup(group)
   227  		}, idx, *client.host)
   228  	}
   229  	return ng.Wait()
   230  }
   231  
   232  // DeleteServiceAccount - deletes a specific service account across all peers
   233  func (sys *NotificationSys) DeleteServiceAccount(accessKey string) []NotificationPeerErr {
   234  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   235  	for idx, client := range sys.peerClients {
   236  		client := client
   237  		ng.Go(GlobalContext, func() error {
   238  			if client == nil {
   239  				return errPeerNotReachable
   240  			}
   241  			return client.DeleteServiceAccount(accessKey)
   242  		}, idx, *client.host)
   243  	}
   244  	return ng.Wait()
   245  }
   246  
   247  // LoadServiceAccount - reloads a specific service account across all peers
   248  func (sys *NotificationSys) LoadServiceAccount(accessKey string) []NotificationPeerErr {
   249  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   250  	for idx, client := range sys.peerClients {
   251  		client := client
   252  		ng.Go(GlobalContext, func() error {
   253  			if client == nil {
   254  				return errPeerNotReachable
   255  			}
   256  			return client.LoadServiceAccount(accessKey)
   257  		}, idx, *client.host)
   258  	}
   259  	return ng.Wait()
   260  }
   261  
   262  // BackgroundHealStatus - returns background heal status of all peers
   263  func (sys *NotificationSys) BackgroundHealStatus() ([]madmin.BgHealState, []NotificationPeerErr) {
   264  	ng := WithNPeers(len(sys.peerClients))
   265  	states := make([]madmin.BgHealState, len(sys.peerClients))
   266  	for idx, client := range sys.peerClients {
   267  		idx := idx
   268  		client := client
   269  		ng.Go(GlobalContext, func() error {
   270  			if client == nil {
   271  				return errPeerNotReachable
   272  			}
   273  			st, err := client.BackgroundHealStatus()
   274  			if err != nil {
   275  				return err
   276  			}
   277  			states[idx] = st
   278  			return nil
   279  		}, idx, *client.host)
   280  	}
   281  
   282  	return states, ng.Wait()
   283  }
   284  
   285  // StartProfiling - starts profiling on remote peers by initiating a remote RPC.
   286  func (sys *NotificationSys) StartProfiling(profiler string) []NotificationPeerErr {
   287  	ng := WithNPeers(len(sys.peerClients))
   288  	for idx, client := range sys.peerClients {
   289  		if client == nil {
   290  			continue
   291  		}
   292  		client := client
   293  		ng.Go(GlobalContext, func() error {
   294  			return client.StartProfiling(profiler)
   295  		}, idx, *client.host)
   296  	}
   297  	return ng.Wait()
   298  }
   299  
   300  // DownloadProfilingData - download profiling data from all remote peers.
   301  func (sys *NotificationSys) DownloadProfilingData(ctx context.Context, writer io.Writer) (profilingDataFound bool) {
   302  	// Initialize a zip writer which will provide the zipped content
   303  	// of profiling data from all nodes
   304  	zipWriter := zip.NewWriter(writer)
   305  	defer zipWriter.Close()
   306  
   307  	for _, client := range sys.peerClients {
   308  		if client == nil {
   309  			continue
   310  		}
   311  		data, err := client.DownloadProfileData()
   312  		if err != nil {
   313  			reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", client.host.String())
   314  			ctx := logger.SetReqInfo(ctx, reqInfo)
   315  			logger.LogIf(ctx, err)
   316  			continue
   317  		}
   318  
   319  		profilingDataFound = true
   320  
   321  		for typ, data := range data {
   322  			err := embedFileInZip(zipWriter, fmt.Sprintf("profile-%s-%s", client.host.String(), typ), data, 0o600)
   323  			if err != nil {
   324  				reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", client.host.String())
   325  				ctx := logger.SetReqInfo(ctx, reqInfo)
   326  				logger.LogIf(ctx, err)
   327  			}
   328  		}
   329  	}
   330  
   331  	// Local host
   332  	thisAddr, err := xnet.ParseHost(globalLocalNodeName)
   333  	if err != nil {
   334  		logger.LogIf(ctx, err)
   335  		return profilingDataFound
   336  	}
   337  
   338  	data, err := getProfileData()
   339  	if err != nil {
   340  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", thisAddr.String())
   341  		ctx := logger.SetReqInfo(ctx, reqInfo)
   342  		logger.LogIf(ctx, err)
   343  		return profilingDataFound
   344  	}
   345  
   346  	profilingDataFound = true
   347  
   348  	// Send profiling data to zip as file
   349  	for typ, data := range data {
   350  		err := embedFileInZip(zipWriter, fmt.Sprintf("profile-%s-%s", thisAddr, typ), data, 0o600)
   351  		logger.LogIf(ctx, err)
   352  	}
   353  	if b := getClusterMetaInfo(ctx); len(b) > 0 {
   354  		logger.LogIf(ctx, embedFileInZip(zipWriter, "cluster.info", b, 0o600))
   355  	}
   356  
   357  	return
   358  }
   359  
   360  // VerifyBinary - asks remote peers to verify the checksum
   361  func (sys *NotificationSys) VerifyBinary(ctx context.Context, u *url.URL, sha256Sum []byte, releaseInfo string, bin []byte) []NotificationPeerErr {
   362  	// FIXME: network calls made in this manner, such as one goroutine per node,
   363  	// can easily eat into the internode bandwidth. This function would be mostly
   364  	// TX saturating, however there are situations where RX might also saturate.
   365  	// To avoid these problems we must split the work at scale. With 1000 node
   366  	// setups becoming a reality, we must try to shard the work properly: for
   367  	// example, pick 10 nodes, each of which can send those 100 requests; the
   368  	// first node in the 10 node shard would coordinate with the other 9 shards
   369  	// to get the rest of the `99*9` requests.
   370  	//
   371  	// This essentially splits the workload properly and also allows network
   372  	// utilization to be optimal, instead of blindly throttling the way we are
   373  	// doing below. However, the changes needed here are a bit involved, and
   374  	// further discussion is advised. Remove this comment and the worker model
   375  	// for this function in the future.
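        	//
        	// One reading of the sharding sketch above (an assumption, spelled out for
        	// illustration only): with 1000 nodes split into 10 shards of 100, the
        	// initiating node contacts the other ~99 nodes in its own shard plus the 9
        	// shard leaders, and each leader relays to the remaining 99 nodes in its
        	// shard (99*9 = 891 relayed requests), so every node is reached while no
        	// single node has to open anywhere near 1000 connections itself.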
   376  	maxWorkers := runtime.GOMAXPROCS(0) / 2
   377  	if maxWorkers > len(sys.peerClients) {
   378  		maxWorkers = len(sys.peerClients)
   379  	}
   380  
   381  	ng := WithNPeersThrottled(len(sys.peerClients), maxWorkers)
   382  	for idx, client := range sys.peerClients {
   383  		if client == nil {
   384  			continue
   385  		}
   386  		client := client
   387  		ng.Go(ctx, func() error {
   388  			return client.VerifyBinary(ctx, u, sha256Sum, releaseInfo, bytes.NewReader(bin))
   389  		}, idx, *client.host)
   390  	}
   391  	return ng.Wait()
   392  }
   393  
   394  // CommitBinary - asks remote peers to overwrite the old binary with the new one
   395  func (sys *NotificationSys) CommitBinary(ctx context.Context) []NotificationPeerErr {
   396  	ng := WithNPeers(len(sys.peerClients))
   397  	for idx, client := range sys.peerClients {
   398  		if client == nil {
   399  			continue
   400  		}
   401  		client := client
   402  		ng.Go(ctx, func() error {
   403  			return client.CommitBinary(ctx)
   404  		}, idx, *client.host)
   405  	}
   406  	return ng.Wait()
   407  }
   408  
   409  // SignalConfigReload dynamically reloads the requested sub-system on all remote peers.
   410  func (sys *NotificationSys) SignalConfigReload(subSys string) []NotificationPeerErr {
   411  	ng := WithNPeers(len(sys.peerClients))
   412  	for idx, client := range sys.peerClients {
   413  		if client == nil {
   414  			continue
   415  		}
   416  		client := client
   417  		ng.Go(GlobalContext, func() error {
   418  			return client.SignalService(serviceReloadDynamic, subSys, false)
   419  		}, idx, *client.host)
   420  	}
   421  	return ng.Wait()
   422  }
   423  
   424  // SignalService - makes the signal service RPC call on all peers.
   425  func (sys *NotificationSys) SignalService(sig serviceSignal) []NotificationPeerErr {
   426  	ng := WithNPeers(len(sys.peerClients))
   427  	for idx, client := range sys.peerClients {
   428  		if client == nil {
   429  			continue
   430  		}
   431  		client := client
   432  		ng.Go(GlobalContext, func() error {
   433  			// dryRun == false preserves the current behavior
   434  			return client.SignalService(sig, "", false)
   435  		}, idx, *client.host)
   436  	}
   437  	return ng.Wait()
   438  }
   439  
   440  // SignalServiceV2 - makes the signal service RPC call on all peers using the v2 API
   441  func (sys *NotificationSys) SignalServiceV2(sig serviceSignal, dryRun bool) []NotificationPeerErr {
   442  	ng := WithNPeers(len(sys.peerClients))
   443  	for idx, client := range sys.peerClients {
   444  		if client == nil {
   445  			continue
   446  		}
   447  		client := client
   448  		ng.Go(GlobalContext, func() error {
   449  			return client.SignalService(sig, "", dryRun)
   450  		}, idx, *client.host)
   451  	}
   452  	return ng.Wait()
   453  }
   454  
   455  var errPeerNotReachable = errors.New("peer is not reachable")
   456  
   457  // GetLocks - makes the GetLocks RPC call on all peers.
   458  func (sys *NotificationSys) GetLocks(ctx context.Context, r *http.Request) []*PeerLocks {
   459  	locksResp := make([]*PeerLocks, len(sys.peerClients))
   460  	g := errgroup.WithNErrs(len(sys.peerClients))
   461  	for index, client := range sys.peerClients {
   462  		index := index
   463  		client := client
   464  		g.Go(func() error {
   465  			if client == nil {
   466  				return errPeerNotReachable
   467  			}
   468  			serverLocksResp, err := sys.peerClients[index].GetLocks()
   469  			if err != nil {
   470  				return err
   471  			}
   472  			locksResp[index] = &PeerLocks{
   473  				Addr:  sys.peerClients[index].host.String(),
   474  				Locks: serverLocksResp,
   475  			}
   476  			return nil
   477  		}, index)
   478  	}
   479  	for index, err := range g.Wait() {
   480  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
   481  			sys.peerClients[index].host.String())
   482  		ctx := logger.SetReqInfo(ctx, reqInfo)
   483  		logger.LogOnceIf(ctx, err, sys.peerClients[index].host.String())
   484  	}
   485  	locksResp = append(locksResp, &PeerLocks{
   486  		Addr:  getHostName(r),
   487  		Locks: globalLockServer.DupLockMap(),
   488  	})
   489  	return locksResp
   490  }
   491  
   492  // LoadBucketMetadata - calls LoadBucketMetadata on all peers
   493  func (sys *NotificationSys) LoadBucketMetadata(ctx context.Context, bucketName string) {
   494  	ng := WithNPeers(len(sys.peerClients))
   495  	for idx, client := range sys.peerClients {
   496  		if client == nil {
   497  			continue
   498  		}
   499  		client := client
   500  		ng.Go(ctx, func() error {
   501  			return client.LoadBucketMetadata(bucketName)
   502  		}, idx, *client.host)
   503  	}
   504  	for _, nErr := range ng.Wait() {
   505  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   506  		if nErr.Err != nil {
   507  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   508  		}
   509  	}
   510  }
   511  
   512  // DeleteBucketMetadata - calls DeleteBucketMetadata on all peers
   513  func (sys *NotificationSys) DeleteBucketMetadata(ctx context.Context, bucketName string) {
   514  	globalReplicationStats.Delete(bucketName)
   515  	globalBucketMetadataSys.Remove(bucketName)
   516  	globalBucketTargetSys.Delete(bucketName)
   517  	globalEventNotifier.RemoveNotification(bucketName)
   518  	globalBucketConnStats.delete(bucketName)
   519  	globalBucketHTTPStats.delete(bucketName)
   520  	if localMetacacheMgr != nil {
   521  		localMetacacheMgr.deleteBucketCache(bucketName)
   522  	}
   523  
   524  	ng := WithNPeers(len(sys.peerClients))
   525  	for idx, client := range sys.peerClients {
   526  		if client == nil {
   527  			continue
   528  		}
   529  		client := client
   530  		ng.Go(ctx, func() error {
   531  			return client.DeleteBucketMetadata(bucketName)
   532  		}, idx, *client.host)
   533  	}
   534  	for _, nErr := range ng.Wait() {
   535  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   536  		if nErr.Err != nil {
   537  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   538  		}
   539  	}
   540  }
   541  
   542  // GetClusterAllBucketStats - returns bucket stats for all buckets from all remote peers.
   543  func (sys *NotificationSys) GetClusterAllBucketStats(ctx context.Context) []BucketStatsMap {
   544  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   545  	replicationStats := make([]BucketStatsMap, len(sys.peerClients))
   546  	for index, client := range sys.peerClients {
   547  		index := index
   548  		client := client
   549  		ng.Go(ctx, func() error {
   550  			if client == nil {
   551  				return errPeerNotReachable
   552  			}
   553  			bsMap, err := client.GetAllBucketStats()
   554  			if err != nil {
   555  				return err
   556  			}
   557  			replicationStats[index] = bsMap
   558  			return nil
   559  		}, index, *client.host)
   560  	}
   561  	for _, nErr := range ng.Wait() {
   562  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   563  		if nErr.Err != nil {
   564  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   565  		}
   566  	}
   567  
   568  	replicationStatsList := globalReplicationStats.GetAll()
   569  	bucketStatsMap := BucketStatsMap{
   570  		Stats:     make(map[string]BucketStats, len(replicationStatsList)),
   571  		Timestamp: UTCNow(),
   572  	}
   573  	for k, replicationStats := range replicationStatsList {
   574  		bucketStatsMap.Stats[k] = BucketStats{
   575  			ReplicationStats: replicationStats,
   576  			ProxyStats:       globalReplicationStats.getProxyStats(k),
   577  		}
   578  	}
   579  
   580  	replicationStats = append(replicationStats, bucketStatsMap)
   581  	return replicationStats
   582  }
   583  
   584  // GetClusterBucketStats - calls GetBucketStats on all peers for a cluster statistics view.
   585  func (sys *NotificationSys) GetClusterBucketStats(ctx context.Context, bucketName string) []BucketStats {
   586  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   587  	bucketStats := make([]BucketStats, len(sys.peerClients))
   588  	for index, client := range sys.peerClients {
   589  		index := index
   590  		client := client
   591  		ng.Go(ctx, func() error {
   592  			if client == nil {
   593  				return errPeerNotReachable
   594  			}
   595  			bs, err := client.GetBucketStats(bucketName)
   596  			if err != nil {
   597  				return err
   598  			}
   599  			bucketStats[index] = bs
   600  			return nil
   601  		}, index, *client.host)
   602  	}
   603  	for _, nErr := range ng.Wait() {
   604  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   605  		if nErr.Err != nil {
   606  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   607  		}
   608  	}
   609  	bucketStats = append(bucketStats, BucketStats{
   610  		ReplicationStats: globalReplicationStats.Get(bucketName),
   611  		QueueStats:       ReplicationQueueStats{Nodes: []ReplQNodeStats{globalReplicationStats.getNodeQueueStats(bucketName)}},
   612  		ProxyStats:       globalReplicationStats.getProxyStats(bucketName),
   613  	})
   614  	return bucketStats
   615  }
   616  
   617  // GetClusterSiteMetrics - calls GetSRMetrics on all peers for a cluster statistics view.
   618  func (sys *NotificationSys) GetClusterSiteMetrics(ctx context.Context) []SRMetricsSummary {
   619  	ng := WithNPeers(len(sys.peerClients)).WithRetries(1)
   620  	siteStats := make([]SRMetricsSummary, len(sys.peerClients))
   621  	for index, client := range sys.peerClients {
   622  		index := index
   623  		client := client
   624  		ng.Go(ctx, func() error {
   625  			if client == nil {
   626  				return errPeerNotReachable
   627  			}
   628  			sm, err := client.GetSRMetrics()
   629  			if err != nil {
   630  				return err
   631  			}
   632  			siteStats[index] = sm
   633  			return nil
   634  		}, index, *client.host)
   635  	}
   636  	for _, nErr := range ng.Wait() {
   637  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   638  		if nErr.Err != nil {
   639  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   640  		}
   641  	}
   642  	siteStats = append(siteStats, globalReplicationStats.getSRMetricsForNode())
   643  	return siteStats
   644  }
   645  
   646  // ReloadPoolMeta reloads on-disk updates to pool metadata on all peers.
   647  func (sys *NotificationSys) ReloadPoolMeta(ctx context.Context) {
   648  	ng := WithNPeers(len(sys.peerClients))
   649  	for idx, client := range sys.peerClients {
   650  		if client == nil {
   651  			continue
   652  		}
   653  		client := client
   654  		ng.Go(ctx, func() error {
   655  			return client.ReloadPoolMeta(ctx)
   656  		}, idx, *client.host)
   657  	}
   658  	for _, nErr := range ng.Wait() {
   659  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   660  		if nErr.Err != nil {
   661  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   662  		}
   663  	}
   664  }
   665  
   666  // StopRebalance notifies all MinIO nodes to signal any ongoing rebalance
   667  // goroutine to stop.
   668  func (sys *NotificationSys) StopRebalance(ctx context.Context) {
   669  	ng := WithNPeers(len(sys.peerClients))
   670  	for idx, client := range sys.peerClients {
   671  		if client == nil {
   672  			continue
   673  		}
   674  		client := client
   675  		ng.Go(ctx, func() error {
   676  			return client.StopRebalance(ctx)
   677  		}, idx, *client.host)
   678  	}
   679  	for _, nErr := range ng.Wait() {
   680  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   681  		if nErr.Err != nil {
   682  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   683  		}
   684  	}
   685  
   686  	objAPI := newObjectLayerFn()
   687  	if objAPI == nil {
   688  		logger.LogIf(ctx, errServerNotInitialized)
   689  		return
   690  	}
   691  
   692  	if pools, ok := objAPI.(*erasureServerPools); ok {
   693  		pools.StopRebalance()
   694  	}
   695  }
   696  
   697  // LoadRebalanceMeta notifies all peers to load rebalance.bin from object layer.
   698  // Note: Only peers participating in the rebalance operation, namely the first node
   699  // in each pool, will load rebalance.bin.
   700  func (sys *NotificationSys) LoadRebalanceMeta(ctx context.Context, startRebalance bool) {
   701  	ng := WithNPeers(len(sys.peerClients))
   702  	for idx, client := range sys.peerClients {
   703  		if client == nil {
   704  			continue
   705  		}
   706  		client := client
   707  		ng.Go(ctx, func() error {
   708  			return client.LoadRebalanceMeta(ctx, startRebalance)
   709  		}, idx, *client.host)
   710  	}
   711  	for _, nErr := range ng.Wait() {
   712  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   713  		if nErr.Err != nil {
   714  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   715  		}
   716  	}
   717  }
   718  
   719  // LoadTransitionTierConfig notifies remote peers to load their remote tier
   720  // configs from the config store.
   721  func (sys *NotificationSys) LoadTransitionTierConfig(ctx context.Context) {
   722  	ng := WithNPeers(len(sys.peerClients))
   723  	for idx, client := range sys.peerClients {
   724  		if client == nil {
   725  			continue
   726  		}
   727  		client := client
   728  		ng.Go(ctx, func() error {
   729  			return client.LoadTransitionTierConfig(ctx)
   730  		}, idx, *client.host)
   731  	}
   732  	for _, nErr := range ng.Wait() {
   733  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress", nErr.Host.String())
   734  		if nErr.Err != nil {
   735  			logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), nErr.Err, nErr.Host.String())
   736  		}
   737  	}
   738  }
   739  
   740  // GetCPUs - Get all CPU information.
   741  func (sys *NotificationSys) GetCPUs(ctx context.Context) []madmin.CPUs {
   742  	reply := make([]madmin.CPUs, len(sys.peerClients))
   743  
   744  	g := errgroup.WithNErrs(len(sys.peerClients))
   745  	for index, client := range sys.peerClients {
   746  		if client == nil {
   747  			continue
   748  		}
   749  		index := index
   750  		g.Go(func() error {
   751  			var err error
   752  			reply[index], err = sys.peerClients[index].GetCPUs(ctx)
   753  			return err
   754  		}, index)
   755  	}
   756  
   757  	for index, err := range g.Wait() {
   758  		if err != nil {
   759  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   760  		}
   761  	}
   762  	return reply
   763  }
   764  
   765  // GetNetInfo - Network information
   766  func (sys *NotificationSys) GetNetInfo(ctx context.Context) []madmin.NetInfo {
   767  	reply := make([]madmin.NetInfo, len(sys.peerClients))
   768  
   769  	g := errgroup.WithNErrs(len(sys.peerClients))
   770  	for index, client := range sys.peerClients {
   771  		if client == nil {
   772  			continue
   773  		}
   774  		index := index
   775  		g.Go(func() error {
   776  			var err error
   777  			reply[index], err = sys.peerClients[index].GetNetInfo(ctx)
   778  			return err
   779  		}, index)
   780  	}
   781  
   782  	for index, err := range g.Wait() {
   783  		if err != nil {
   784  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   785  		}
   786  	}
   787  	return reply
   788  }
   789  
   790  // GetPartitions - Disk partition information
   791  func (sys *NotificationSys) GetPartitions(ctx context.Context) []madmin.Partitions {
   792  	reply := make([]madmin.Partitions, len(sys.peerClients))
   793  
   794  	g := errgroup.WithNErrs(len(sys.peerClients))
   795  	for index, client := range sys.peerClients {
   796  		if client == nil {
   797  			continue
   798  		}
   799  		index := index
   800  		g.Go(func() error {
   801  			var err error
   802  			reply[index], err = sys.peerClients[index].GetPartitions(ctx)
   803  			return err
   804  		}, index)
   805  	}
   806  
   807  	for index, err := range g.Wait() {
   808  		if err != nil {
   809  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   810  		}
   811  	}
   812  	return reply
   813  }
   814  
   815  // GetOSInfo - Get operating system information
   816  func (sys *NotificationSys) GetOSInfo(ctx context.Context) []madmin.OSInfo {
   817  	reply := make([]madmin.OSInfo, len(sys.peerClients))
   818  
   819  	g := errgroup.WithNErrs(len(sys.peerClients))
   820  	for index, client := range sys.peerClients {
   821  		if client == nil {
   822  			continue
   823  		}
   824  		index := index
   825  		g.Go(func() error {
   826  			var err error
   827  			reply[index], err = sys.peerClients[index].GetOSInfo(ctx)
   828  			return err
   829  		}, index)
   830  	}
   831  
   832  	for index, err := range g.Wait() {
   833  		if err != nil {
   834  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   835  		}
   836  	}
   837  	return reply
   838  }
   839  
   840  // GetMetrics - Get metrics from all peers.
   841  func (sys *NotificationSys) GetMetrics(ctx context.Context, t madmin.MetricType, opts collectMetricsOpts) []madmin.RealtimeMetrics {
   842  	reply := make([]madmin.RealtimeMetrics, len(sys.peerClients))
   843  
   844  	g := errgroup.WithNErrs(len(sys.peerClients))
   845  	for index, client := range sys.peerClients {
   846  		if client == nil {
   847  			continue
   848  		}
   849  		host := client.host.String()
   850  		if len(opts.hosts) > 0 {
   851  			if _, ok := opts.hosts[host]; !ok {
   852  				continue
   853  			}
   854  		}
   855  
   856  		index := index
   857  		g.Go(func() error {
   858  			var err error
   859  			reply[index], err = sys.peerClients[index].GetMetrics(ctx, t, opts)
   860  			return err
   861  		}, index)
   862  	}
   863  
   864  	for index, err := range g.Wait() {
   865  		if err != nil {
   866  			reply[index].Errors = []string{fmt.Sprintf("%s: %s (rpc)", sys.peerClients[index].String(), err.Error())}
   867  		}
   868  	}
   869  	return reply
   870  }
   871  
   872  // GetResourceMetrics - gets the resource metrics from all nodes excluding self.
   873  func (sys *NotificationSys) GetResourceMetrics(ctx context.Context) <-chan MetricV2 {
   874  	if sys == nil {
   875  		return nil
   876  	}
   877  	g := errgroup.WithNErrs(len(sys.peerClients))
   878  	peerChannels := make([]<-chan MetricV2, len(sys.peerClients))
   879  	for index := range sys.peerClients {
   880  		index := index
   881  		g.Go(func() error {
   882  			if sys.peerClients[index] == nil {
   883  				return errPeerNotReachable
   884  			}
   885  			var err error
   886  			peerChannels[index], err = sys.peerClients[index].GetResourceMetrics(ctx)
   887  			return err
   888  		}, index)
   889  	}
   890  	return sys.collectPeerMetrics(ctx, peerChannels, g)
   891  }
   892  
   893  // GetSysConfig - Get information about system config
   894  // (only the config that is of concern to MinIO)
   895  func (sys *NotificationSys) GetSysConfig(ctx context.Context) []madmin.SysConfig {
   896  	reply := make([]madmin.SysConfig, len(sys.peerClients))
   897  
   898  	g := errgroup.WithNErrs(len(sys.peerClients))
   899  	for index, client := range sys.peerClients {
   900  		if client == nil {
   901  			continue
   902  		}
   903  		index := index
   904  		g.Go(func() error {
   905  			var err error
   906  			reply[index], err = sys.peerClients[index].GetSysConfig(ctx)
   907  			return err
   908  		}, index)
   909  	}
   910  
   911  	for index, err := range g.Wait() {
   912  		if err != nil {
   913  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   914  		}
   915  	}
   916  	return reply
   917  }
   918  
   919  // GetSysServices - Get information about system services
   920  // (only the services that are of concern to MinIO)
   921  func (sys *NotificationSys) GetSysServices(ctx context.Context) []madmin.SysServices {
   922  	reply := make([]madmin.SysServices, len(sys.peerClients))
   923  
   924  	g := errgroup.WithNErrs(len(sys.peerClients))
   925  	for index, client := range sys.peerClients {
   926  		if client == nil {
   927  			continue
   928  		}
   929  		index := index
   930  		g.Go(func() error {
   931  			var err error
   932  			reply[index], err = sys.peerClients[index].GetSELinuxInfo(ctx)
   933  			return err
   934  		}, index)
   935  	}
   936  
   937  	for index, err := range g.Wait() {
   938  		if err != nil {
   939  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   940  		}
   941  	}
   942  	return reply
   943  }
   944  
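        // addNodeErr logs the given peer error once and records the peer address and
        // the error message on the supplied NodeInfo.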
   945  func (sys *NotificationSys) addNodeErr(nodeInfo madmin.NodeInfo, peerClient *peerRESTClient, err error) {
   946  	addr := peerClient.host.String()
   947  	reqInfo := (&logger.ReqInfo{}).AppendTags("remotePeer", addr)
   948  	ctx := logger.SetReqInfo(GlobalContext, reqInfo)
   949  	logger.LogOnceIf(ctx, err, "add-node-err-"+addr)
   950  	nodeInfo.SetAddr(addr)
   951  	nodeInfo.SetError(err.Error())
   952  }
   953  
   954  // GetSysErrors - Get system error information
   955  func (sys *NotificationSys) GetSysErrors(ctx context.Context) []madmin.SysErrors {
   956  	reply := make([]madmin.SysErrors, len(sys.peerClients))
   957  
   958  	g := errgroup.WithNErrs(len(sys.peerClients))
   959  	for index, client := range sys.peerClients {
   960  		if client == nil {
   961  			continue
   962  		}
   963  		index := index
   964  		g.Go(func() error {
   965  			var err error
   966  			reply[index], err = sys.peerClients[index].GetSysErrors(ctx)
   967  			return err
   968  		}, index)
   969  	}
   970  
   971  	for index, err := range g.Wait() {
   972  		if err != nil {
   973  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   974  		}
   975  	}
   976  	return reply
   977  }
   978  
   979  // GetMemInfo - Memory information
   980  func (sys *NotificationSys) GetMemInfo(ctx context.Context) []madmin.MemInfo {
   981  	reply := make([]madmin.MemInfo, len(sys.peerClients))
   982  
   983  	g := errgroup.WithNErrs(len(sys.peerClients))
   984  	for index, client := range sys.peerClients {
   985  		if client == nil {
   986  			continue
   987  		}
   988  		index := index
   989  		g.Go(func() error {
   990  			var err error
   991  			reply[index], err = sys.peerClients[index].GetMemInfo(ctx)
   992  			return err
   993  		}, index)
   994  	}
   995  
   996  	for index, err := range g.Wait() {
   997  		if err != nil {
   998  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
   999  		}
  1000  	}
  1001  	return reply
  1002  }
  1003  
  1004  // GetProcInfo - Process information
  1005  func (sys *NotificationSys) GetProcInfo(ctx context.Context) []madmin.ProcInfo {
  1006  	reply := make([]madmin.ProcInfo, len(sys.peerClients))
  1007  
  1008  	g := errgroup.WithNErrs(len(sys.peerClients))
  1009  	for index, client := range sys.peerClients {
  1010  		if client == nil {
  1011  			continue
  1012  		}
  1013  		index := index
  1014  		g.Go(func() error {
  1015  			var err error
  1016  			reply[index], err = sys.peerClients[index].GetProcInfo(ctx)
  1017  			return err
  1018  		}, index)
  1019  	}
  1020  
  1021  	for index, err := range g.Wait() {
  1022  		if err != nil {
  1023  			sys.addNodeErr(&reply[index], sys.peerClients[index], err)
  1024  		}
  1025  	}
  1026  	return reply
  1027  }
  1028  
  1029  // Construct a list of offline disk information for a given node.
  1030  // If offlineHost is empty, do it for the local disks.
  1031  func getOfflineDisks(offlineHost string, endpoints EndpointServerPools) []madmin.Disk {
  1032  	var offlineDisks []madmin.Disk
  1033  	for _, pool := range endpoints {
  1034  		for _, ep := range pool.Endpoints {
  1035  			if offlineHost == "" && ep.IsLocal || offlineHost == ep.Host {
  1036  				offlineDisks = append(offlineDisks, madmin.Disk{
  1037  					Endpoint:  ep.String(),
  1038  					State:     string(madmin.ItemOffline),
  1039  					PoolIndex: ep.PoolIdx,
  1040  					SetIndex:  ep.SetIdx,
  1041  					DiskIndex: ep.DiskIdx,
  1042  				})
  1043  			}
  1044  		}
  1045  	}
  1046  	return offlineDisks
  1047  }
  1048  
  1049  // StorageInfo returns disk information across all peers
  1050  func (sys *NotificationSys) StorageInfo(objLayer ObjectLayer, metrics bool) StorageInfo {
  1051  	var storageInfo StorageInfo
  1052  	replies := make([]StorageInfo, len(sys.peerClients))
  1053  
  1054  	var wg sync.WaitGroup
  1055  	for i, client := range sys.peerClients {
  1056  		if client == nil {
  1057  			continue
  1058  		}
  1059  		wg.Add(1)
  1060  		go func(client *peerRESTClient, idx int) {
  1061  			defer wg.Done()
  1062  			info, err := client.LocalStorageInfo(metrics)
  1063  			if err != nil {
  1064  				info.Disks = getOfflineDisks(client.host.String(), globalEndpoints)
  1065  			}
  1066  			replies[idx] = info
  1067  		}(client, i)
  1068  	}
  1069  	wg.Wait()
  1070  
  1071  	// Add local to this server.
  1072  	replies = append(replies, objLayer.LocalStorageInfo(GlobalContext, metrics))
  1073  
  1074  	storageInfo.Backend = objLayer.BackendInfo()
  1075  	for _, sinfo := range replies {
  1076  		storageInfo.Disks = append(storageInfo.Disks, sinfo.Disks...)
  1077  	}
  1078  
  1079  	return storageInfo
  1080  }
  1081  
  1082  // ServerInfo - makes the ServerInfo RPC call on all peers.
  1083  func (sys *NotificationSys) ServerInfo(metrics bool) []madmin.ServerProperties {
  1084  	reply := make([]madmin.ServerProperties, len(sys.peerClients))
  1085  	var wg sync.WaitGroup
  1086  	for i, client := range sys.peerClients {
  1087  		if client == nil {
  1088  			continue
  1089  		}
  1090  		wg.Add(1)
  1091  		go func(client *peerRESTClient, idx int) {
  1092  			defer wg.Done()
  1093  			info, err := client.ServerInfo(metrics)
  1094  			if err != nil {
  1095  				info.Endpoint = client.host.String()
  1096  				info.State = string(madmin.ItemOffline)
  1097  				info.Disks = getOfflineDisks(info.Endpoint, globalEndpoints)
  1098  			}
  1099  			reply[idx] = info
  1100  		}(client, i)
  1101  	}
  1102  	wg.Wait()
  1103  
  1104  	return reply
  1105  }
  1106  
  1107  // returns all the peers that are currently online.
  1108  func (sys *NotificationSys) getOnlinePeers() []*peerRESTClient {
  1109  	var peerClients []*peerRESTClient
  1110  	for _, peerClient := range sys.allPeerClients {
  1111  		if peerClient != nil && peerClient.IsOnline() {
  1112  			peerClients = append(peerClients, peerClient)
  1113  		}
  1114  	}
  1115  	return peerClients
  1116  }
  1117  
  1118  // restClientFromHash will return a deterministic peerRESTClient based on s.
  1119  // Will return nil if there are no online remote peers.
  1120  func (sys *NotificationSys) restClientFromHash(s string) (client *peerRESTClient) {
  1121  	if len(sys.peerClients) == 0 {
  1122  		return nil
  1123  	}
  1124  	peerClients := sys.getOnlinePeers()
  1125  	if len(peerClients) == 0 {
  1126  		return nil
  1127  	}
  1128  	idx := xxhash.Sum64String(s) % uint64(len(peerClients))
  1129  	return peerClients[idx]
  1130  }
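
        // Illustrative note: because selection is xxhash.Sum64String(s) modulo the
        // number of online peers, calls keyed by the same string (for example an
        // object name; "key" below is a hypothetical variable used only for
        // illustration) map to the same peer for as long as the set of online
        // peers is unchanged:
        //
        //	if client := sys.restClientFromHash(key); client != nil {
        //		// forward the request for this key to the selected peer
        //	}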
  1131  
  1132  // GetPeerOnlineCount gets the count of online and offline nodes.
  1133  func (sys *NotificationSys) GetPeerOnlineCount() (nodesOnline, nodesOffline int) {
  1134  	nodesOnline = 1 // Self is always online.
  1135  	nodesOffline = 0
  1136  	nodesOnlineIndex := make([]bool, len(sys.peerClients))
  1137  	var wg sync.WaitGroup
  1138  	for idx, client := range sys.peerClients {
  1139  		if client == nil {
  1140  			continue
  1141  		}
  1142  		wg.Add(1)
  1143  		go func(idx int, client *peerRESTClient) {
  1144  			defer wg.Done()
  1145  			nodesOnlineIndex[idx] = client.restClient.HealthCheckFn()
  1146  		}(idx, client)
  1147  
  1148  	}
  1149  	wg.Wait()
  1150  
  1151  	for _, online := range nodesOnlineIndex {
  1152  		if online {
  1153  			nodesOnline++
  1154  		} else {
  1155  			nodesOffline++
  1156  		}
  1157  	}
  1158  	return
  1159  }
  1160  
  1161  // NewNotificationSys - creates a new notification system object.
  1162  func NewNotificationSys(endpoints EndpointServerPools) *NotificationSys {
  1163  	remote, all := newPeerRestClients(endpoints)
  1164  	return &NotificationSys{
  1165  		peerClients:    remote,
  1166  		allPeerClients: all,
  1167  	}
  1168  }
  1169  
  1170  // GetBandwidthReports - gets the bandwidth report from all nodes including self.
  1171  func (sys *NotificationSys) GetBandwidthReports(ctx context.Context, buckets ...string) bandwidth.BucketBandwidthReport {
  1172  	reports := make([]*bandwidth.BucketBandwidthReport, len(sys.peerClients))
  1173  	g := errgroup.WithNErrs(len(sys.peerClients))
  1174  	for index := range sys.peerClients {
  1175  		if sys.peerClients[index] == nil {
  1176  			continue
  1177  		}
  1178  		index := index
  1179  		g.Go(func() error {
  1180  			var err error
  1181  			reports[index], err = sys.peerClients[index].MonitorBandwidth(ctx, buckets)
  1182  			return err
  1183  		}, index)
  1184  	}
  1185  
  1186  	for index, err := range g.Wait() {
  1187  		reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
  1188  			sys.peerClients[index].host.String())
  1189  		ctx := logger.SetReqInfo(ctx, reqInfo)
  1190  		logger.LogOnceIf(ctx, err, sys.peerClients[index].host.String())
  1191  	}
  1192  	reports = append(reports, globalBucketMonitor.GetReport(bandwidth.SelectBuckets(buckets...)))
  1193  	consolidatedReport := bandwidth.BucketBandwidthReport{
  1194  		BucketStats: make(map[bandwidth.BucketOptions]bandwidth.Details),
  1195  	}
  1196  	for _, report := range reports {
  1197  		if report == nil || report.BucketStats == nil {
  1198  			continue
  1199  		}
  1200  		for opts := range report.BucketStats {
  1201  			d, ok := consolidatedReport.BucketStats[opts]
  1202  			if !ok {
  1203  				d = bandwidth.Details{
  1204  					LimitInBytesPerSecond: report.BucketStats[opts].LimitInBytesPerSecond,
  1205  				}
  1206  			}
  1207  			dt, ok := report.BucketStats[opts]
  1208  			if ok {
  1209  				d.CurrentBandwidthInBytesPerSecond += dt.CurrentBandwidthInBytesPerSecond
  1210  			}
  1211  			consolidatedReport.BucketStats[opts] = d
  1212  		}
  1213  	}
  1214  	return consolidatedReport
  1215  }
  1216  
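        // collectPeerMetrics fans in the per-peer metric channels into a single
        // channel: one goroutine per successfully connected peer forwards metrics
        // until that peer channel closes or ctx is done, and the merged channel is
        // closed once all forwarding goroutines have finished.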
  1217  func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels []<-chan MetricV2, g *errgroup.Group) <-chan MetricV2 {
  1218  	ch := make(chan MetricV2)
  1219  	var wg sync.WaitGroup
  1220  	for index, err := range g.Wait() {
  1221  		if err != nil {
  1222  			if sys.peerClients[index] != nil {
  1223  				reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
  1224  					sys.peerClients[index].host.String())
  1225  				logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err, sys.peerClients[index].host.String())
  1226  			} else {
  1227  				logger.LogOnceIf(ctx, err, "peer-offline")
  1228  			}
  1229  			continue
  1230  		}
  1231  		wg.Add(1)
  1232  		go func(ctx context.Context, peerChannel <-chan MetricV2, wg *sync.WaitGroup) {
  1233  			defer wg.Done()
  1234  			for {
  1235  				select {
  1236  				case m, ok := <-peerChannel:
  1237  					if !ok {
  1238  						return
  1239  					}
  1240  					select {
  1241  					case ch <- m:
  1242  					case <-ctx.Done():
  1243  						return
  1244  					}
  1245  				case <-ctx.Done():
  1246  					return
  1247  				}
  1248  			}
  1249  		}(ctx, peerChannels[index], &wg)
  1250  	}
  1251  	go func(wg *sync.WaitGroup, ch chan MetricV2) {
  1252  		wg.Wait()
  1253  		xioutil.SafeClose(ch)
  1254  	}(&wg, ch)
  1255  	return ch
  1256  }
  1257  
  1258  // GetBucketMetrics - gets the cluster level bucket metrics from all nodes excluding self.
  1259  func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan MetricV2 {
  1260  	if sys == nil {
  1261  		return nil
  1262  	}
  1263  	g := errgroup.WithNErrs(len(sys.peerClients))
  1264  	peerChannels := make([]<-chan MetricV2, len(sys.peerClients))
  1265  	for index := range sys.peerClients {
  1266  		index := index
  1267  		g.Go(func() error {
  1268  			if sys.peerClients[index] == nil {
  1269  				return errPeerNotReachable
  1270  			}
  1271  			var err error
  1272  			peerChannels[index], err = sys.peerClients[index].GetPeerBucketMetrics(ctx)
  1273  			return err
  1274  		}, index)
  1275  	}
  1276  	return sys.collectPeerMetrics(ctx, peerChannels, g)
  1277  }
  1278  
  1279  // GetClusterMetrics - gets the cluster metrics from all nodes excluding self.
  1280  func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan MetricV2 {
  1281  	if sys == nil {
  1282  		return nil
  1283  	}
  1284  	g := errgroup.WithNErrs(len(sys.peerClients))
  1285  	peerChannels := make([]<-chan MetricV2, len(sys.peerClients))
  1286  	for index := range sys.peerClients {
  1287  		index := index
  1288  		g.Go(func() error {
  1289  			if sys.peerClients[index] == nil {
  1290  				return errPeerNotReachable
  1291  			}
  1292  			var err error
  1293  			peerChannels[index], err = sys.peerClients[index].GetPeerMetrics(ctx)
  1294  			return err
  1295  		}, index)
  1296  	}
  1297  	return sys.collectPeerMetrics(ctx, peerChannels, g)
  1298  }
  1299  
  1300  // ServiceFreeze freezes all S3 API calls when 'freeze' is true;
  1301  // when 'freeze' is false, all S3 API calls are resumed again.
  1302  // NOTE: once a tenant is frozen, one of two things needs to
  1303  // happen before normal operations resume:
  1304  //   - the server needs to be restarted with 'mc admin service restart', or
  1305  //   - 'freeze' should be set to 'false' for this call
  1306  //     to resume normal operations.
  1307  func (sys *NotificationSys) ServiceFreeze(ctx context.Context, freeze bool) []NotificationPeerErr {
  1308  	serviceSig := serviceUnFreeze
  1309  	if freeze {
  1310  		serviceSig = serviceFreeze
  1311  	}
  1312  	ng := WithNPeers(len(sys.peerClients))
  1313  	for idx, client := range sys.peerClients {
  1314  		if client == nil {
  1315  			continue
  1316  		}
  1317  		client := client
  1318  		ng.Go(GlobalContext, func() error {
  1319  			return client.SignalService(serviceSig, "", false)
  1320  		}, idx, *client.host)
  1321  	}
  1322  	nerrs := ng.Wait()
  1323  	if freeze {
  1324  		freezeServices()
  1325  	} else {
  1326  		unfreezeServices()
  1327  	}
  1328  	return nerrs
  1329  }
  1330  
  1331  // Netperf - performs a mesh-style network throughput test
  1332  func (sys *NotificationSys) Netperf(ctx context.Context, duration time.Duration) []madmin.NetperfNodeResult {
  1333  	length := len(sys.allPeerClients)
  1334  	if length == 0 {
  1335  		// For single node erasure setup.
  1336  		return nil
  1337  	}
  1338  	results := make([]madmin.NetperfNodeResult, length)
  1339  
  1340  	scheme := "http"
  1341  	if globalIsTLS {
  1342  		scheme = "https"
  1343  	}
  1344  
  1345  	var wg sync.WaitGroup
  1346  	for index := range sys.peerClients {
  1347  		if sys.peerClients[index] == nil {
  1348  			continue
  1349  		}
  1350  		wg.Add(1)
  1351  		go func(index int) {
  1352  			defer wg.Done()
  1353  			r, err := sys.peerClients[index].Netperf(ctx, duration)
  1354  			u := &url.URL{
  1355  				Scheme: scheme,
  1356  				Host:   sys.peerClients[index].host.String(),
  1357  			}
  1358  			if err != nil {
  1359  				results[index].Error = err.Error()
  1360  			} else {
  1361  				results[index] = r
  1362  			}
  1363  			results[index].Endpoint = u.String()
  1364  		}(index)
  1365  	}
  1366  
  1367  	wg.Add(1)
  1368  	go func() {
  1369  		defer wg.Done()
  1370  		r := netperf(ctx, duration)
  1371  		u := &url.URL{
  1372  			Scheme: scheme,
  1373  			Host:   globalLocalNodeName,
  1374  		}
  1375  		results[len(results)-1] = r
  1376  		results[len(results)-1].Endpoint = u.String()
  1377  	}()
  1378  	wg.Wait()
  1379  
  1380  	return results
  1381  }
  1382  
  1383  // SpeedTest runs GET/PUT tests at the requested concurrency for the requested object size;
  1384  // optionally the tests can be extended to run for a longer time.Duration.
  1385  func (sys *NotificationSys) SpeedTest(ctx context.Context, sopts speedTestOpts) []SpeedTestResult {
  1386  	length := len(sys.allPeerClients)
  1387  	if length == 0 {
  1388  		// For single node erasure setup.
  1389  		length = 1
  1390  	}
  1391  	results := make([]SpeedTestResult, length)
  1392  
  1393  	scheme := "http"
  1394  	if globalIsTLS {
  1395  		scheme = "https"
  1396  	}
  1397  
  1398  	var wg sync.WaitGroup
  1399  	for index := range sys.peerClients {
  1400  		if sys.peerClients[index] == nil {
  1401  			continue
  1402  		}
  1403  		wg.Add(1)
  1404  		go func(index int) {
  1405  			defer wg.Done()
  1406  			r, err := sys.peerClients[index].SpeedTest(ctx, sopts)
  1407  			u := &url.URL{
  1408  				Scheme: scheme,
  1409  				Host:   sys.peerClients[index].host.String(),
  1410  			}
  1411  			if err != nil {
  1412  				results[index].Error = err.Error()
  1413  			} else {
  1414  				results[index] = r
  1415  			}
  1416  			results[index].Endpoint = u.String()
  1417  		}(index)
  1418  	}
  1419  
  1420  	wg.Add(1)
  1421  	go func() {
  1422  		defer wg.Done()
  1423  		r, err := selfSpeedTest(ctx, sopts)
  1424  		u := &url.URL{
  1425  			Scheme: scheme,
  1426  			Host:   globalLocalNodeName,
  1427  		}
  1428  		if err != nil {
  1429  			results[len(results)-1].Error = err.Error()
  1430  		} else {
  1431  			results[len(results)-1] = r
  1432  		}
  1433  		results[len(results)-1].Endpoint = u.String()
  1434  	}()
  1435  	wg.Wait()
  1436  
  1437  	return results
  1438  }
  1439  
  1440  // DriveSpeedTest - Drive performance information
  1441  func (sys *NotificationSys) DriveSpeedTest(ctx context.Context, opts madmin.DriveSpeedTestOpts) chan madmin.DriveSpeedTestResult {
  1442  	ch := make(chan madmin.DriveSpeedTestResult)
  1443  	var wg sync.WaitGroup
  1444  	for _, client := range sys.peerClients {
  1445  		if client == nil {
  1446  			continue
  1447  		}
  1448  		wg.Add(1)
  1449  		go func(client *peerRESTClient) {
  1450  			defer wg.Done()
  1451  			resp, err := client.DriveSpeedTest(ctx, opts)
  1452  			if err != nil {
  1453  				resp.Error = err.Error()
  1454  			}
  1455  
  1456  			select {
  1457  			case <-ctx.Done():
  1458  			case ch <- resp:
  1459  			}
  1460  
  1461  			reqInfo := (&logger.ReqInfo{}).AppendTags("remotePeer", client.host.String())
  1462  			ctx := logger.SetReqInfo(GlobalContext, reqInfo)
  1463  			logger.LogOnceIf(ctx, err, client.host.String())
  1464  		}(client)
  1465  	}
  1466  
  1467  	wg.Add(1)
  1468  	go func() {
  1469  		defer wg.Done()
  1470  		select {
  1471  		case <-ctx.Done():
  1472  		case ch <- driveSpeedTest(ctx, opts):
  1473  		}
  1474  	}()
  1475  
  1476  	go func(wg *sync.WaitGroup, ch chan madmin.DriveSpeedTestResult) {
  1477  		wg.Wait()
  1478  		xioutil.SafeClose(ch)
  1479  	}(&wg, ch)
  1480  
  1481  	return ch
  1482  }
  1483  
  1484  // ReloadSiteReplicationConfig - tells all peer MinIO nodes to reload the
  1485  // site-replication configuration.
  1486  func (sys *NotificationSys) ReloadSiteReplicationConfig(ctx context.Context) []error {
  1487  	errs := make([]error, len(sys.allPeerClients))
  1488  	var wg sync.WaitGroup
  1489  	for index := range sys.peerClients {
  1490  		if sys.peerClients[index] == nil {
  1491  			continue
  1492  		}
  1493  		wg.Add(1)
  1494  		go func(index int) {
  1495  			defer wg.Done()
  1496  			errs[index] = sys.peerClients[index].ReloadSiteReplicationConfig(ctx)
  1497  		}(index)
  1498  	}
  1499  
  1500  	wg.Wait()
  1501  	return errs
  1502  }
  1503  
  1504  // GetLastDayTierStats fetches per-tier stats for the last 24 hours from all peers
  1505  func (sys *NotificationSys) GetLastDayTierStats(ctx context.Context) DailyAllTierStats {
  1506  	errs := make([]error, len(sys.allPeerClients))
  1507  	lastDayStats := make([]DailyAllTierStats, len(sys.allPeerClients))
  1508  	var wg sync.WaitGroup
  1509  	for index := range sys.peerClients {
  1510  		if sys.peerClients[index] == nil {
  1511  			continue
  1512  		}
  1513  		wg.Add(1)
  1514  		go func(index int) {
  1515  			defer wg.Done()
  1516  			lastDayStats[index], errs[index] = sys.peerClients[index].GetLastDayTierStats(ctx)
  1517  		}(index)
  1518  	}
  1519  
  1520  	wg.Wait()
  1521  	merged := globalTransitionState.getDailyAllTierStats()
  1522  	for i, stat := range lastDayStats {
  1523  		if errs[i] != nil {
  1524  			logger.LogOnceIf(ctx, fmt.Errorf("failed to fetch last day tier stats: %w", errs[i]), sys.peerClients[i].host.String())
  1525  			continue
  1526  		}
  1527  		merged.merge(stat)
  1528  	}
  1529  	return merged
  1530  }
  1531  
  1532  // GetReplicationMRF - Get replication MRF from all peers.
  1533  func (sys *NotificationSys) GetReplicationMRF(ctx context.Context, bucket, node string) (mrfCh chan madmin.ReplicationMRF, err error) {
  1534  	g := errgroup.WithNErrs(len(sys.peerClients))
  1535  	peerChannels := make([]<-chan madmin.ReplicationMRF, len(sys.peerClients))
  1536  	for index, client := range sys.peerClients {
  1537  		if client == nil {
  1538  			continue
  1539  		}
  1540  		host := client.host.String()
  1541  		if host != node && node != "all" {
  1542  			continue
  1543  		}
  1544  		index := index
  1545  		g.Go(func() error {
  1546  			var err error
  1547  			peerChannels[index], err = sys.peerClients[index].GetReplicationMRF(ctx, bucket)
  1548  			return err
  1549  		}, index)
  1550  	}
  1551  	mrfCh = make(chan madmin.ReplicationMRF, 4000)
  1552  	var wg sync.WaitGroup
  1553  
  1554  	for index, err := range g.Wait() {
  1555  		if err != nil {
  1556  			if sys.peerClients[index] != nil {
  1557  				reqInfo := (&logger.ReqInfo{}).AppendTags("peerAddress",
  1558  					sys.peerClients[index].host.String())
  1559  				logger.LogOnceIf(logger.SetReqInfo(ctx, reqInfo), err, sys.peerClients[index].host.String())
  1560  			} else {
  1561  				logger.LogOnceIf(ctx, err, "peer-offline")
  1562  			}
  1563  			continue
  1564  		}
  1565  		wg.Add(1)
  1566  		go func(ctx context.Context, peerChannel <-chan madmin.ReplicationMRF, wg *sync.WaitGroup) {
  1567  			defer wg.Done()
  1568  			for {
  1569  				select {
  1570  				case m, ok := <-peerChannel:
  1571  					if !ok {
  1572  						return
  1573  					}
  1574  					select {
  1575  					case <-ctx.Done():
  1576  						return
  1577  					case mrfCh <- m:
  1578  					}
  1579  				case <-ctx.Done():
  1580  					return
  1581  				}
  1582  			}
  1583  		}(ctx, peerChannels[index], &wg)
  1584  	}
  1585  	wg.Add(1)
  1586  	go func(ch chan madmin.ReplicationMRF) error {
  1587  		defer wg.Done()
  1588  		if node != "all" && node != globalLocalNodeName {
  1589  			return nil
  1590  		}
  1591  		mCh, err := globalReplicationPool.getMRF(ctx, bucket)
  1592  		if err != nil {
  1593  			return err
  1594  		}
  1595  		for e := range mCh {
  1596  			select {
  1597  			case <-ctx.Done():
  1598  				return err
  1599  			case mrfCh <- e:
  1600  			}
  1601  		}
  1602  		return nil
  1603  	}(mrfCh)
  1604  	go func(wg *sync.WaitGroup) {
  1605  		wg.Wait()
  1606  		xioutil.SafeClose(mrfCh)
  1607  	}(&wg)
  1608  	return mrfCh, nil
  1609  }