github.com/minio/mc@v0.0.0-20240503112107-b471de8d1882/cmd/replicate-status.go (about)

     1  // Copyright (c) 2015-2022 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"hash/fnv"
    24  	"math"
    25  	"sort"
    26  	"strings"
    27  	"time"
    28  
    29  	humanize "github.com/dustin/go-humanize"
    30  	"github.com/fatih/color"
    31  	"github.com/minio/cli"
    32  	json "github.com/minio/colorjson"
    33  	"github.com/minio/madmin-go/v3"
    34  	"github.com/minio/mc/pkg/probe"
    35  	"github.com/minio/minio-go/v7/pkg/replication"
    36  	"github.com/minio/pkg/v2/console"
    37  	"github.com/olekukonko/tablewriter"
    38  )
    39  
        // replicateStatusFlags lists the command-line flags that are specific to
        // the "status" subcommand; replicateStatusCmd appends them to globalFlags.
    40  var replicateStatusFlags = []cli.Flag{
        	// NOTE(review): "backlog" is declared here but is not read anywhere in
        	// the visible part of this file - confirm where it is consumed.
    41  	cli.StringFlag{
    42  		Name:  "backlog,b",
    43  		Usage: "show most recent failures for one or more nodes. Valid values are 'all', or node name",
    44  		Value: "all",
    45  	},
        	// "nodes" switches mainReplicateStatus to the per-node transfer table
        	// (replicateXferMessage) instead of the per-target status report.
    46  	cli.BoolFlag{
    47  		Name:  "nodes,n",
    48  		Usage: "show replication speed for all nodes",
    49  	},
    50  }
    51  
        // replicateStatusCmd defines the "status" subcommand. It is handled by
        // mainReplicateStatus, accepts the global flags plus replicateStatusFlags,
        // and uses a custom help template with usage examples.
    52  var replicateStatusCmd = cli.Command{
    53  	Name:         "status",
    54  	Usage:        "show server side replication status",
    55  	Action:       mainReplicateStatus,
    56  	OnUsageError: onUsageError,
    57  	Before:       setGlobalsFromContext,
    58  	Flags:        append(globalFlags, replicateStatusFlags...),
    59  	CustomHelpTemplate: `NAME:
    60    {{.HelpName}} - {{.Usage}}
    61  
    62  USAGE:
    63    {{.HelpName}} TARGET/BUCKET
    64  
    65  FLAGS:
    66    {{range .VisibleFlags}}{{.}}
    67    {{end}}
    68  EXAMPLES:
    69    1. Get server side replication metrics for bucket "mybucket" for alias "myminio".
    70       {{.Prompt}} {{.HelpName}} myminio/mybucket
    71  
    72    2. Get replication speed across nodes for bucket "mybucket" for alias "myminio".
    73       {{.Prompt}} {{.HelpName}} --nodes  myminio/mybucket
    74  `,
    75  }
    76  
    77  // checkReplicateStatusSyntax - validate all the passed arguments
    78  func checkReplicateStatusSyntax(ctx *cli.Context) {
    79  	if len(ctx.Args()) != 1 {
    80  		showCommandHelpAndExit(ctx, 1) // last argument is exit code
    81  	}
    82  }
    83  
        // replicateStatusMessage is the payload printed by the "status" subcommand.
        // Its JSON and String methods provide the machine-readable and the
        // human-readable renderings respectively.
    84  type replicateStatusMessage struct {
        	// Op is set to the command name by mainReplicateStatus.
    85  	Op      string                `json:"op"`
        	// URL is the aliased TARGET/BUCKET argument given on the command line.
    86  	URL     string                `json:"url"`
        	// Status is overwritten with "success" by JSON().
    87  	Status  string                `json:"status"`
        	// Metrics holds the result of client.GetReplicationMetrics.
    88  	Metrics replication.MetricsV2 `json:"replicationstats"`
        	// Targets holds the result of admClient.ListRemoteTargets for the bucket.
    89  	Targets []madmin.BucketTarget `json:"remoteTargets"`
        	// cfg is the bucket's replication configuration; String() uses it to
        	// recognise ARNs that no rule references any more. It is excluded from
        	// the JSON output.
    90  	cfg     replication.Config    `json:"-"`
    91  }
    92  
    93  func (s replicateStatusMessage) JSON() string {
    94  	s.Status = "success"
    95  	jsonMessageBytes, e := json.MarshalIndent(s, "", " ")
    96  	fatalIf(probe.NewError(e), "Unable to marshal into JSON.")
    97  	return string(jsonMessageBytes)
    98  }
    99  
   100  func (s replicateStatusMessage) String() string {
   101  	q := s.Metrics.QueueStats
   102  	rs := s.Metrics.CurrentStats
   103  
   104  	if s.cfg.Empty() {
   105  		return "Replication is not configured."
   106  	}
   107  
   108  	var (
   109  		replSz       = rs.ReplicatedSize
   110  		replCount    = rs.ReplicatedCount
   111  		replicaCount = rs.ReplicaCount
   112  		replicaSz    = rs.ReplicaSize
   113  		failed       = rs.Errors
   114  		qs           = q.QStats()
   115  	)
   116  	for arn, st := range rs.Stats { // Remove stale ARNs from stats
   117  		staleARN := true
   118  		for _, r := range s.cfg.Rules {
   119  			if r.Destination.Bucket == arn {
   120  				staleARN = false
   121  				break
   122  			}
   123  		}
   124  		if staleARN {
   125  			replSz -= st.ReplicatedSize
   126  			replCount -= int64(st.ReplicatedCount)
   127  		}
   128  	}
   129  	// normalize stats, avoid negative values
   130  	replSz = uint64(math.Max(float64(replSz), 0))
   131  	if replCount < 0 {
   132  		replCount = 0
   133  	}
   134  	// for queue stats
   135  	qtots := rs.QStats
   136  	coloredDot := console.Colorize("qStatusOK", dot)
   137  	if qtots.Curr.Count > qtots.Avg.Count {
   138  		coloredDot = console.Colorize("qStatusWarn", dot)
   139  	}
   140  	var sb strings.Builder
   141  
   142  	// Set table header
   143  	table := tablewriter.NewWriter(&sb)
   144  	table.SetAutoWrapText(false)
   145  	table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
   146  	table.SetAlignment(tablewriter.ALIGN_LEFT)
   147  	table.SetRowLine(false)
   148  	table.SetBorder(false)
   149  	table.SetTablePadding("\t") // pad with tabs
   150  
   151  	uiFn := func(theme string) func(string) string {
   152  		return func(s string) string {
   153  			return console.Colorize(theme, s)
   154  		}
   155  	}
   156  	titleui := uiFn("title")
   157  	valueui := uiFn("value")
   158  	hdrui := uiFn("THeaderBold")
   159  	keyui := uiFn("key")
   160  	maxui := uiFn("Peak")
   161  	avgui := uiFn("Avg")
   162  
   163  	addRowF := func(format string, vals ...interface{}) {
   164  		s := fmt.Sprintf(format, vals...)
   165  		table.Append([]string{s})
   166  	}
   167  	var arns []string
   168  	for arn := range rs.Stats {
   169  		arns = append(arns, arn)
   170  	}
   171  	sort.Strings(arns)
   172  	addRowF(titleui("Replication status since %s"), humanize.RelTime(time.Now(), time.Now().Add(time.Duration(s.Metrics.Uptime)*time.Second), "", "ago"))
   173  	singleTgt := len(arns) == 1
   174  	staleARN := false
   175  	for i, arn := range arns {
   176  		if i > 0 && !staleARN {
   177  			addRowF("\n")
   178  		}
   179  		staleARN = true
   180  		for _, r := range s.cfg.Rules {
   181  			if r.Destination.Bucket == arn {
   182  				staleARN = false
   183  				break
   184  			}
   185  		}
   186  		if staleARN {
   187  			continue // skip historic metrics for deleted targets
   188  		}
   189  		var ep string
   190  		var tgt madmin.BucketTarget
   191  		for _, t := range s.Targets {
   192  			if t.Arn == arn {
   193  				ep = t.Endpoint
   194  				tgt = t
   195  				break
   196  			}
   197  		}
   198  		nodeName := ep
   199  		if nodeName == "" {
   200  			nodeName = arn
   201  		}
   202  		nodeui := uiFn(getNodeTheme(nodeName))
   203  		currDowntime := time.Duration(0)
   204  		if !tgt.Online && !tgt.LastOnline.IsZero() {
   205  			currDowntime = UTCNow().Sub(tgt.LastOnline)
   206  		}
   207  		// normalize because total downtime is calculated at server side at heartbeat interval, may be slightly behind
   208  		totalDowntime := tgt.TotalDowntime
   209  		if currDowntime > totalDowntime {
   210  			totalDowntime = currDowntime
   211  		}
   212  		nodeStr := nodeui(nodeName)
   213  		addRowF(nodeui(nodeStr))
   214  		stat, ok := rs.Stats[arn]
   215  		if ok {
   216  			addRowF(titleui("Replicated:                   ")+humanize.Comma(int64(stat.ReplicatedCount))+keyui(" objects")+" (%s", valueui(humanize.IBytes(stat.ReplicatedSize))+")")
   217  		}
   218  		healthDot := console.Colorize("online", dot)
   219  		if !tgt.Online {
   220  			healthDot = console.Colorize("offline", dot)
   221  		}
   222  
   223  		var linkStatus string
   224  		if tgt.Online {
   225  			linkStatus = healthDot + fmt.Sprintf(" online (total downtime: %s)", valueui(timeDurationToHumanizedDuration(totalDowntime).String()))
   226  		} else {
   227  			linkStatus = healthDot + fmt.Sprintf(" offline %s (total downtime: %s)", valueui(timeDurationToHumanizedDuration(currDowntime).String()), valueui(timeDurationToHumanizedDuration(totalDowntime).String()))
   228  		}
   229  		if singleTgt { // for single target - combine summary section into the target section
   230  			addRowF(titleui("Queued:                       ") + coloredDot + " " + humanize.Comma(int64(qtots.Curr.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Curr.Bytes))) +
   231  				" (" + avgui("avg") + ": " + humanize.Comma(int64(qtots.Avg.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Avg.Bytes))) +
   232  				" ; " + maxui("max:") + " " + humanize.Comma(int64(qtots.Max.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Max.Bytes))) + ")")
   233  			addRowF(titleui("Workers:                      ") + valueui(humanize.Comma(int64(qs.Workers.Curr))) + avgui(" (avg: ") + humanize.Comma(int64(qs.Workers.Avg)) + maxui("; max: ") + humanize.Comma(int64(qs.Workers.Max)) + ")")
   234  		}
   235  		tgtXfer := qs.TgtXferStats[arn][replication.Total]
   236  		addRowF(titleui("Transfer Rate:                ")+"%s/s ("+keyui("avg: ")+"%s/s"+keyui("; max: ")+"%s/s", valueui(humanize.Bytes(uint64(tgtXfer.CurrRate))), valueui(humanize.Bytes(uint64(tgtXfer.AvgRate))), valueui(humanize.Bytes(uint64(tgtXfer.PeakRate))))
   237  		addRowF(titleui("Latency:                      ")+"%s ("+keyui("avg: ")+"%s"+keyui("; max: ")+"%s)", valueui(tgt.Latency.Curr.Round(time.Millisecond).String()), valueui(tgt.Latency.Avg.Round(time.Millisecond).String()), valueui(tgt.Latency.Max.Round(time.Millisecond).String()))
   238  
   239  		addRowF(titleui("Link:                         %s"), linkStatus)
   240  		addRowF(titleui("Errors:                       ")+"%s in last 1 minute; %s in last 1hr; %s since uptime", valueui(humanize.Comma(int64(stat.Failed.LastMinute.Count))), valueui(humanize.Comma(int64(stat.Failed.LastHour.Count))), valueui(humanize.Comma(int64(stat.Failed.Totals.Count))))
   241  
   242  		bwStat, ok := rs.Stats[arn]
   243  		if ok && bwStat.BandWidthLimitInBytesPerSecond > 0 {
   244  			limit := "N/A"   // N/A means cluster bandwidth is not configured
   245  			current := "N/A" // N/A means cluster bandwidth is not configured
   246  			if bwStat.CurrentBandwidthInBytesPerSecond > 0 {
   247  				current = humanize.Bytes(uint64(bwStat.CurrentBandwidthInBytesPerSecond * 8))
   248  				current = fmt.Sprintf("%sb/s", current[:len(current)-1])
   249  			}
   250  			if bwStat.BandWidthLimitInBytesPerSecond > 0 {
   251  				limit = humanize.Bytes(uint64(bwStat.BandWidthLimitInBytesPerSecond * 8))
   252  				limit = fmt.Sprintf("%sb/s", limit[:len(limit)-1])
   253  			}
   254  			addRowF(titleui("Configured Max Bandwidth (Bps): ")+"%s"+titleui("   Current Bandwidth (Bps): ")+"%s", valueui(limit), valueui(current))
   255  		}
   256  
   257  	}
   258  	if !singleTgt {
   259  		xfer := qs.XferStats[replication.Total]
   260  		addRowF(hdrui("\nSummary:"))
   261  		addRowF(titleui("Replicated:                   ")+humanize.Comma(int64(replCount))+keyui(" objects")+" (%s", valueui(humanize.IBytes(replSz))+")")
   262  		addRowF(titleui("Queued:                       ") + coloredDot + " " + humanize.Comma(int64(qtots.Curr.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Curr.Bytes))) +
   263  			" (" + avgui("avg") + ": " + humanize.Comma(int64(qtots.Avg.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Avg.Bytes))) +
   264  			" ; " + maxui("max:") + " " + humanize.Comma(int64(qtots.Max.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Max.Bytes))) + ")")
   265  		addRowF(titleui("Workers:                      ") + valueui(humanize.Comma(int64(qs.Workers.Curr))) + avgui(" (avg: ") + humanize.Comma(int64(qs.Workers.Avg)) + maxui("; max: ") + humanize.Comma(int64(qs.Workers.Max)) + ")")
   266  		addRowF(titleui("Received:                     ")+"%s"+keyui(" objects")+" (%s)", humanize.Comma(int64(replicaCount)), valueui(humanize.IBytes(uint64(replicaSz))))
   267  		addRowF(titleui("Transfer Rate:                ")+"%s/s"+avgui(" (avg: ")+"%s/s"+maxui("; max: ")+"%s/s)", valueui(humanize.Bytes(uint64(xfer.CurrRate))), valueui(humanize.Bytes(uint64(xfer.AvgRate))), valueui(humanize.Bytes(uint64(xfer.PeakRate))))
   268  		addRowF(titleui("Errors:                       ")+"%s in last 1 minute; %s in last 1hr; %s since uptime", valueui(humanize.Comma(int64(failed.LastMinute.Count))), valueui(humanize.Comma(int64(failed.LastHour.Count))), valueui(humanize.Comma(int64(failed.Totals.Count))))
   269  	}
   270  
   271  	table.Render()
   272  	return sb.String()
   273  }
   274  
   275  func mainReplicateStatus(cliCtx *cli.Context) error {
   276  	ctx, cancelReplicateStatus := context.WithCancel(globalContext)
   277  	defer cancelReplicateStatus()
   278  
   279  	console.SetColor("title", color.New(color.FgCyan))
   280  	console.SetColor("value", color.New(color.FgWhite, color.Bold))
   281  
   282  	console.SetColor("key", color.New(color.FgWhite))
   283  	console.SetColor("THeaderBold", color.New(color.Bold, color.FgWhite))
   284  	console.SetColor("Replica", color.New(color.FgCyan))
   285  	console.SetColor("Failed", color.New(color.Bold, color.FgRed))
   286  	for _, c := range colors {
   287  		console.SetColor(fmt.Sprintf("Node%d", c), color.New(c))
   288  	}
   289  	console.SetColor("Replicated", color.New(color.FgCyan))
   290  	console.SetColor("In-Queue", color.New(color.Bold, color.FgYellow))
   291  	console.SetColor("Avg", color.New(color.FgCyan))
   292  	console.SetColor("Peak", color.New(color.FgYellow))
   293  	console.SetColor("Current", color.New(color.FgCyan))
   294  	console.SetColor("Uptime", color.New(color.FgWhite))
   295  	console.SetColor("qStatusWarn", color.New(color.FgYellow, color.Bold))
   296  	console.SetColor("qStatusOK", color.New(color.FgGreen, color.Bold))
   297  	console.SetColor("online", color.New(color.FgGreen, color.Bold))
   298  	console.SetColor("offline", color.New(color.FgRed, color.Bold))
   299  
   300  	for _, c := range colors {
   301  		console.SetColor(fmt.Sprintf("Node%d", c), color.New(color.Bold, c))
   302  	}
   303  	checkReplicateStatusSyntax(cliCtx)
   304  
   305  	// Get the alias parameter from cli
   306  	args := cliCtx.Args()
   307  	aliasedURL := args.Get(0)
   308  	// Create a new Client
   309  	client, err := newClient(aliasedURL)
   310  	fatalIf(err, "Unable to initialize connection.")
   311  	// Create a new MinIO Admin Client
   312  	admClient, cerr := newAdminClient(aliasedURL)
   313  	fatalIf(cerr, "Unable to initialize admin connection.")
   314  	_, sourceBucket := url2Alias(args[0])
   315  
   316  	replicateStatus, err := client.GetReplicationMetrics(ctx)
   317  	fatalIf(err.Trace(args...), "Unable to get replication status")
   318  	targets, e := admClient.ListRemoteTargets(globalContext, sourceBucket, "")
   319  	fatalIf(probe.NewError(e).Trace(args...), "Unable to fetch remote target.")
   320  	cfg, err := client.GetReplication(ctx)
   321  	fatalIf(err.Trace(args...), "Unable to fetch replication configuration.")
   322  
   323  	if cliCtx.IsSet("nodes") {
   324  		printMsg(replicateXferMessage{
   325  			Op:             cliCtx.Command.Name,
   326  			Status:         "success",
   327  			ReplQueueStats: replicateStatus.QueueStats,
   328  		})
   329  		return nil
   330  	}
   331  
   332  	printMsg(replicateStatusMessage{
   333  		Op:      cliCtx.Command.Name,
   334  		URL:     aliasedURL,
   335  		Metrics: replicateStatus,
   336  		Targets: targets,
   337  		cfg:     cfg,
   338  	})
   339  
   340  	return nil
   341  }
   342  
        // replicateXferMessage is the payload printed by "status --nodes". Its
        // String method renders one pair of rows per entry in the embedded Nodes.
   343  type replicateXferMessage struct {
        	// Op is set to the command name by mainReplicateStatus.
   344  	Op     string `json:"op"`
        	// Status is overwritten with "success" by JSON().
   345  	Status string `json:"status"`
        	// Embedded queue statistics; mainReplicateStatus fills this from the
        	// QueueStats of the replication metrics.
   346  	replication.ReplQueueStats
   347  }
   348  
   349  func (m replicateXferMessage) JSON() string {
   350  	m.Status = "success"
   351  	jsonMessageBytes, e := json.MarshalIndent(m, "", " ")
   352  	fatalIf(probe.NewError(e), "Unable to marshal into JSON.")
   353  	return string(jsonMessageBytes)
   354  }
   355  
   356  func (m replicateXferMessage) String() string {
   357  	var rows []string
   358  	maxLen := 0
   359  
   360  	for _, rqs := range m.ReplQueueStats.Nodes {
   361  		if len(rqs.NodeName) > maxLen {
   362  			maxLen = len(rqs.NodeName)
   363  		}
   364  		lrgX := rqs.XferStats[replication.Large]
   365  		smlX := rqs.XferStats[replication.Small]
   366  		rows = append(rows, console.Colorize("", newPrettyTable(" | ",
   367  			Field{getNodeTheme(rqs.NodeName), len(rqs.NodeName) + 3},
   368  			Field{"Uptime:", 15},
   369  			Field{"Lbl", 25},
   370  			Field{"Avg", 12},
   371  			Field{"Peak", 12},
   372  			Field{"Current", 12},
   373  			Field{"Workers", 10},
   374  		).buildRow(rqs.NodeName, humanize.RelTime(time.Now(), time.Now().Add(time.Duration(rqs.Uptime)*time.Second), "", ""), "Large Objects (>=128 MiB)", fmt.Sprintf("%s/s", humanize.Bytes(uint64(lrgX.AvgRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(lrgX.PeakRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(lrgX.CurrRate))), fmt.Sprintf("%d", int(rqs.Workers.Avg)))))
   375  
   376  		rows = append(rows, console.Colorize("", newPrettyTable(" | ",
   377  			Field{getNodeTheme(rqs.NodeName), len(rqs.NodeName) + 3},
   378  			Field{"Uptime:", 15},
   379  			Field{"Lbl", 25},
   380  			Field{"Avg", 12},
   381  			Field{"Peak", 12},
   382  			Field{"Current", 12},
   383  			Field{"Workers", 10},
   384  		).buildRow(rqs.NodeName, humanize.RelTime(time.Now(), time.Now().Add(time.Duration(rqs.Uptime)*time.Second), "", ""), "Small Objects (<128 MiB)", fmt.Sprintf("%s/s", humanize.Bytes(uint64(smlX.AvgRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(smlX.PeakRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(smlX.CurrRate))), fmt.Sprintf("%d", int(rqs.Workers.Avg)))))
   385  	}
   386  
   387  	hdrSlc := []string{
   388  		console.Colorize("THeaderBold", newPrettyTable(" | ",
   389  			Field{"", maxLen + 3},
   390  			Field{"Uptime:", 15},
   391  			Field{"Lbl", 25},
   392  			Field{"XferRate", 42},
   393  			Field{"Workers", 12}).buildRow("Node Name", "Uptime", "Label", "         Transfer Rate      ", "Workers")),
   394  		console.Colorize("THeaderBold", newPrettyTable(" | ",
   395  			Field{"", maxLen + 3},
   396  			Field{"Uptime:", 15},
   397  			Field{"Lbl", 25},
   398  			Field{"Avg", 12},
   399  			Field{"Peak", 12},
   400  			Field{"Current", 12},
   401  			Field{"Workers", 10}).buildRow("", "", "", "Avg", "Peak", "Current", "")),
   402  	}
   403  
   404  	return strings.Join(append(hdrSlc, rows...), "\n")
   405  }
   406  
   407  // colorize node name
   408  func getNodeTheme(nodeName string) string {
   409  	nodeHash := fnv.New32a()
   410  	nodeHash.Write([]byte(nodeName))
   411  	nHashSum := nodeHash.Sum32()
   412  	idx := nHashSum % uint32(len(colors))
   413  	return fmt.Sprintf("Node%d", colors[idx])
   414  }