github.com/minio/mc@v0.0.0-20240503112107-b471de8d1882/cmd/replicate-status.go (about) 1 // Copyright (c) 2015-2022 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "context" 22 "fmt" 23 "hash/fnv" 24 "math" 25 "sort" 26 "strings" 27 "time" 28 29 humanize "github.com/dustin/go-humanize" 30 "github.com/fatih/color" 31 "github.com/minio/cli" 32 json "github.com/minio/colorjson" 33 "github.com/minio/madmin-go/v3" 34 "github.com/minio/mc/pkg/probe" 35 "github.com/minio/minio-go/v7/pkg/replication" 36 "github.com/minio/pkg/v2/console" 37 "github.com/olekukonko/tablewriter" 38 ) 39 40 var replicateStatusFlags = []cli.Flag{ 41 cli.StringFlag{ 42 Name: "backlog,b", 43 Usage: "show most recent failures for one or more nodes. Valid values are 'all', or node name", 44 Value: "all", 45 }, 46 cli.BoolFlag{ 47 Name: "nodes,n", 48 Usage: "show replication speed for all nodes", 49 }, 50 } 51 52 var replicateStatusCmd = cli.Command{ 53 Name: "status", 54 Usage: "show server side replication status", 55 Action: mainReplicateStatus, 56 OnUsageError: onUsageError, 57 Before: setGlobalsFromContext, 58 Flags: append(globalFlags, replicateStatusFlags...), 59 CustomHelpTemplate: `NAME: 60 {{.HelpName}} - {{.Usage}} 61 62 USAGE: 63 {{.HelpName}} TARGET/BUCKET 64 65 FLAGS: 66 {{range .VisibleFlags}}{{.}} 67 {{end}} 68 EXAMPLES: 69 1. Get server side replication metrics for bucket "mybucket" for alias "myminio". 70 {{.Prompt}} {{.HelpName}} myminio/mybucket 71 72 2. Get replication speed across nodes for bucket "mybucket" for alias "myminio". 73 {{.Prompt}} {{.HelpName}} --nodes myminio/mybucket 74 `, 75 } 76 77 // checkReplicateStatusSyntax - validate all the passed arguments 78 func checkReplicateStatusSyntax(ctx *cli.Context) { 79 if len(ctx.Args()) != 1 { 80 showCommandHelpAndExit(ctx, 1) // last argument is exit code 81 } 82 } 83 84 type replicateStatusMessage struct { 85 Op string `json:"op"` 86 URL string `json:"url"` 87 Status string `json:"status"` 88 Metrics replication.MetricsV2 `json:"replicationstats"` 89 Targets []madmin.BucketTarget `json:"remoteTargets"` 90 cfg replication.Config `json:"-"` 91 } 92 93 func (s replicateStatusMessage) JSON() string { 94 s.Status = "success" 95 jsonMessageBytes, e := json.MarshalIndent(s, "", " ") 96 fatalIf(probe.NewError(e), "Unable to marshal into JSON.") 97 return string(jsonMessageBytes) 98 } 99 100 func (s replicateStatusMessage) String() string { 101 q := s.Metrics.QueueStats 102 rs := s.Metrics.CurrentStats 103 104 if s.cfg.Empty() { 105 return "Replication is not configured." 106 } 107 108 var ( 109 replSz = rs.ReplicatedSize 110 replCount = rs.ReplicatedCount 111 replicaCount = rs.ReplicaCount 112 replicaSz = rs.ReplicaSize 113 failed = rs.Errors 114 qs = q.QStats() 115 ) 116 for arn, st := range rs.Stats { // Remove stale ARNs from stats 117 staleARN := true 118 for _, r := range s.cfg.Rules { 119 if r.Destination.Bucket == arn { 120 staleARN = false 121 break 122 } 123 } 124 if staleARN { 125 replSz -= st.ReplicatedSize 126 replCount -= int64(st.ReplicatedCount) 127 } 128 } 129 // normalize stats, avoid negative values 130 replSz = uint64(math.Max(float64(replSz), 0)) 131 if replCount < 0 { 132 replCount = 0 133 } 134 // for queue stats 135 qtots := rs.QStats 136 coloredDot := console.Colorize("qStatusOK", dot) 137 if qtots.Curr.Count > qtots.Avg.Count { 138 coloredDot = console.Colorize("qStatusWarn", dot) 139 } 140 var sb strings.Builder 141 142 // Set table header 143 table := tablewriter.NewWriter(&sb) 144 table.SetAutoWrapText(false) 145 table.SetHeaderAlignment(tablewriter.ALIGN_LEFT) 146 table.SetAlignment(tablewriter.ALIGN_LEFT) 147 table.SetRowLine(false) 148 table.SetBorder(false) 149 table.SetTablePadding("\t") // pad with tabs 150 151 uiFn := func(theme string) func(string) string { 152 return func(s string) string { 153 return console.Colorize(theme, s) 154 } 155 } 156 titleui := uiFn("title") 157 valueui := uiFn("value") 158 hdrui := uiFn("THeaderBold") 159 keyui := uiFn("key") 160 maxui := uiFn("Peak") 161 avgui := uiFn("Avg") 162 163 addRowF := func(format string, vals ...interface{}) { 164 s := fmt.Sprintf(format, vals...) 165 table.Append([]string{s}) 166 } 167 var arns []string 168 for arn := range rs.Stats { 169 arns = append(arns, arn) 170 } 171 sort.Strings(arns) 172 addRowF(titleui("Replication status since %s"), humanize.RelTime(time.Now(), time.Now().Add(time.Duration(s.Metrics.Uptime)*time.Second), "", "ago")) 173 singleTgt := len(arns) == 1 174 staleARN := false 175 for i, arn := range arns { 176 if i > 0 && !staleARN { 177 addRowF("\n") 178 } 179 staleARN = true 180 for _, r := range s.cfg.Rules { 181 if r.Destination.Bucket == arn { 182 staleARN = false 183 break 184 } 185 } 186 if staleARN { 187 continue // skip historic metrics for deleted targets 188 } 189 var ep string 190 var tgt madmin.BucketTarget 191 for _, t := range s.Targets { 192 if t.Arn == arn { 193 ep = t.Endpoint 194 tgt = t 195 break 196 } 197 } 198 nodeName := ep 199 if nodeName == "" { 200 nodeName = arn 201 } 202 nodeui := uiFn(getNodeTheme(nodeName)) 203 currDowntime := time.Duration(0) 204 if !tgt.Online && !tgt.LastOnline.IsZero() { 205 currDowntime = UTCNow().Sub(tgt.LastOnline) 206 } 207 // normalize because total downtime is calculated at server side at heartbeat interval, may be slightly behind 208 totalDowntime := tgt.TotalDowntime 209 if currDowntime > totalDowntime { 210 totalDowntime = currDowntime 211 } 212 nodeStr := nodeui(nodeName) 213 addRowF(nodeui(nodeStr)) 214 stat, ok := rs.Stats[arn] 215 if ok { 216 addRowF(titleui("Replicated: ")+humanize.Comma(int64(stat.ReplicatedCount))+keyui(" objects")+" (%s", valueui(humanize.IBytes(stat.ReplicatedSize))+")") 217 } 218 healthDot := console.Colorize("online", dot) 219 if !tgt.Online { 220 healthDot = console.Colorize("offline", dot) 221 } 222 223 var linkStatus string 224 if tgt.Online { 225 linkStatus = healthDot + fmt.Sprintf(" online (total downtime: %s)", valueui(timeDurationToHumanizedDuration(totalDowntime).String())) 226 } else { 227 linkStatus = healthDot + fmt.Sprintf(" offline %s (total downtime: %s)", valueui(timeDurationToHumanizedDuration(currDowntime).String()), valueui(timeDurationToHumanizedDuration(totalDowntime).String())) 228 } 229 if singleTgt { // for single target - combine summary section into the target section 230 addRowF(titleui("Queued: ") + coloredDot + " " + humanize.Comma(int64(qtots.Curr.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Curr.Bytes))) + 231 " (" + avgui("avg") + ": " + humanize.Comma(int64(qtots.Avg.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Avg.Bytes))) + 232 " ; " + maxui("max:") + " " + humanize.Comma(int64(qtots.Max.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Max.Bytes))) + ")") 233 addRowF(titleui("Workers: ") + valueui(humanize.Comma(int64(qs.Workers.Curr))) + avgui(" (avg: ") + humanize.Comma(int64(qs.Workers.Avg)) + maxui("; max: ") + humanize.Comma(int64(qs.Workers.Max)) + ")") 234 } 235 tgtXfer := qs.TgtXferStats[arn][replication.Total] 236 addRowF(titleui("Transfer Rate: ")+"%s/s ("+keyui("avg: ")+"%s/s"+keyui("; max: ")+"%s/s", valueui(humanize.Bytes(uint64(tgtXfer.CurrRate))), valueui(humanize.Bytes(uint64(tgtXfer.AvgRate))), valueui(humanize.Bytes(uint64(tgtXfer.PeakRate)))) 237 addRowF(titleui("Latency: ")+"%s ("+keyui("avg: ")+"%s"+keyui("; max: ")+"%s)", valueui(tgt.Latency.Curr.Round(time.Millisecond).String()), valueui(tgt.Latency.Avg.Round(time.Millisecond).String()), valueui(tgt.Latency.Max.Round(time.Millisecond).String())) 238 239 addRowF(titleui("Link: %s"), linkStatus) 240 addRowF(titleui("Errors: ")+"%s in last 1 minute; %s in last 1hr; %s since uptime", valueui(humanize.Comma(int64(stat.Failed.LastMinute.Count))), valueui(humanize.Comma(int64(stat.Failed.LastHour.Count))), valueui(humanize.Comma(int64(stat.Failed.Totals.Count)))) 241 242 bwStat, ok := rs.Stats[arn] 243 if ok && bwStat.BandWidthLimitInBytesPerSecond > 0 { 244 limit := "N/A" // N/A means cluster bandwidth is not configured 245 current := "N/A" // N/A means cluster bandwidth is not configured 246 if bwStat.CurrentBandwidthInBytesPerSecond > 0 { 247 current = humanize.Bytes(uint64(bwStat.CurrentBandwidthInBytesPerSecond * 8)) 248 current = fmt.Sprintf("%sb/s", current[:len(current)-1]) 249 } 250 if bwStat.BandWidthLimitInBytesPerSecond > 0 { 251 limit = humanize.Bytes(uint64(bwStat.BandWidthLimitInBytesPerSecond * 8)) 252 limit = fmt.Sprintf("%sb/s", limit[:len(limit)-1]) 253 } 254 addRowF(titleui("Configured Max Bandwidth (Bps): ")+"%s"+titleui(" Current Bandwidth (Bps): ")+"%s", valueui(limit), valueui(current)) 255 } 256 257 } 258 if !singleTgt { 259 xfer := qs.XferStats[replication.Total] 260 addRowF(hdrui("\nSummary:")) 261 addRowF(titleui("Replicated: ")+humanize.Comma(int64(replCount))+keyui(" objects")+" (%s", valueui(humanize.IBytes(replSz))+")") 262 addRowF(titleui("Queued: ") + coloredDot + " " + humanize.Comma(int64(qtots.Curr.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Curr.Bytes))) + 263 " (" + avgui("avg") + ": " + humanize.Comma(int64(qtots.Avg.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Avg.Bytes))) + 264 " ; " + maxui("max:") + " " + humanize.Comma(int64(qtots.Max.Count)) + keyui(" objects, ") + valueui(humanize.IBytes(uint64(qtots.Max.Bytes))) + ")") 265 addRowF(titleui("Workers: ") + valueui(humanize.Comma(int64(qs.Workers.Curr))) + avgui(" (avg: ") + humanize.Comma(int64(qs.Workers.Avg)) + maxui("; max: ") + humanize.Comma(int64(qs.Workers.Max)) + ")") 266 addRowF(titleui("Received: ")+"%s"+keyui(" objects")+" (%s)", humanize.Comma(int64(replicaCount)), valueui(humanize.IBytes(uint64(replicaSz)))) 267 addRowF(titleui("Transfer Rate: ")+"%s/s"+avgui(" (avg: ")+"%s/s"+maxui("; max: ")+"%s/s)", valueui(humanize.Bytes(uint64(xfer.CurrRate))), valueui(humanize.Bytes(uint64(xfer.AvgRate))), valueui(humanize.Bytes(uint64(xfer.PeakRate)))) 268 addRowF(titleui("Errors: ")+"%s in last 1 minute; %s in last 1hr; %s since uptime", valueui(humanize.Comma(int64(failed.LastMinute.Count))), valueui(humanize.Comma(int64(failed.LastHour.Count))), valueui(humanize.Comma(int64(failed.Totals.Count)))) 269 } 270 271 table.Render() 272 return sb.String() 273 } 274 275 func mainReplicateStatus(cliCtx *cli.Context) error { 276 ctx, cancelReplicateStatus := context.WithCancel(globalContext) 277 defer cancelReplicateStatus() 278 279 console.SetColor("title", color.New(color.FgCyan)) 280 console.SetColor("value", color.New(color.FgWhite, color.Bold)) 281 282 console.SetColor("key", color.New(color.FgWhite)) 283 console.SetColor("THeaderBold", color.New(color.Bold, color.FgWhite)) 284 console.SetColor("Replica", color.New(color.FgCyan)) 285 console.SetColor("Failed", color.New(color.Bold, color.FgRed)) 286 for _, c := range colors { 287 console.SetColor(fmt.Sprintf("Node%d", c), color.New(c)) 288 } 289 console.SetColor("Replicated", color.New(color.FgCyan)) 290 console.SetColor("In-Queue", color.New(color.Bold, color.FgYellow)) 291 console.SetColor("Avg", color.New(color.FgCyan)) 292 console.SetColor("Peak", color.New(color.FgYellow)) 293 console.SetColor("Current", color.New(color.FgCyan)) 294 console.SetColor("Uptime", color.New(color.FgWhite)) 295 console.SetColor("qStatusWarn", color.New(color.FgYellow, color.Bold)) 296 console.SetColor("qStatusOK", color.New(color.FgGreen, color.Bold)) 297 console.SetColor("online", color.New(color.FgGreen, color.Bold)) 298 console.SetColor("offline", color.New(color.FgRed, color.Bold)) 299 300 for _, c := range colors { 301 console.SetColor(fmt.Sprintf("Node%d", c), color.New(color.Bold, c)) 302 } 303 checkReplicateStatusSyntax(cliCtx) 304 305 // Get the alias parameter from cli 306 args := cliCtx.Args() 307 aliasedURL := args.Get(0) 308 // Create a new Client 309 client, err := newClient(aliasedURL) 310 fatalIf(err, "Unable to initialize connection.") 311 // Create a new MinIO Admin Client 312 admClient, cerr := newAdminClient(aliasedURL) 313 fatalIf(cerr, "Unable to initialize admin connection.") 314 _, sourceBucket := url2Alias(args[0]) 315 316 replicateStatus, err := client.GetReplicationMetrics(ctx) 317 fatalIf(err.Trace(args...), "Unable to get replication status") 318 targets, e := admClient.ListRemoteTargets(globalContext, sourceBucket, "") 319 fatalIf(probe.NewError(e).Trace(args...), "Unable to fetch remote target.") 320 cfg, err := client.GetReplication(ctx) 321 fatalIf(err.Trace(args...), "Unable to fetch replication configuration.") 322 323 if cliCtx.IsSet("nodes") { 324 printMsg(replicateXferMessage{ 325 Op: cliCtx.Command.Name, 326 Status: "success", 327 ReplQueueStats: replicateStatus.QueueStats, 328 }) 329 return nil 330 } 331 332 printMsg(replicateStatusMessage{ 333 Op: cliCtx.Command.Name, 334 URL: aliasedURL, 335 Metrics: replicateStatus, 336 Targets: targets, 337 cfg: cfg, 338 }) 339 340 return nil 341 } 342 343 type replicateXferMessage struct { 344 Op string `json:"op"` 345 Status string `json:"status"` 346 replication.ReplQueueStats 347 } 348 349 func (m replicateXferMessage) JSON() string { 350 m.Status = "success" 351 jsonMessageBytes, e := json.MarshalIndent(m, "", " ") 352 fatalIf(probe.NewError(e), "Unable to marshal into JSON.") 353 return string(jsonMessageBytes) 354 } 355 356 func (m replicateXferMessage) String() string { 357 var rows []string 358 maxLen := 0 359 360 for _, rqs := range m.ReplQueueStats.Nodes { 361 if len(rqs.NodeName) > maxLen { 362 maxLen = len(rqs.NodeName) 363 } 364 lrgX := rqs.XferStats[replication.Large] 365 smlX := rqs.XferStats[replication.Small] 366 rows = append(rows, console.Colorize("", newPrettyTable(" | ", 367 Field{getNodeTheme(rqs.NodeName), len(rqs.NodeName) + 3}, 368 Field{"Uptime:", 15}, 369 Field{"Lbl", 25}, 370 Field{"Avg", 12}, 371 Field{"Peak", 12}, 372 Field{"Current", 12}, 373 Field{"Workers", 10}, 374 ).buildRow(rqs.NodeName, humanize.RelTime(time.Now(), time.Now().Add(time.Duration(rqs.Uptime)*time.Second), "", ""), "Large Objects (>=128 MiB)", fmt.Sprintf("%s/s", humanize.Bytes(uint64(lrgX.AvgRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(lrgX.PeakRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(lrgX.CurrRate))), fmt.Sprintf("%d", int(rqs.Workers.Avg))))) 375 376 rows = append(rows, console.Colorize("", newPrettyTable(" | ", 377 Field{getNodeTheme(rqs.NodeName), len(rqs.NodeName) + 3}, 378 Field{"Uptime:", 15}, 379 Field{"Lbl", 25}, 380 Field{"Avg", 12}, 381 Field{"Peak", 12}, 382 Field{"Current", 12}, 383 Field{"Workers", 10}, 384 ).buildRow(rqs.NodeName, humanize.RelTime(time.Now(), time.Now().Add(time.Duration(rqs.Uptime)*time.Second), "", ""), "Small Objects (<128 MiB)", fmt.Sprintf("%s/s", humanize.Bytes(uint64(smlX.AvgRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(smlX.PeakRate))), fmt.Sprintf("%s/s", humanize.Bytes(uint64(smlX.CurrRate))), fmt.Sprintf("%d", int(rqs.Workers.Avg))))) 385 } 386 387 hdrSlc := []string{ 388 console.Colorize("THeaderBold", newPrettyTable(" | ", 389 Field{"", maxLen + 3}, 390 Field{"Uptime:", 15}, 391 Field{"Lbl", 25}, 392 Field{"XferRate", 42}, 393 Field{"Workers", 12}).buildRow("Node Name", "Uptime", "Label", " Transfer Rate ", "Workers")), 394 console.Colorize("THeaderBold", newPrettyTable(" | ", 395 Field{"", maxLen + 3}, 396 Field{"Uptime:", 15}, 397 Field{"Lbl", 25}, 398 Field{"Avg", 12}, 399 Field{"Peak", 12}, 400 Field{"Current", 12}, 401 Field{"Workers", 10}).buildRow("", "", "", "Avg", "Peak", "Current", "")), 402 } 403 404 return strings.Join(append(hdrSlc, rows...), "\n") 405 } 406 407 // colorize node name 408 func getNodeTheme(nodeName string) string { 409 nodeHash := fnv.New32a() 410 nodeHash.Write([]byte(nodeName)) 411 nHashSum := nodeHash.Sum32() 412 idx := nHashSum % uint32(len(colors)) 413 return fmt.Sprintf("Node%d", colors[idx]) 414 }