github.com/minio/madmin-go/v3@v3.0.51/metrics.go

//
// Copyright (c) 2015-2022 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

package madmin

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/prometheus/procfs"
	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/load"
)

// MetricType is a bitfield representation of different metric types.
type MetricType uint32

// MetricsNone indicates no metrics.
const MetricsNone MetricType = 0

const (
	MetricsScanner MetricType = 1 << (iota)
	MetricsDisk
	MetricsOS
	MetricsBatchJobs
	MetricsSiteResync
	MetricNet
	MetricsMem
	MetricsCPU

	// MetricsAll must be last.
	// Enables all metrics.
	MetricsAll = 1<<(iota) - 1
)

// MetricsOptions are options provided to Metrics call.
type MetricsOptions struct {
	Type     MetricType    // Return only these metric types. Several types can be combined using |. Leave at 0 to return all.
	N        int           // Maximum number of samples to return. 0 will return endless stream.
	Interval time.Duration // Interval between samples. Will be rounded up to 1s.
	Hosts    []string      // Leave empty for all
	ByHost   bool          // Return metrics by host.
	Disks    []string
	ByDisk   bool
	ByJobID  string
	ByDepID  string
}

// Metrics makes an admin call to retrieve metrics.
// The provided function is called for each received entry.
func (adm *AdminClient) Metrics(ctx context.Context, o MetricsOptions, out func(RealtimeMetrics)) (err error) {
	path := fmt.Sprintf(adminAPIPrefix + "/metrics")
	q := make(url.Values)
	q.Set("types", strconv.FormatUint(uint64(o.Type), 10))
	q.Set("n", strconv.Itoa(o.N))
	q.Set("interval", o.Interval.String())
	q.Set("hosts", strings.Join(o.Hosts, ","))
	if o.ByHost {
		q.Set("by-host", "true")
	}
	q.Set("disks", strings.Join(o.Disks, ","))
	if o.ByDisk {
		q.Set("by-disk", "true")
	}
	if o.ByJobID != "" {
		q.Set("by-jobID", o.ByJobID)
	}
	if o.ByDepID != "" {
		q.Set("by-depID", o.ByDepID)
	}

	resp, err := adm.executeMethod(ctx,
		http.MethodGet, requestData{
			relPath:     path,
			queryValues: q,
		},
	)
	if err != nil {
		return err
	}

	if resp.StatusCode != http.StatusOK {
		closeResponse(resp)
		return httpRespToErrorResponse(resp)
	}
	defer closeResponse(resp)
	dec := json.NewDecoder(resp.Body)
	for {
		var m RealtimeMetrics
		err := dec.Decode(&m)
		if err != nil {
			if errors.Is(err, io.EOF) {
				err = io.ErrUnexpectedEOF
			}
			return err
		}
		out(m)
		if m.Final {
			break
		}
	}
	return nil
}
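// exampleStreamMetrics is an illustrative sketch added for this listing; it
// is not part of the upstream file. It assumes the caller already holds an
// initialized *AdminClient (for example from New(...)) and shows how to
// stream a fixed number of aggregated samples, printing any errors the
// cluster reports along the way.
func exampleStreamMetrics(ctx context.Context, adm *AdminClient) error {
	opts := MetricsOptions{
		Type:     MetricsCPU | MetricsMem | MetricsDisk, // combine types with |
		N:        10,                                    // stop after 10 samples
		Interval: time.Second,                           // one sample per second
	}
	return adm.Metrics(ctx, opts, func(m RealtimeMetrics) {
		if len(m.Errors) > 0 {
			fmt.Println("errors:", strings.Join(m.Errors, "; "))
		}
		fmt.Printf("hosts scanned: %d, final: %v\n", len(m.Hosts), m.Final)
	})
}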
// Contains returns whether m contains all of x.
func (m MetricType) Contains(x MetricType) bool {
	return m&x == x
}

// RealtimeMetrics provides realtime metrics.
// This is intended to be expanded over time to cover more types.
type RealtimeMetrics struct {
	// Errors contains any errors encountered while collecting metrics.
	Errors []string `json:"errors,omitempty"`
	// Hosts indicates the scanned hosts
	Hosts      []string              `json:"hosts"`
	Aggregated Metrics               `json:"aggregated"`
	ByHost     map[string]Metrics    `json:"by_host,omitempty"`
	ByDisk     map[string]DiskMetric `json:"by_disk,omitempty"`
	// Final indicates whether this is the final packet and the receiver can exit.
	Final bool `json:"final"`
}

// Metrics contains all metric types.
type Metrics struct {
	Scanner    *ScannerMetrics    `json:"scanner,omitempty"`
	Disk       *DiskMetric        `json:"disk,omitempty"`
	OS         *OSMetrics         `json:"os,omitempty"`
	BatchJobs  *BatchJobMetrics   `json:"batchJobs,omitempty"`
	SiteResync *SiteResyncMetrics `json:"siteResync,omitempty"`
	Net        *NetMetrics        `json:"net,omitempty"`
	Mem        *MemMetrics        `json:"mem,omitempty"`
	CPU        *CPUMetrics        `json:"cpu,omitempty"`
}

// Merge other into r.
func (r *Metrics) Merge(other *Metrics) {
	if other == nil {
		return
	}
	if r.Scanner == nil && other.Scanner != nil {
		r.Scanner = &ScannerMetrics{}
	}
	r.Scanner.Merge(other.Scanner)

	if r.Disk == nil && other.Disk != nil {
		r.Disk = &DiskMetric{}
	}
	r.Disk.Merge(other.Disk)

	if r.OS == nil && other.OS != nil {
		r.OS = &OSMetrics{}
	}
	r.OS.Merge(other.OS)

	if r.BatchJobs == nil && other.BatchJobs != nil {
		r.BatchJobs = &BatchJobMetrics{}
	}
	r.BatchJobs.Merge(other.BatchJobs)

	if r.SiteResync == nil && other.SiteResync != nil {
		r.SiteResync = &SiteResyncMetrics{}
	}
	r.SiteResync.Merge(other.SiteResync)

	if r.Net == nil && other.Net != nil {
		r.Net = &NetMetrics{}
	}
	r.Net.Merge(other.Net)
}

// Merge will merge other into r.
func (r *RealtimeMetrics) Merge(other *RealtimeMetrics) {
	if other == nil {
		return
	}

	if len(other.Errors) > 0 {
		r.Errors = append(r.Errors, other.Errors...)
	}

	if r.ByHost == nil && len(other.ByHost) > 0 {
		r.ByHost = make(map[string]Metrics, len(other.ByHost))
	}
	for host, metrics := range other.ByHost {
		r.ByHost[host] = metrics
	}

	r.Hosts = append(r.Hosts, other.Hosts...)
	r.Aggregated.Merge(&other.Aggregated)
	sort.Strings(r.Hosts)

	// Gather per disk metrics
	if r.ByDisk == nil && len(other.ByDisk) > 0 {
		r.ByDisk = make(map[string]DiskMetric, len(other.ByDisk))
	}
	for disk, metrics := range other.ByDisk {
		r.ByDisk[disk] = metrics
	}
}
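// exampleAccumulateMetrics is an illustrative sketch, not part of the
// upstream file. It shows how the streamed packets can be folded into a
// single RealtimeMetrics value by reusing Merge; it assumes the same
// initialized *AdminClient as the earlier sketch.
func exampleAccumulateMetrics(ctx context.Context, adm *AdminClient) (RealtimeMetrics, error) {
	var total RealtimeMetrics
	err := adm.Metrics(ctx, MetricsOptions{
		Type:   MetricsScanner | MetricsOS,
		N:      5,
		ByHost: true,
	}, func(m RealtimeMetrics) {
		// Merge folds errors, host lists, per-host and per-disk maps, and
		// the aggregated metrics of each packet into the running total.
		total.Merge(&m)
	})
	return total, err
}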
// ScannerMetrics contains scanner information.
type ScannerMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Current scanner cycle
	CurrentCycle uint64 `json:"current_cycle"`

	// Start time of current cycle
	CurrentStarted time.Time `json:"current_started"`

	// History of when last cycles completed
	CyclesCompletedAt []time.Time `json:"cycle_complete_times"`

	// Number of accumulated operations by type since server restart.
	LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"`

	// Number of accumulated ILM operations by type since server restart.
	LifeTimeILM map[string]uint64 `json:"ilm_ops,omitempty"`

	// Last minute operation statistics.
	LastMinute struct {
		// Scanner actions.
		Actions map[string]TimedAction `json:"actions,omitempty"`
		// ILM actions.
		ILM map[string]TimedAction `json:"ilm,omitempty"`
	} `json:"last_minute"`

	// Currently active path(s) being scanned.
	ActivePaths []string `json:"active,omitempty"`
}

// TimedAction contains a number of actions and their accumulated duration in nanoseconds.
type TimedAction struct {
	Count   uint64 `json:"count"`
	AccTime uint64 `json:"acc_time_ns"`
	Bytes   uint64 `json:"bytes,omitempty"`
}

// Avg returns the average time spent on the action.
func (t TimedAction) Avg() time.Duration {
	if t.Count == 0 {
		return 0
	}
	return time.Duration(t.AccTime / t.Count)
}

// AvgBytes returns the average number of bytes per action.
func (t TimedAction) AvgBytes() uint64 {
	if t.Count == 0 {
		return 0
	}
	return t.Bytes / t.Count
}

// Merge other into t.
func (t *TimedAction) Merge(other TimedAction) {
	t.Count += other.Count
	t.AccTime += other.AccTime
	t.Bytes += other.Bytes
}

// Merge other into 's'.
func (s *ScannerMetrics) Merge(other *ScannerMetrics) {
	if other == nil {
		return
	}
	if s.CollectedAt.Before(other.CollectedAt) {
		// Use latest timestamp
		s.CollectedAt = other.CollectedAt
	}
	if s.CurrentCycle < other.CurrentCycle {
		s.CurrentCycle = other.CurrentCycle
		s.CyclesCompletedAt = other.CyclesCompletedAt
		s.CurrentStarted = other.CurrentStarted
	}
	if len(other.CyclesCompletedAt) > len(s.CyclesCompletedAt) {
		s.CyclesCompletedAt = other.CyclesCompletedAt
	}

	// Regular ops
	if len(other.LifeTimeOps) > 0 && s.LifeTimeOps == nil {
		s.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps))
	}
	for k, v := range other.LifeTimeOps {
		total := s.LifeTimeOps[k] + v
		s.LifeTimeOps[k] = total
	}
	if s.LastMinute.Actions == nil && len(other.LastMinute.Actions) > 0 {
		s.LastMinute.Actions = make(map[string]TimedAction, len(other.LastMinute.Actions))
	}
	for k, v := range other.LastMinute.Actions {
		total := s.LastMinute.Actions[k]
		total.Merge(v)
		s.LastMinute.Actions[k] = total
	}

	// ILM
	if len(other.LifeTimeILM) > 0 && s.LifeTimeILM == nil {
		s.LifeTimeILM = make(map[string]uint64, len(other.LifeTimeILM))
	}
	for k, v := range other.LifeTimeILM {
		total := s.LifeTimeILM[k] + v
		s.LifeTimeILM[k] = total
	}
	if s.LastMinute.ILM == nil && len(other.LastMinute.ILM) > 0 {
		s.LastMinute.ILM = make(map[string]TimedAction, len(other.LastMinute.ILM))
	}
	for k, v := range other.LastMinute.ILM {
		total := s.LastMinute.ILM[k]
		total.Merge(v)
		s.LastMinute.ILM[k] = total
	}
	s.ActivePaths = append(s.ActivePaths, other.ActivePaths...)
	sort.Strings(s.ActivePaths)
}
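// exampleScannerSummary is an illustrative sketch, not part of the upstream
// file. It prints per-action averages from the last-minute scanner
// statistics using TimedAction's Avg and AvgBytes helpers.
func exampleScannerSummary(sm *ScannerMetrics) {
	if sm == nil {
		return
	}
	for name, ta := range sm.LastMinute.Actions {
		fmt.Printf("%s: %d ops, avg %v, avg %d bytes\n",
			name, ta.Count, ta.Avg(), ta.AvgBytes())
	}
}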
// DiskIOStats contains IO stats of a single drive
type DiskIOStats struct {
	ReadIOs        uint64 `json:"read_ios"`
	ReadMerges     uint64 `json:"read_merges"`
	ReadSectors    uint64 `json:"read_sectors"`
	ReadTicks      uint64 `json:"read_ticks"`
	WriteIOs       uint64 `json:"write_ios"`
	WriteMerges    uint64 `json:"write_merges"`
	WriteSectors   uint64 `json:"wrte_sectors"`
	WriteTicks     uint64 `json:"write_ticks"`
	CurrentIOs     uint64 `json:"current_ios"`
	TotalTicks     uint64 `json:"total_ticks"`
	ReqTicks       uint64 `json:"req_ticks"`
	DiscardIOs     uint64 `json:"discard_ios"`
	DiscardMerges  uint64 `json:"discard_merges"`
	DiscardSectors uint64 `json:"discard_secotrs"`
	DiscardTicks   uint64 `json:"discard_ticks"`
	FlushIOs       uint64 `json:"flush_ios"`
	FlushTicks     uint64 `json:"flush_ticks"`
}

// DiskMetric contains metrics for one or more disks.
type DiskMetric struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Number of disks
	NDisks int `json:"n_disks"`

	// Offline disks
	Offline int `json:"offline,omitempty"`

	// Healing disks
	Healing int `json:"healing,omitempty"`

	// Number of accumulated operations by type since server restart.
	LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"`

	// Last minute statistics.
	LastMinute struct {
		Operations map[string]TimedAction `json:"operations,omitempty"`
	} `json:"last_minute"`

	IOStats DiskIOStats `json:"iostats,omitempty"`
}

// Merge other into 'd'.
func (d *DiskMetric) Merge(other *DiskMetric) {
	if other == nil {
		return
	}
	if d.CollectedAt.Before(other.CollectedAt) {
		// Use latest timestamp
		d.CollectedAt = other.CollectedAt
	}
	d.NDisks += other.NDisks
	d.Offline += other.Offline
	d.Healing += other.Healing

	if len(other.LifeTimeOps) > 0 && d.LifeTimeOps == nil {
		d.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps))
	}
	for k, v := range other.LifeTimeOps {
		total := d.LifeTimeOps[k] + v
		d.LifeTimeOps[k] = total
	}

	if d.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 {
		d.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations))
	}
	for k, v := range other.LastMinute.Operations {
		total := d.LastMinute.Operations[k]
		total.Merge(v)
		d.LastMinute.Operations[k] = total
	}
}

// OSMetrics contains metrics for OS operations.
type OSMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Number of accumulated operations by type since server restart.
	LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"`

	// Last minute statistics.
	LastMinute struct {
		Operations map[string]TimedAction `json:"operations,omitempty"`
	} `json:"last_minute"`
}
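// exampleDiskReport is an illustrative sketch, not part of the upstream
// file. It reports per-drive health and a rough average read/write latency
// from the ByDisk map returned when MetricsOptions.ByDisk is true, assuming
// the read_ticks/write_ticks counters carry /proc/diskstats-style
// milliseconds.
func exampleDiskReport(byDisk map[string]DiskMetric) {
	for name, dm := range byDisk {
		st := dm.IOStats
		var readLat, writeLat float64
		if st.ReadIOs > 0 {
			readLat = float64(st.ReadTicks) / float64(st.ReadIOs)
		}
		if st.WriteIOs > 0 {
			writeLat = float64(st.WriteTicks) / float64(st.WriteIOs)
		}
		fmt.Printf("%s: offline=%d healing=%d avg read %.1fms avg write %.1fms\n",
			name, dm.Offline, dm.Healing, readLat, writeLat)
	}
}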
// Merge other into 'o'.
func (o *OSMetrics) Merge(other *OSMetrics) {
	if other == nil {
		return
	}
	if o.CollectedAt.Before(other.CollectedAt) {
		// Use latest timestamp
		o.CollectedAt = other.CollectedAt
	}

	if len(other.LifeTimeOps) > 0 && o.LifeTimeOps == nil {
		o.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps))
	}
	for k, v := range other.LifeTimeOps {
		total := o.LifeTimeOps[k] + v
		o.LifeTimeOps[k] = total
	}

	if o.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 {
		o.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations))
	}
	for k, v := range other.LastMinute.Operations {
		total := o.LastMinute.Operations[k]
		total.Merge(v)
		o.LastMinute.Operations[k] = total
	}
}

// BatchJobMetrics contains metrics for batch operations
type BatchJobMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Jobs by ID.
	Jobs map[string]JobMetric
}

// JobMetric contains metrics for a single batch job.
type JobMetric struct {
	JobID         string    `json:"jobID"`
	JobType       string    `json:"jobType"`
	StartTime     time.Time `json:"startTime"`
	LastUpdate    time.Time `json:"lastUpdate"`
	RetryAttempts int       `json:"retryAttempts"`

	Complete bool `json:"complete"`
	Failed   bool `json:"failed"`

	// Specific job type data:
	Replicate *ReplicateInfo   `json:"replicate,omitempty"`
	KeyRotate *KeyRotationInfo `json:"rotation,omitempty"`
	Expired   *ExpirationInfo  `json:"expired,omitempty"`
}

// ReplicateInfo contains progress details of a batch replication job.
type ReplicateInfo struct {
	// Last bucket/object batch replicated
	Bucket string `json:"lastBucket"`
	Object string `json:"lastObject"`

	// Verbose information
	Objects          int64 `json:"objects"`
	ObjectsFailed    int64 `json:"objectsFailed"`
	BytesTransferred int64 `json:"bytesTransferred"`
	BytesFailed      int64 `json:"bytesFailed"`
}

// ExpirationInfo contains progress details of a batch expiration job.
type ExpirationInfo struct {
	// Last bucket/object expired
	Bucket string `json:"lastBucket"`
	Object string `json:"lastObject"`

	// Verbose information
	Objects       int64 `json:"objects"`
	ObjectsFailed int64 `json:"objectsFailed"`
}

// KeyRotationInfo contains progress details of a batch key-rotation job.
type KeyRotationInfo struct {
	// Last bucket/object key rotated
	Bucket string `json:"lastBucket"`
	Object string `json:"lastObject"`

	// Verbose information
	Objects       int64 `json:"objects"`
	ObjectsFailed int64 `json:"objectsFailed"`
}
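// exampleBatchJobReport is an illustrative sketch, not part of the upstream
// file. It walks a BatchJobMetrics snapshot and prints the status of each
// job, plus replication progress where present.
func exampleBatchJobReport(bj *BatchJobMetrics) {
	if bj == nil {
		return
	}
	for id, job := range bj.Jobs {
		status := "running"
		switch {
		case job.Failed:
			status = "failed"
		case job.Complete:
			status = "complete"
		}
		fmt.Printf("job %s (%s): %s, retries=%d\n", id, job.JobType, status, job.RetryAttempts)
		if r := job.Replicate; r != nil {
			fmt.Printf("  replicated %d objects (%d failed), %d bytes\n",
				r.Objects, r.ObjectsFailed, r.BytesTransferred)
		}
	}
}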
// Merge other into 'o'.
func (o *BatchJobMetrics) Merge(other *BatchJobMetrics) {
	if other == nil || len(other.Jobs) == 0 {
		return
	}
	if o.CollectedAt.Before(other.CollectedAt) {
		// Use latest timestamp
		o.CollectedAt = other.CollectedAt
	}
	if o.Jobs == nil {
		o.Jobs = make(map[string]JobMetric, len(other.Jobs))
	}
	// Job
	for k, v := range other.Jobs {
		o.Jobs[k] = v
	}
}

// SiteResyncMetrics contains metrics for site resync operation
type SiteResyncMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`
	// Status of resync operation
	ResyncStatus string    `json:"resyncStatus,omitempty"`
	StartTime    time.Time `json:"startTime"`
	LastUpdate   time.Time `json:"lastUpdate"`
	NumBuckets   int64     `json:"numBuckets"`
	ResyncID     string    `json:"resyncID"`
	DeplID       string    `json:"deplID"`

	// Completed size in bytes
	ReplicatedSize int64 `json:"completedReplicationSize"`
	// Total number of objects replicated
	ReplicatedCount int64 `json:"replicationCount"`
	// Failed size in bytes
	FailedSize int64 `json:"failedReplicationSize"`
	// Total number of failed operations
	FailedCount int64 `json:"failedReplicationCount"`
	// Buckets that could not be synced
	FailedBuckets []string `json:"failedBuckets"`
	// Last bucket/object replicated.
	Bucket string `json:"bucket,omitempty"`
	Object string `json:"object,omitempty"`
}

// Complete reports whether the site resync operation has completed.
func (o SiteResyncMetrics) Complete() bool {
	return strings.ToLower(o.ResyncStatus) == "completed"
}

// Merge other into 'o'.
func (o *SiteResyncMetrics) Merge(other *SiteResyncMetrics) {
	if other == nil {
		return
	}
	if o.CollectedAt.Before(other.CollectedAt) {
		// Use latest
		*o = *other
	}
}

// NetMetrics contains network statistics for a single interface.
type NetMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Name of the network interface
	InterfaceName string `json:"interfaceName"`

	NetStats procfs.NetDevLine `json:"netstats"`
}

// Merge other into 'n'.
func (n *NetMetrics) Merge(other *NetMetrics) {
	if other == nil {
		return
	}
	if n.CollectedAt.Before(other.CollectedAt) {
		// Use latest timestamp
		n.CollectedAt = other.CollectedAt
	}
	n.NetStats.RxBytes += other.NetStats.RxBytes
	n.NetStats.RxPackets += other.NetStats.RxPackets
	n.NetStats.RxErrors += other.NetStats.RxErrors
	n.NetStats.RxDropped += other.NetStats.RxDropped
	n.NetStats.RxFIFO += other.NetStats.RxFIFO
	n.NetStats.RxFrame += other.NetStats.RxFrame
	n.NetStats.RxCompressed += other.NetStats.RxCompressed
	n.NetStats.RxMulticast += other.NetStats.RxMulticast
	n.NetStats.TxBytes += other.NetStats.TxBytes
	n.NetStats.TxPackets += other.NetStats.TxPackets
	n.NetStats.TxErrors += other.NetStats.TxErrors
	n.NetStats.TxDropped += other.NetStats.TxDropped
	n.NetStats.TxFIFO += other.NetStats.TxFIFO
	n.NetStats.TxCollisions += other.NetStats.TxCollisions
	n.NetStats.TxCarrier += other.NetStats.TxCarrier
	n.NetStats.TxCompressed += other.NetStats.TxCompressed
}

// MemMetrics contains system memory metrics.
type MemMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	Info MemInfo `json:"memInfo"`
}
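// exampleMemUtilization is an illustrative sketch, not part of the upstream
// file. It derives a memory utilization percentage from an aggregated
// MemMetrics value, using only the MemInfo fields that this file itself
// touches (Total, Available).
func exampleMemUtilization(mm *MemMetrics) float64 {
	if mm == nil || mm.Info.Total == 0 {
		return 0
	}
	used := float64(mm.Info.Total) - float64(mm.Info.Available)
	return 100 * used / float64(mm.Info.Total)
}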
// Merge other into 'm'.
func (m *MemMetrics) Merge(other *MemMetrics) {
	if m.CollectedAt.Before(other.CollectedAt) {
		// Use latest timestamp
		m.CollectedAt = other.CollectedAt
	}

	m.Info.Total += other.Info.Total
	m.Info.Available += other.Info.Available
	m.Info.SwapSpaceTotal += other.Info.SwapSpaceTotal
	m.Info.SwapSpaceFree += other.Info.SwapSpaceFree
	m.Info.Limit += other.Info.Limit
}

// CPUMetrics contains CPU time, load average, and core count metrics.
type CPUMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	TimesStat *cpu.TimesStat `json:"timesStat"`
	LoadStat  *load.AvgStat  `json:"loadStat"`
	CPUCount  int            `json:"cpuCount"`
}

// Merge other into 'm'.
func (m *CPUMetrics) Merge(other *CPUMetrics) {
	if m.CollectedAt.Before(other.CollectedAt) {
		// Use latest timestamp
		m.CollectedAt = other.CollectedAt
	}
	m.TimesStat.User += other.TimesStat.User
	m.TimesStat.System += other.TimesStat.System
	m.TimesStat.Idle += other.TimesStat.Idle
	m.TimesStat.Nice += other.TimesStat.Nice
	m.TimesStat.Iowait += other.TimesStat.Iowait
	m.TimesStat.Irq += other.TimesStat.Irq
	m.TimesStat.Softirq += other.TimesStat.Softirq
	m.TimesStat.Steal += other.TimesStat.Steal
	m.TimesStat.Guest += other.TimesStat.Guest
	m.TimesStat.GuestNice += other.TimesStat.GuestNice

	m.LoadStat.Load1 += other.LoadStat.Load1
	m.LoadStat.Load5 += other.LoadStat.Load5
	m.LoadStat.Load15 += other.LoadStat.Load15
}
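// exampleCPUBusyPercent is an illustrative sketch, not part of the upstream
// file. It derives a coarse busy percentage from an aggregated CPUMetrics
// sample; gopsutil's TimesStat fields are cumulative seconds, so only the
// ratio of busy to total time is meaningful here.
func exampleCPUBusyPercent(cm *CPUMetrics) float64 {
	if cm == nil || cm.TimesStat == nil {
		return 0
	}
	t := cm.TimesStat
	total := t.User + t.System + t.Nice + t.Idle + t.Iowait + t.Irq + t.Softirq + t.Steal
	if total == 0 {
		return 0
	}
	// Treat idle and iowait as non-busy time.
	busy := total - t.Idle - t.Iowait
	return 100 * busy / total
}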