github.com/minio/madmin-go/v2@v2.2.1/metrics.go (about) 1 // 2 // Copyright (c) 2015-2022 MinIO, Inc. 3 // 4 // This file is part of MinIO Object Storage stack 5 // 6 // This program is free software: you can redistribute it and/or modify 7 // it under the terms of the GNU Affero General Public License as 8 // published by the Free Software Foundation, either version 3 of the 9 // License, or (at your option) any later version. 10 // 11 // This program is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU Affero General Public License for more details. 15 // 16 // You should have received a copy of the GNU Affero General Public License 17 // along with this program. If not, see <http://www.gnu.org/licenses/>. 18 // 19 20 package madmin 21 22 import ( 23 "context" 24 "encoding/json" 25 "errors" 26 "fmt" 27 "io" 28 "net/http" 29 "net/url" 30 "sort" 31 "strconv" 32 "strings" 33 "time" 34 ) 35 36 // MetricType is a bitfield representation of different metric types. 37 type MetricType uint32 38 39 // MetricsNone indicates no metrics. 40 const MetricsNone MetricType = 0 41 42 const ( 43 MetricsScanner MetricType = 1 << (iota) 44 MetricsDisk 45 MetricsOS 46 MetricsBatchJobs 47 MetricsSiteResync 48 49 // MetricsAll must be last. 50 // Enables all metrics. 51 MetricsAll = 1<<(iota) - 1 52 ) 53 54 // MetricsOptions are options provided to Metrics call. 55 type MetricsOptions struct { 56 Type MetricType // Return only these metric types. Several types can be combined using |. Leave at 0 to return all. 57 N int // Maximum number of samples to return. 0 will return endless stream. 58 Interval time.Duration // Interval between samples. Will be rounded up to 1s. 59 Hosts []string // Leave empty for all 60 ByHost bool // Return metrics by host. 61 Disks []string 62 ByDisk bool 63 ByJobID string 64 ByDepID string 65 } 66 67 // Metrics makes an admin call to retrieve metrics. 68 // The provided function is called for each received entry. 69 func (adm *AdminClient) Metrics(ctx context.Context, o MetricsOptions, out func(RealtimeMetrics)) (err error) { 70 path := fmt.Sprintf(adminAPIPrefix + "/metrics") 71 q := make(url.Values) 72 q.Set("types", strconv.FormatUint(uint64(o.Type), 10)) 73 q.Set("n", strconv.Itoa(o.N)) 74 q.Set("interval", o.Interval.String()) 75 q.Set("hosts", strings.Join(o.Hosts, ",")) 76 if o.ByHost { 77 q.Set("by-host", "true") 78 } 79 q.Set("disks", strings.Join(o.Disks, ",")) 80 if o.ByDisk { 81 q.Set("by-disk", "true") 82 } 83 if o.ByJobID != "" { 84 q.Set("by-jobID", o.ByJobID) 85 } 86 if o.ByDepID != "" { 87 q.Set("by-depID", o.ByDepID) 88 } 89 90 resp, err := adm.executeMethod(ctx, 91 http.MethodGet, requestData{ 92 relPath: path, 93 queryValues: q, 94 }, 95 ) 96 if err != nil { 97 return err 98 } 99 100 if resp.StatusCode != http.StatusOK { 101 closeResponse(resp) 102 return httpRespToErrorResponse(resp) 103 } 104 defer closeResponse(resp) 105 dec := json.NewDecoder(resp.Body) 106 for { 107 var m RealtimeMetrics 108 err := dec.Decode(&m) 109 if err != nil { 110 if errors.Is(err, io.EOF) { 111 err = io.ErrUnexpectedEOF 112 } 113 return err 114 } 115 out(m) 116 if m.Final { 117 break 118 } 119 } 120 return nil 121 } 122 123 // Contains returns whether m contains all of x. 124 func (m MetricType) Contains(x MetricType) bool { 125 return m&x == x 126 } 127 128 // RealtimeMetrics provides realtime metrics. 129 // This is intended to be expanded over time to cover more types. 130 type RealtimeMetrics struct { 131 // Error indicates an error occurred. 132 Errors []string `json:"errors,omitempty"` 133 // Hosts indicates the scanned hosts 134 Hosts []string `json:"hosts"` 135 Aggregated Metrics `json:"aggregated"` 136 ByHost map[string]Metrics `json:"by_host,omitempty"` 137 ByDisk map[string]DiskMetric `json:"by_disk,omitempty"` 138 // Final indicates whether this is the final packet and the receiver can exit. 139 Final bool `json:"final"` 140 } 141 142 // Metrics contains all metric types. 143 type Metrics struct { 144 Scanner *ScannerMetrics `json:"scanner,omitempty"` 145 Disk *DiskMetric `json:"disk,omitempty"` 146 OS *OSMetrics `json:"os,omitempty"` 147 BatchJobs *BatchJobMetrics `json:"batchJobs,omitempty"` 148 SiteResync *SiteResyncMetrics `json:"siteResync,omitempty"` 149 } 150 151 // Merge other into r. 152 func (r *Metrics) Merge(other *Metrics) { 153 if other == nil { 154 return 155 } 156 if r.Scanner == nil && other.Scanner != nil { 157 r.Scanner = &ScannerMetrics{} 158 } 159 r.Scanner.Merge(other.Scanner) 160 161 if r.Disk == nil && other.Disk != nil { 162 r.Disk = &DiskMetric{} 163 } 164 r.Disk.Merge(other.Disk) 165 166 if r.OS == nil && other.OS != nil { 167 r.OS = &OSMetrics{} 168 } 169 r.OS.Merge(other.OS) 170 if r.BatchJobs == nil && other.BatchJobs != nil { 171 r.BatchJobs = &BatchJobMetrics{} 172 } 173 r.BatchJobs.Merge(other.BatchJobs) 174 175 if r.SiteResync == nil && other.SiteResync != nil { 176 r.SiteResync = &SiteResyncMetrics{} 177 } 178 r.SiteResync.Merge(other.SiteResync) 179 } 180 181 // Merge will merge other into r. 182 func (r *RealtimeMetrics) Merge(other *RealtimeMetrics) { 183 if other == nil { 184 return 185 } 186 187 if len(other.Errors) > 0 { 188 r.Errors = append(r.Errors, other.Errors...) 189 } 190 191 if r.ByHost == nil && len(other.ByHost) > 0 { 192 r.ByHost = make(map[string]Metrics, len(other.ByHost)) 193 } 194 for host, metrics := range other.ByHost { 195 r.ByHost[host] = metrics 196 } 197 198 r.Hosts = append(r.Hosts, other.Hosts...) 199 r.Aggregated.Merge(&other.Aggregated) 200 sort.Strings(r.Hosts) 201 202 // Gather per disk metrics 203 if r.ByDisk == nil && len(other.ByDisk) > 0 { 204 r.ByDisk = make(map[string]DiskMetric, len(other.ByDisk)) 205 } 206 for disk, metrics := range other.ByDisk { 207 r.ByDisk[disk] = metrics 208 } 209 } 210 211 // ScannerMetrics contains scanner information. 212 type ScannerMetrics struct { 213 // Time these metrics were collected 214 CollectedAt time.Time `json:"collected"` 215 216 // Current scanner cycle 217 CurrentCycle uint64 `json:"current_cycle"` 218 219 // Start time of current cycle 220 CurrentStarted time.Time `json:"current_started"` 221 222 // History of when last cycles completed 223 CyclesCompletedAt []time.Time `json:"cycle_complete_times"` 224 225 // Number of accumulated operations by type since server restart. 226 LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"` 227 228 // Number of accumulated ILM operations by type since server restart. 229 LifeTimeILM map[string]uint64 `json:"ilm_ops,omitempty"` 230 231 // Last minute operation statistics. 232 LastMinute struct { 233 // Scanner actions. 234 Actions map[string]TimedAction `json:"actions,omitempty"` 235 // ILM actions. 236 ILM map[string]TimedAction `json:"ilm,omitempty"` 237 } `json:"last_minute"` 238 239 // Currently active path(s) being scanned. 240 ActivePaths []string `json:"active,omitempty"` 241 } 242 243 // TimedAction contains a number of actions and their accumulated duration in nanoseconds. 244 type TimedAction struct { 245 Count uint64 `json:"count"` 246 AccTime uint64 `json:"acc_time_ns"` 247 Bytes uint64 `json:"bytes,omitempty"` 248 } 249 250 // Avg returns the average time spent on the action. 251 func (t TimedAction) Avg() time.Duration { 252 if t.Count == 0 { 253 return 0 254 } 255 return time.Duration(t.AccTime / t.Count) 256 } 257 258 // AvgBytes returns the average time spent on the action. 259 func (t TimedAction) AvgBytes() uint64 { 260 if t.Count == 0 { 261 return 0 262 } 263 return t.Bytes / t.Count 264 } 265 266 // Merge other into t. 267 func (t *TimedAction) Merge(other TimedAction) { 268 t.Count += other.Count 269 t.AccTime += other.AccTime 270 t.Bytes += other.Bytes 271 } 272 273 // Merge other into 's'. 274 func (s *ScannerMetrics) Merge(other *ScannerMetrics) { 275 if other == nil { 276 return 277 } 278 if s.CollectedAt.Before(other.CollectedAt) { 279 // Use latest timestamp 280 s.CollectedAt = other.CollectedAt 281 } 282 if s.CurrentCycle < other.CurrentCycle { 283 s.CurrentCycle = other.CurrentCycle 284 s.CyclesCompletedAt = other.CyclesCompletedAt 285 s.CurrentStarted = other.CurrentStarted 286 } 287 if len(other.CyclesCompletedAt) > len(s.CyclesCompletedAt) { 288 s.CyclesCompletedAt = other.CyclesCompletedAt 289 } 290 291 // Regular ops 292 if len(other.LifeTimeOps) > 0 && s.LifeTimeOps == nil { 293 s.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps)) 294 } 295 for k, v := range other.LifeTimeOps { 296 total := s.LifeTimeOps[k] + v 297 s.LifeTimeOps[k] = total 298 } 299 if s.LastMinute.Actions == nil && len(other.LastMinute.Actions) > 0 { 300 s.LastMinute.Actions = make(map[string]TimedAction, len(other.LastMinute.Actions)) 301 } 302 for k, v := range other.LastMinute.Actions { 303 total := s.LastMinute.Actions[k] 304 total.Merge(v) 305 s.LastMinute.Actions[k] = total 306 } 307 308 // ILM 309 if len(other.LifeTimeILM) > 0 && s.LifeTimeILM == nil { 310 s.LifeTimeILM = make(map[string]uint64, len(other.LifeTimeILM)) 311 } 312 for k, v := range other.LifeTimeILM { 313 total := s.LifeTimeILM[k] + v 314 s.LifeTimeILM[k] = total 315 } 316 if s.LastMinute.ILM == nil && len(other.LastMinute.ILM) > 0 { 317 s.LastMinute.ILM = make(map[string]TimedAction, len(other.LastMinute.ILM)) 318 } 319 for k, v := range other.LastMinute.ILM { 320 total := s.LastMinute.ILM[k] 321 total.Merge(v) 322 s.LastMinute.ILM[k] = total 323 } 324 s.ActivePaths = append(s.ActivePaths, other.ActivePaths...) 325 sort.Strings(s.ActivePaths) 326 } 327 328 // DiskIOStats contains IO stats of a single drive 329 type DiskIOStats struct { 330 ReadIOs uint64 `json:"read_ios"` 331 ReadMerges uint64 `json:"read_merges"` 332 ReadSectors uint64 `json:"read_sectors"` 333 ReadTicks uint64 `json:"read_ticks"` 334 WriteIOs uint64 `json:"write_ios"` 335 WriteMerges uint64 `json:"write_merges"` 336 WriteSectors uint64 `json:"wrte_sectors"` 337 WriteTicks uint64 `json:"write_ticks"` 338 CurrentIOs uint64 `json:"current_ios"` 339 TotalTicks uint64 `json:"total_ticks"` 340 ReqTicks uint64 `json:"req_ticks"` 341 DiscardIOs uint64 `json:"discard_ios"` 342 DiscardMerges uint64 `json:"discard_merges"` 343 DiscardSectors uint64 `json:"discard_secotrs"` 344 DiscardTicks uint64 `json:"discard_ticks"` 345 FlushIOs uint64 `json:"flush_ios"` 346 FlushTicks uint64 `json:"flush_ticks"` 347 } 348 349 // DiskMetric contains metrics for one or more disks. 350 type DiskMetric struct { 351 // Time these metrics were collected 352 CollectedAt time.Time `json:"collected"` 353 354 // Number of disks 355 NDisks int `json:"n_disks"` 356 357 // Offline disks 358 Offline int `json:"offline,omitempty"` 359 360 // Healing disks 361 Healing int `json:"healing,omitempty"` 362 363 // Number of accumulated operations by type since server restart. 364 LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"` 365 366 // Last minute statistics. 367 LastMinute struct { 368 Operations map[string]TimedAction `json:"operations,omitempty"` 369 } `json:"last_minute"` 370 371 IOStats DiskIOStats `json:"iostats,omitempty"` 372 } 373 374 // Merge other into 's'. 375 func (d *DiskMetric) Merge(other *DiskMetric) { 376 if other == nil { 377 return 378 } 379 if d.CollectedAt.Before(other.CollectedAt) { 380 // Use latest timestamp 381 d.CollectedAt = other.CollectedAt 382 } 383 d.NDisks += other.NDisks 384 d.Offline += other.Offline 385 d.Healing += other.Healing 386 387 if len(other.LifeTimeOps) > 0 && d.LifeTimeOps == nil { 388 d.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps)) 389 } 390 for k, v := range other.LifeTimeOps { 391 total := d.LifeTimeOps[k] + v 392 d.LifeTimeOps[k] = total 393 } 394 395 if d.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 { 396 d.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations)) 397 } 398 for k, v := range other.LastMinute.Operations { 399 total := d.LastMinute.Operations[k] 400 total.Merge(v) 401 d.LastMinute.Operations[k] = total 402 } 403 } 404 405 // OSMetrics contains metrics for OS operations. 406 type OSMetrics struct { 407 // Time these metrics were collected 408 CollectedAt time.Time `json:"collected"` 409 410 // Number of accumulated operations by type since server restart. 411 LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"` 412 413 // Last minute statistics. 414 LastMinute struct { 415 Operations map[string]TimedAction `json:"operations,omitempty"` 416 } `json:"last_minute"` 417 } 418 419 // Merge other into 'o'. 420 func (o *OSMetrics) Merge(other *OSMetrics) { 421 if other == nil { 422 return 423 } 424 if o.CollectedAt.Before(other.CollectedAt) { 425 // Use latest timestamp 426 o.CollectedAt = other.CollectedAt 427 } 428 429 if len(other.LifeTimeOps) > 0 && o.LifeTimeOps == nil { 430 o.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps)) 431 } 432 for k, v := range other.LifeTimeOps { 433 total := o.LifeTimeOps[k] + v 434 o.LifeTimeOps[k] = total 435 } 436 437 if o.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 { 438 o.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations)) 439 } 440 for k, v := range other.LastMinute.Operations { 441 total := o.LastMinute.Operations[k] 442 total.Merge(v) 443 o.LastMinute.Operations[k] = total 444 } 445 } 446 447 // BatchJobMetrics contains metrics for batch operations 448 type BatchJobMetrics struct { 449 // Time these metrics were collected 450 CollectedAt time.Time `json:"collected"` 451 452 // Jobs by ID. 453 Jobs map[string]JobMetric 454 } 455 456 type JobMetric struct { 457 JobID string `json:"jobID"` 458 JobType string `json:"jobType"` 459 StartTime time.Time `json:"startTime"` 460 LastUpdate time.Time `json:"lastUpdate"` 461 RetryAttempts int `json:"retryAttempts"` 462 463 Complete bool `json:"complete"` 464 Failed bool `json:"failed"` 465 466 // Specific job type data: 467 Replicate *ReplicateInfo `json:"replicate,omitempty"` 468 KeyRotate *KeyRotationInfo `json:"rotation,omitempty"` 469 } 470 471 type ReplicateInfo struct { 472 // Last bucket/object batch replicated 473 Bucket string `json:"lastBucket"` 474 Object string `json:"lastObject"` 475 476 // Verbose information 477 Objects int64 `json:"objects"` 478 ObjectsFailed int64 `json:"objectsFailed"` 479 BytesTransferred int64 `json:"bytesTransferred"` 480 BytesFailed int64 `json:"bytesFailed"` 481 } 482 483 type KeyRotationInfo struct { 484 // Last bucket/object key rotated 485 Bucket string `json:"lastBucket"` 486 Object string `json:"lastObject"` 487 488 // Verbose information 489 Objects int64 `json:"objects"` 490 ObjectsFailed int64 `json:"objectsFailed"` 491 } 492 493 // Merge other into 'o'. 494 func (o *BatchJobMetrics) Merge(other *BatchJobMetrics) { 495 if other == nil || len(other.Jobs) == 0 { 496 return 497 } 498 if o.CollectedAt.Before(other.CollectedAt) { 499 // Use latest timestamp 500 o.CollectedAt = other.CollectedAt 501 } 502 if o.Jobs == nil { 503 o.Jobs = make(map[string]JobMetric, len(other.Jobs)) 504 } 505 // Job 506 for k, v := range other.Jobs { 507 o.Jobs[k] = v 508 } 509 } 510 511 // SiteResyncMetrics contains metrics for site resync operation 512 type SiteResyncMetrics struct { 513 // Time these metrics were collected 514 CollectedAt time.Time `json:"collected"` 515 // Status of resync operation 516 ResyncStatus string `json:"resyncStatus,omitempty"` 517 StartTime time.Time `json:"startTime"` 518 LastUpdate time.Time `json:"lastUpdate"` 519 NumBuckets int64 `json:"numBuckets"` 520 ResyncID string `json:"resyncID"` 521 DeplID string `json:"deplID"` 522 523 // Completed size in bytes 524 ReplicatedSize int64 `json:"completedReplicationSize"` 525 // Total number of objects replicated 526 ReplicatedCount int64 `json:"replicationCount"` 527 // Failed size in bytes 528 FailedSize int64 `json:"failedReplicationSize"` 529 // Total number of failed operations 530 FailedCount int64 `json:"failedReplicationCount"` 531 // Buckets that could not be synced 532 FailedBuckets []string `json:"failedBuckets"` 533 // Last bucket/object replicated. 534 Bucket string `json:"bucket,omitempty"` 535 Object string `json:"object,omitempty"` 536 } 537 538 func (o SiteResyncMetrics) Complete() bool { 539 return strings.ToLower(o.ResyncStatus) == "completed" 540 } 541 542 // Merge other into 'o'. 543 func (o *SiteResyncMetrics) Merge(other *SiteResyncMetrics) { 544 if other == nil { 545 return 546 } 547 if o.CollectedAt.Before(other.CollectedAt) { 548 // Use latest 549 *o = *other 550 } 551 }