github.com/minio/madmin-go@v1.7.5/metrics.go (about) 1 // 2 // MinIO Object Storage (c) 2022 MinIO, Inc. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 package madmin 18 19 import ( 20 "context" 21 "encoding/json" 22 "errors" 23 "fmt" 24 "io" 25 "net/http" 26 "net/url" 27 "sort" 28 "strconv" 29 "strings" 30 "time" 31 ) 32 33 // MetricType is a bitfield representation of different metric types. 34 type MetricType uint32 35 36 // MetricsNone indicates no metrics. 37 const MetricsNone MetricType = 0 38 39 const ( 40 MetricsScanner MetricType = 1 << (iota) 41 MetricsDisk 42 MetricsOS 43 MetricsBatchJobs 44 MetricsSiteResync 45 46 // MetricsAll must be last. 47 // Enables all metrics. 48 MetricsAll = 1<<(iota) - 1 49 ) 50 51 // MetricsOptions are options provided to Metrics call. 52 type MetricsOptions struct { 53 Type MetricType // Return only these metric types. Several types can be combined using |. Leave at 0 to return all. 54 N int // Maximum number of samples to return. 0 will return endless stream. 55 Interval time.Duration // Interval between samples. Will be rounded up to 1s. 56 Hosts []string // Leave empty for all 57 ByHost bool // Return metrics by host. 58 Disks []string 59 ByDisk bool 60 ByJobID string 61 ByDepID string 62 } 63 64 // Metrics makes an admin call to retrieve metrics. 65 // The provided function is called for each received entry. 
66 func (adm *AdminClient) Metrics(ctx context.Context, o MetricsOptions, out func(RealtimeMetrics)) (err error) { 67 path := fmt.Sprintf(adminAPIPrefix + "/metrics") 68 q := make(url.Values) 69 q.Set("types", strconv.FormatUint(uint64(o.Type), 10)) 70 q.Set("n", strconv.Itoa(o.N)) 71 q.Set("interval", o.Interval.String()) 72 q.Set("hosts", strings.Join(o.Hosts, ",")) 73 if o.ByHost { 74 q.Set("by-host", "true") 75 } 76 q.Set("disks", strings.Join(o.Disks, ",")) 77 if o.ByDisk { 78 q.Set("by-disk", "true") 79 } 80 if o.ByJobID != "" { 81 q.Set("by-jobID", o.ByJobID) 82 } 83 if o.ByDepID != "" { 84 q.Set("by-depID", o.ByDepID) 85 } 86 87 resp, err := adm.executeMethod(ctx, 88 http.MethodGet, requestData{ 89 relPath: path, 90 queryValues: q, 91 }, 92 ) 93 if err != nil { 94 return err 95 } 96 97 if resp.StatusCode != http.StatusOK { 98 closeResponse(resp) 99 return httpRespToErrorResponse(resp) 100 } 101 defer closeResponse(resp) 102 dec := json.NewDecoder(resp.Body) 103 for { 104 var m RealtimeMetrics 105 err := dec.Decode(&m) 106 if err != nil { 107 if errors.Is(err, io.EOF) { 108 err = io.ErrUnexpectedEOF 109 } 110 return err 111 } 112 out(m) 113 if m.Final { 114 break 115 } 116 } 117 return nil 118 } 119 120 // Contains returns whether m contains all of x. 121 func (m MetricType) Contains(x MetricType) bool { 122 return m&x == x 123 } 124 125 // RealtimeMetrics provides realtime metrics. 126 // This is intended to be expanded over time to cover more types. 127 type RealtimeMetrics struct { 128 // Error indicates an error occurred. 129 Errors []string `json:"errors,omitempty"` 130 // Hosts indicates the scanned hosts 131 Hosts []string `json:"hosts"` 132 Aggregated Metrics `json:"aggregated"` 133 ByHost map[string]Metrics `json:"by_host,omitempty"` 134 ByDisk map[string]DiskMetric `json:"by_disk,omitempty"` 135 // Final indicates whether this is the final packet and the receiver can exit. 
136 Final bool `json:"final"` 137 } 138 139 // Metrics contains all metric types. 140 type Metrics struct { 141 Scanner *ScannerMetrics `json:"scanner,omitempty"` 142 Disk *DiskMetric `json:"disk,omitempty"` 143 OS *OSMetrics `json:"os,omitempty"` 144 BatchJobs *BatchJobMetrics `json:"batchJobs,omitempty"` 145 SiteResync *SiteResyncMetrics `json:"siteResync,omitempty"` 146 } 147 148 // Merge other into r. 149 func (r *Metrics) Merge(other *Metrics) { 150 if other == nil { 151 return 152 } 153 if r.Scanner == nil && other.Scanner != nil { 154 r.Scanner = &ScannerMetrics{} 155 } 156 r.Scanner.Merge(other.Scanner) 157 158 if r.Disk == nil && other.Disk != nil { 159 r.Disk = &DiskMetric{} 160 } 161 r.Disk.Merge(other.Disk) 162 163 if r.OS == nil && other.OS != nil { 164 r.OS = &OSMetrics{} 165 } 166 r.OS.Merge(other.OS) 167 if r.BatchJobs == nil && other.BatchJobs != nil { 168 r.BatchJobs = &BatchJobMetrics{} 169 } 170 r.BatchJobs.Merge(other.BatchJobs) 171 172 if r.SiteResync == nil && other.SiteResync != nil { 173 r.SiteResync = &SiteResyncMetrics{} 174 } 175 r.SiteResync.Merge(other.SiteResync) 176 } 177 178 // Merge will merge other into r. 179 func (r *RealtimeMetrics) Merge(other *RealtimeMetrics) { 180 if other == nil { 181 return 182 } 183 184 if len(other.Errors) > 0 { 185 r.Errors = append(r.Errors, other.Errors...) 186 } 187 188 if r.ByHost == nil && len(other.ByHost) > 0 { 189 r.ByHost = make(map[string]Metrics, len(other.ByHost)) 190 } 191 for host, metrics := range other.ByHost { 192 r.ByHost[host] = metrics 193 } 194 195 r.Hosts = append(r.Hosts, other.Hosts...) 196 r.Aggregated.Merge(&other.Aggregated) 197 sort.Strings(r.Hosts) 198 199 // Gather per disk metrics 200 if r.ByDisk == nil && len(other.ByDisk) > 0 { 201 r.ByDisk = make(map[string]DiskMetric, len(other.ByDisk)) 202 } 203 for disk, metrics := range other.ByDisk { 204 r.ByDisk[disk] = metrics 205 } 206 } 207 208 // ScannerMetrics contains scanner information. 
type ScannerMetrics struct {
	// Time these metrics were collected
	CollectedAt time.Time `json:"collected"`

	// Current scanner cycle
	CurrentCycle uint64 `json:"current_cycle"`

	// Start time of current cycle
	CurrentStarted time.Time `json:"current_started"`

	// History of when last cycles completed
	CyclesCompletedAt []time.Time `json:"cycle_complete_times"`

	// Number of accumulated operations by type since server restart.
	LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"`

	// Number of accumulated ILM operations by type since server restart.
	LifeTimeILM map[string]uint64 `json:"ilm_ops,omitempty"`

	// Last minute operation statistics.
	LastMinute struct {
		// Scanner actions.
		Actions map[string]TimedAction `json:"actions,omitempty"`
		// ILM actions.
		ILM map[string]TimedAction `json:"ilm,omitempty"`
	} `json:"last_minute"`

	// Currently active path(s) being scanned.
	ActivePaths []string `json:"active,omitempty"`
}

// TimedAction contains a number of actions and their accumulated duration in nanoseconds.
type TimedAction struct {
	Count   uint64 `json:"count"`
	AccTime uint64 `json:"acc_time_ns"`
	Bytes   uint64 `json:"bytes,omitempty"`
}

// Avg returns the average time spent per action, or 0 when no actions were
// counted.
func (t TimedAction) Avg() time.Duration {
	if t.Count > 0 {
		return time.Duration(t.AccTime / t.Count)
	}
	return 0
}

// AvgBytes returns the average number of bytes per action, or 0 when no
// actions were counted.
func (t TimedAction) AvgBytes() uint64 {
	if t.Count > 0 {
		return t.Bytes / t.Count
	}
	return 0
}

// Merge other into t by summing the count, the accumulated time and the bytes.
func (t *TimedAction) Merge(other TimedAction) {
	*t = TimedAction{
		Count:   t.Count + other.Count,
		AccTime: t.AccTime + other.AccTime,
		Bytes:   t.Bytes + other.Bytes,
	}
}

// Merge other into 's'.
271 func (s *ScannerMetrics) Merge(other *ScannerMetrics) { 272 if other == nil { 273 return 274 } 275 if s.CollectedAt.Before(other.CollectedAt) { 276 // Use latest timestamp 277 s.CollectedAt = other.CollectedAt 278 } 279 if s.CurrentCycle < other.CurrentCycle { 280 s.CurrentCycle = other.CurrentCycle 281 s.CyclesCompletedAt = other.CyclesCompletedAt 282 s.CurrentStarted = other.CurrentStarted 283 } 284 if len(other.CyclesCompletedAt) > len(s.CyclesCompletedAt) { 285 s.CyclesCompletedAt = other.CyclesCompletedAt 286 } 287 288 // Regular ops 289 if len(other.LifeTimeOps) > 0 && s.LifeTimeOps == nil { 290 s.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps)) 291 } 292 for k, v := range other.LifeTimeOps { 293 total := s.LifeTimeOps[k] + v 294 s.LifeTimeOps[k] = total 295 } 296 if s.LastMinute.Actions == nil && len(other.LastMinute.Actions) > 0 { 297 s.LastMinute.Actions = make(map[string]TimedAction, len(other.LastMinute.Actions)) 298 } 299 for k, v := range other.LastMinute.Actions { 300 total := s.LastMinute.Actions[k] 301 total.Merge(v) 302 s.LastMinute.Actions[k] = total 303 } 304 305 // ILM 306 if len(other.LifeTimeILM) > 0 && s.LifeTimeILM == nil { 307 s.LifeTimeILM = make(map[string]uint64, len(other.LifeTimeILM)) 308 } 309 for k, v := range other.LifeTimeILM { 310 total := s.LifeTimeILM[k] + v 311 s.LifeTimeILM[k] = total 312 } 313 if s.LastMinute.ILM == nil && len(other.LastMinute.ILM) > 0 { 314 s.LastMinute.ILM = make(map[string]TimedAction, len(other.LastMinute.ILM)) 315 } 316 for k, v := range other.LastMinute.ILM { 317 total := s.LastMinute.ILM[k] 318 total.Merge(v) 319 s.LastMinute.ILM[k] = total 320 } 321 s.ActivePaths = append(s.ActivePaths, other.ActivePaths...) 
322 sort.Strings(s.ActivePaths) 323 } 324 325 // DiskIOStats contains IO stats of a single drive 326 type DiskIOStats struct { 327 ReadIOs uint64 `json:"read_ios"` 328 ReadMerges uint64 `json:"read_merges"` 329 ReadSectors uint64 `json:"read_sectors"` 330 ReadTicks uint64 `json:"read_ticks"` 331 WriteIOs uint64 `json:"write_ios"` 332 WriteMerges uint64 `json:"write_merges"` 333 WriteSectors uint64 `json:"wrte_sectors"` 334 WriteTicks uint64 `json:"write_ticks"` 335 CurrentIOs uint64 `json:"current_ios"` 336 TotalTicks uint64 `json:"total_ticks"` 337 ReqTicks uint64 `json:"req_ticks"` 338 DiscardIOs uint64 `json:"discard_ios"` 339 DiscardMerges uint64 `json:"discard_merges"` 340 DiscardSectors uint64 `json:"discard_secotrs"` 341 DiscardTicks uint64 `json:"discard_ticks"` 342 FlushIOs uint64 `json:"flush_ios"` 343 FlushTicks uint64 `json:"flush_ticks"` 344 } 345 346 // DiskMetric contains metrics for one or more disks. 347 type DiskMetric struct { 348 // Time these metrics were collected 349 CollectedAt time.Time `json:"collected"` 350 351 // Number of disks 352 NDisks int `json:"n_disks"` 353 354 // Offline disks 355 Offline int `json:"offline,omitempty"` 356 357 // Healing disks 358 Healing int `json:"healing,omitempty"` 359 360 // Number of accumulated operations by type since server restart. 361 LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"` 362 363 // Last minute statistics. 364 LastMinute struct { 365 Operations map[string]TimedAction `json:"operations,omitempty"` 366 } `json:"last_minute"` 367 368 IOStats DiskIOStats `json:"iostats,omitempty"` 369 } 370 371 // Merge other into 's'. 
372 func (d *DiskMetric) Merge(other *DiskMetric) { 373 if other == nil { 374 return 375 } 376 if d.CollectedAt.Before(other.CollectedAt) { 377 // Use latest timestamp 378 d.CollectedAt = other.CollectedAt 379 } 380 d.NDisks += other.NDisks 381 d.Offline += other.Offline 382 d.Healing += other.Healing 383 384 if len(other.LifeTimeOps) > 0 && d.LifeTimeOps == nil { 385 d.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps)) 386 } 387 for k, v := range other.LifeTimeOps { 388 total := d.LifeTimeOps[k] + v 389 d.LifeTimeOps[k] = total 390 } 391 392 if d.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 { 393 d.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations)) 394 } 395 for k, v := range other.LastMinute.Operations { 396 total := d.LastMinute.Operations[k] 397 total.Merge(v) 398 d.LastMinute.Operations[k] = total 399 } 400 } 401 402 // OSMetrics contains metrics for OS operations. 403 type OSMetrics struct { 404 // Time these metrics were collected 405 CollectedAt time.Time `json:"collected"` 406 407 // Number of accumulated operations by type since server restart. 408 LifeTimeOps map[string]uint64 `json:"life_time_ops,omitempty"` 409 410 // Last minute statistics. 411 LastMinute struct { 412 Operations map[string]TimedAction `json:"operations,omitempty"` 413 } `json:"last_minute"` 414 } 415 416 // Merge other into 'o'. 
417 func (o *OSMetrics) Merge(other *OSMetrics) { 418 if other == nil { 419 return 420 } 421 if o.CollectedAt.Before(other.CollectedAt) { 422 // Use latest timestamp 423 o.CollectedAt = other.CollectedAt 424 } 425 426 if len(other.LifeTimeOps) > 0 && o.LifeTimeOps == nil { 427 o.LifeTimeOps = make(map[string]uint64, len(other.LifeTimeOps)) 428 } 429 for k, v := range other.LifeTimeOps { 430 total := o.LifeTimeOps[k] + v 431 o.LifeTimeOps[k] = total 432 } 433 434 if o.LastMinute.Operations == nil && len(other.LastMinute.Operations) > 0 { 435 o.LastMinute.Operations = make(map[string]TimedAction, len(other.LastMinute.Operations)) 436 } 437 for k, v := range other.LastMinute.Operations { 438 total := o.LastMinute.Operations[k] 439 total.Merge(v) 440 o.LastMinute.Operations[k] = total 441 } 442 } 443 444 // BatchJobMetrics contains metrics for batch operations 445 type BatchJobMetrics struct { 446 // Time these metrics were collected 447 CollectedAt time.Time `json:"collected"` 448 449 // Jobs by ID. 450 Jobs map[string]JobMetric 451 } 452 453 type JobMetric struct { 454 JobID string `json:"jobID"` 455 JobType string `json:"jobType"` 456 StartTime time.Time `json:"startTime"` 457 LastUpdate time.Time `json:"lastUpdate"` 458 RetryAttempts int `json:"retryAttempts"` 459 460 Complete bool `json:"complete"` 461 Failed bool `json:"failed"` 462 463 // Specific job type data: 464 Replicate *ReplicateInfo `json:"replicate,omitempty"` 465 } 466 467 type ReplicateInfo struct { 468 // Last bucket/object batch replicated 469 Bucket string `json:"lastBucket"` 470 Object string `json:"lastObject"` 471 472 // Verbose information 473 Objects int64 `json:"objects"` 474 ObjectsFailed int64 `json:"objectsFailed"` 475 BytesTransferred int64 `json:"bytesTransferred"` 476 BytesFailed int64 `json:"bytesFailed"` 477 } 478 479 // Merge other into 'o'. 
480 func (o *BatchJobMetrics) Merge(other *BatchJobMetrics) { 481 if other == nil || len(other.Jobs) == 0 { 482 return 483 } 484 if o.CollectedAt.Before(other.CollectedAt) { 485 // Use latest timestamp 486 o.CollectedAt = other.CollectedAt 487 } 488 if o.Jobs == nil { 489 o.Jobs = make(map[string]JobMetric, len(other.Jobs)) 490 } 491 // Job 492 for k, v := range other.Jobs { 493 o.Jobs[k] = v 494 } 495 } 496 497 // SiteResyncMetrics contains metrics for site resync operation 498 type SiteResyncMetrics struct { 499 // Time these metrics were collected 500 CollectedAt time.Time `json:"collected"` 501 // Status of resync operation 502 ResyncStatus string `json:"resyncStatus,omitempty"` 503 StartTime time.Time `json:"startTime"` 504 LastUpdate time.Time `json:"lastUpdate"` 505 NumBuckets int64 `json:"numBuckets"` 506 ResyncID string `json:"resyncID"` 507 DeplID string `json:"deplID"` 508 509 // Completed size in bytes 510 ReplicatedSize int64 `json:"completedReplicationSize"` 511 // Total number of objects replicated 512 ReplicatedCount int64 `json:"replicationCount"` 513 // Failed size in bytes 514 FailedSize int64 `json:"failedReplicationSize"` 515 // Total number of failed operations 516 FailedCount int64 `json:"failedReplicationCount"` 517 // Buckets that could not be synced 518 FailedBuckets []string `json:"failedBuckets"` 519 // Last bucket/object replicated. 520 Bucket string `json:"bucket,omitempty"` 521 Object string `json:"object,omitempty"` 522 } 523 524 func (o SiteResyncMetrics) Complete() bool { 525 return strings.ToLower(o.ResyncStatus) == "completed" 526 } 527 528 // Merge other into 'o'. 529 func (o *SiteResyncMetrics) Merge(other *SiteResyncMetrics) { 530 if other == nil { 531 return 532 } 533 if o.CollectedAt.Before(other.CollectedAt) { 534 // Use latest 535 *o = *other 536 } 537 }