storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/metrics.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2018-2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cmd 18 19 import ( 20 "net/http" 21 "strings" 22 "sync/atomic" 23 "time" 24 25 "github.com/prometheus/client_golang/prometheus" 26 "github.com/prometheus/client_golang/prometheus/promhttp" 27 28 "storj.io/minio/cmd/logger" 29 iampolicy "storj.io/minio/pkg/iam/policy" 30 "storj.io/minio/pkg/madmin" 31 ) 32 33 var ( 34 httpRequestsDuration = prometheus.NewHistogramVec( 35 prometheus.HistogramOpts{ 36 Name: "s3_ttfb_seconds", 37 Help: "Time taken by requests served by current MinIO server instance", 38 Buckets: []float64{.05, .1, .25, .5, 1, 2.5, 5, 10}, 39 }, 40 []string{"api"}, 41 ) 42 minioVersionInfo = prometheus.NewGaugeVec( 43 prometheus.GaugeOpts{ 44 Namespace: "minio", 45 Name: "version_info", 46 Help: "Version of current MinIO server instance", 47 }, 48 []string{ 49 // current version 50 "version", 51 // commit-id of the current version 52 "commit", 53 }, 54 ) 55 ) 56 57 const ( 58 healMetricsNamespace = "self_heal" 59 gatewayNamespace = "gateway" 60 cacheNamespace = "cache" 61 s3Namespace = "s3" 62 bucketNamespace = "bucket" 63 minioNamespace = "minio" 64 diskNamespace = "disk" 65 interNodeNamespace = "internode" 66 ) 67 68 func init() { 69 prometheus.MustRegister(httpRequestsDuration) 70 prometheus.MustRegister(newMinioCollector()) 71 prometheus.MustRegister(minioVersionInfo) 72 } 73 74 // newMinioCollector describes the collector 75 // and returns reference of minioCollector 76 // It creates the Prometheus Description which is used 77 // to define metric and help string 78 func newMinioCollector() *minioCollector { 79 return &minioCollector{ 80 desc: prometheus.NewDesc("minio_stats", "Statistics exposed by MinIO server", nil, nil), 81 } 82 } 83 84 // minioCollector is the Custom Collector 85 type minioCollector struct { 86 desc *prometheus.Desc 87 } 88 89 // Describe sends the super-set of all possible descriptors of metrics 90 func (c *minioCollector) Describe(ch chan<- *prometheus.Desc) { 91 ch <- c.desc 92 } 93 94 // Collect is called by the Prometheus registry when collecting metrics. 95 func (c *minioCollector) Collect(ch chan<- prometheus.Metric) { 96 97 // Expose MinIO's version information 98 minioVersionInfo.WithLabelValues(Version, CommitID).Set(1.0) 99 100 storageMetricsPrometheus(ch) 101 nodeHealthMetricsPrometheus(ch) 102 bucketUsageMetricsPrometheus(ch) 103 networkMetricsPrometheus(ch) 104 httpMetricsPrometheus(ch) 105 cacheMetricsPrometheus(ch) 106 gatewayMetricsPrometheus(ch) 107 healingMetricsPrometheus(ch) 108 } 109 110 func nodeHealthMetricsPrometheus(ch chan<- prometheus.Metric) { 111 nodesUp, nodesDown := GetPeerOnlineCount() 112 ch <- prometheus.MustNewConstMetric( 113 prometheus.NewDesc( 114 prometheus.BuildFQName(minioNamespace, "nodes", "online"), 115 "Total number of MinIO nodes online", 116 nil, nil), 117 prometheus.GaugeValue, 118 float64(nodesUp), 119 ) 120 ch <- prometheus.MustNewConstMetric( 121 prometheus.NewDesc( 122 prometheus.BuildFQName(minioNamespace, "nodes", "offline"), 123 "Total number of MinIO nodes offline", 124 nil, nil), 125 prometheus.GaugeValue, 126 float64(nodesDown), 127 ) 128 } 129 130 // collects healing specific metrics for MinIO instance in Prometheus specific format 131 // and sends to given channel 132 func healingMetricsPrometheus(ch chan<- prometheus.Metric) { 133 if !globalIsErasure { 134 return 135 } 136 bgSeq, exists := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID) 137 if !exists { 138 return 139 } 140 141 var dur time.Duration 142 if !bgSeq.lastHealActivity.IsZero() { 143 dur = time.Since(bgSeq.lastHealActivity) 144 } 145 146 ch <- prometheus.MustNewConstMetric( 147 prometheus.NewDesc( 148 prometheus.BuildFQName(healMetricsNamespace, "time", "since_last_activity"), 149 "Time elapsed (in nano seconds) since last self healing activity. This is set to -1 until initial self heal activity", 150 nil, nil), 151 prometheus.GaugeValue, 152 float64(dur), 153 ) 154 for k, v := range bgSeq.getScannedItemsMap() { 155 ch <- prometheus.MustNewConstMetric( 156 prometheus.NewDesc( 157 prometheus.BuildFQName(healMetricsNamespace, "objects", "scanned"), 158 "Objects scanned in current self healing run", 159 []string{"type"}, nil), 160 prometheus.GaugeValue, 161 float64(v), string(k), 162 ) 163 } 164 for k, v := range bgSeq.getHealedItemsMap() { 165 ch <- prometheus.MustNewConstMetric( 166 prometheus.NewDesc( 167 prometheus.BuildFQName(healMetricsNamespace, "objects", "healed"), 168 "Objects healed in current self healing run", 169 []string{"type"}, nil), 170 prometheus.GaugeValue, 171 float64(v), string(k), 172 ) 173 } 174 for k, v := range bgSeq.gethealFailedItemsMap() { 175 // healFailedItemsMap stores the endpoint and volume state separated by comma, 176 // split the fields and pass to channel at correct index 177 s := strings.Split(k, ",") 178 ch <- prometheus.MustNewConstMetric( 179 prometheus.NewDesc( 180 prometheus.BuildFQName(healMetricsNamespace, "objects", "heal_failed"), 181 "Objects for which healing failed in current self healing run", 182 []string{"mount_path", "volume_status"}, nil), 183 prometheus.GaugeValue, 184 float64(v), string(s[0]), string(s[1]), 185 ) 186 } 187 } 188 189 // collects gateway specific metrics for MinIO instance in Prometheus specific format 190 // and sends to given channel 191 func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) { 192 if !GlobalIsGateway || (globalGatewayName != S3BackendGateway && globalGatewayName != AzureBackendGateway && globalGatewayName != GCSBackendGateway) { 193 return 194 } 195 196 objLayer := newObjectLayerFn() 197 // Service not initialized yet 198 if objLayer == nil { 199 return 200 } 201 202 m, err := objLayer.GetMetrics(GlobalContext) 203 if err != nil { 204 return 205 } 206 207 ch <- prometheus.MustNewConstMetric( 208 prometheus.NewDesc( 209 prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_received"), 210 "Total number of bytes received by current MinIO Gateway "+globalGatewayName+" backend", 211 nil, nil), 212 prometheus.CounterValue, 213 float64(m.GetBytesReceived()), 214 ) 215 ch <- prometheus.MustNewConstMetric( 216 prometheus.NewDesc( 217 prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_sent"), 218 "Total number of bytes sent by current MinIO Gateway to "+globalGatewayName+" backend", 219 nil, nil), 220 prometheus.CounterValue, 221 float64(m.GetBytesSent()), 222 ) 223 s := m.GetRequests() 224 ch <- prometheus.MustNewConstMetric( 225 prometheus.NewDesc( 226 prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), 227 "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", 228 []string{"method"}, nil), 229 prometheus.CounterValue, 230 float64(atomic.LoadUint64(&s.Get)), 231 http.MethodGet, 232 ) 233 ch <- prometheus.MustNewConstMetric( 234 prometheus.NewDesc( 235 prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), 236 "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", 237 []string{"method"}, nil), 238 prometheus.CounterValue, 239 float64(atomic.LoadUint64(&s.Head)), 240 http.MethodHead, 241 ) 242 ch <- prometheus.MustNewConstMetric( 243 prometheus.NewDesc( 244 prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), 245 "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", 246 []string{"method"}, nil), 247 prometheus.CounterValue, 248 float64(atomic.LoadUint64(&s.Put)), 249 http.MethodPut, 250 ) 251 ch <- prometheus.MustNewConstMetric( 252 prometheus.NewDesc( 253 prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"), 254 "Total number of requests made to "+globalGatewayName+" by current MinIO Gateway", 255 []string{"method"}, nil), 256 prometheus.CounterValue, 257 float64(atomic.LoadUint64(&s.Post)), 258 http.MethodPost, 259 ) 260 } 261 262 // collects cache metrics for MinIO server in Prometheus specific format 263 // and sends to given channel 264 func cacheMetricsPrometheus(ch chan<- prometheus.Metric) { 265 cacheObjLayer := newCachedObjectLayerFn() 266 // Service not initialized yet 267 if cacheObjLayer == nil { 268 return 269 } 270 271 ch <- prometheus.MustNewConstMetric( 272 prometheus.NewDesc( 273 prometheus.BuildFQName(cacheNamespace, "hits", "total"), 274 "Total number of disk cache hits in current MinIO instance", 275 nil, nil), 276 prometheus.CounterValue, 277 float64(cacheObjLayer.CacheStats().getHits()), 278 ) 279 ch <- prometheus.MustNewConstMetric( 280 prometheus.NewDesc( 281 prometheus.BuildFQName(cacheNamespace, "misses", "total"), 282 "Total number of disk cache misses in current MinIO instance", 283 nil, nil), 284 prometheus.CounterValue, 285 float64(cacheObjLayer.CacheStats().getMisses()), 286 ) 287 ch <- prometheus.MustNewConstMetric( 288 prometheus.NewDesc( 289 prometheus.BuildFQName(cacheNamespace, "data", "served"), 290 "Total number of bytes served from cache of current MinIO instance", 291 nil, nil), 292 prometheus.CounterValue, 293 float64(cacheObjLayer.CacheStats().getBytesServed()), 294 ) 295 for _, cdStats := range cacheObjLayer.CacheStats().GetDiskStats() { 296 // Cache disk usage percentage 297 ch <- prometheus.MustNewConstMetric( 298 prometheus.NewDesc( 299 prometheus.BuildFQName(cacheNamespace, "usage", "percent"), 300 "Total percentage cache usage", 301 []string{"disk"}, nil), 302 prometheus.GaugeValue, 303 float64(cdStats.UsagePercent), 304 cdStats.Dir, 305 ) 306 ch <- prometheus.MustNewConstMetric( 307 prometheus.NewDesc( 308 prometheus.BuildFQName(cacheNamespace, "usage", "high"), 309 "Indicates cache usage is high or low, relative to current cache 'quota' settings", 310 []string{"disk"}, nil), 311 prometheus.GaugeValue, 312 float64(cdStats.UsageState), 313 cdStats.Dir, 314 ) 315 316 ch <- prometheus.MustNewConstMetric( 317 prometheus.NewDesc( 318 prometheus.BuildFQName("cache", "usage", "size"), 319 "Indicates current cache usage in bytes", 320 []string{"disk"}, nil), 321 prometheus.GaugeValue, 322 float64(cdStats.UsageSize), 323 cdStats.Dir, 324 ) 325 326 ch <- prometheus.MustNewConstMetric( 327 prometheus.NewDesc( 328 prometheus.BuildFQName("cache", "total", "size"), 329 "Indicates total size of cache disk", 330 []string{"disk"}, nil), 331 prometheus.GaugeValue, 332 float64(cdStats.TotalCapacity), 333 cdStats.Dir, 334 ) 335 } 336 } 337 338 // collects http metrics for MinIO server in Prometheus specific format 339 // and sends to given channel 340 func httpMetricsPrometheus(ch chan<- prometheus.Metric) { 341 httpStats := globalHTTPStats.toServerHTTPStats() 342 343 for api, value := range httpStats.CurrentS3Requests.APIStats { 344 ch <- prometheus.MustNewConstMetric( 345 prometheus.NewDesc( 346 prometheus.BuildFQName(s3Namespace, "requests", "current"), 347 "Total number of running s3 requests in current MinIO server instance", 348 []string{"api"}, nil), 349 prometheus.CounterValue, 350 float64(value), 351 api, 352 ) 353 } 354 355 for api, value := range httpStats.TotalS3Requests.APIStats { 356 ch <- prometheus.MustNewConstMetric( 357 prometheus.NewDesc( 358 prometheus.BuildFQName(s3Namespace, "requests", "total"), 359 "Total number of s3 requests in current MinIO server instance", 360 []string{"api"}, nil), 361 prometheus.CounterValue, 362 float64(value), 363 api, 364 ) 365 } 366 367 for api, value := range httpStats.TotalS3Errors.APIStats { 368 ch <- prometheus.MustNewConstMetric( 369 prometheus.NewDesc( 370 prometheus.BuildFQName(s3Namespace, "errors", "total"), 371 "Total number of s3 errors in current MinIO server instance", 372 []string{"api"}, nil), 373 prometheus.CounterValue, 374 float64(value), 375 api, 376 ) 377 } 378 379 for api, value := range httpStats.TotalS3Canceled.APIStats { 380 ch <- prometheus.MustNewConstMetric( 381 prometheus.NewDesc( 382 prometheus.BuildFQName(s3Namespace, "canceled", "total"), 383 "Total number of client canceled s3 request in current MinIO server instance", 384 []string{"api"}, nil), 385 prometheus.CounterValue, 386 float64(value), 387 api, 388 ) 389 } 390 } 391 392 // collects network metrics for MinIO server in Prometheus specific format 393 // and sends to given channel 394 func networkMetricsPrometheus(ch chan<- prometheus.Metric) { 395 connStats := globalConnStats.toServerConnStats() 396 397 // Network Sent/Received Bytes (internode) 398 ch <- prometheus.MustNewConstMetric( 399 prometheus.NewDesc( 400 prometheus.BuildFQName(interNodeNamespace, "tx", "bytes_total"), 401 "Total number of bytes sent to the other peer nodes by current MinIO server instance", 402 nil, nil), 403 prometheus.CounterValue, 404 float64(connStats.TotalOutputBytes), 405 ) 406 407 ch <- prometheus.MustNewConstMetric( 408 prometheus.NewDesc( 409 prometheus.BuildFQName(interNodeNamespace, "rx", "bytes_total"), 410 "Total number of internode bytes received by current MinIO server instance", 411 nil, nil), 412 prometheus.CounterValue, 413 float64(connStats.TotalInputBytes), 414 ) 415 416 // Network Sent/Received Bytes (Outbound) 417 ch <- prometheus.MustNewConstMetric( 418 prometheus.NewDesc( 419 prometheus.BuildFQName(s3Namespace, "tx", "bytes_total"), 420 "Total number of s3 bytes sent by current MinIO server instance", 421 nil, nil), 422 prometheus.CounterValue, 423 float64(connStats.S3OutputBytes), 424 ) 425 426 ch <- prometheus.MustNewConstMetric( 427 prometheus.NewDesc( 428 prometheus.BuildFQName(s3Namespace, "rx", "bytes_total"), 429 "Total number of s3 bytes received by current MinIO server instance", 430 nil, nil), 431 prometheus.CounterValue, 432 float64(connStats.S3InputBytes), 433 ) 434 } 435 436 // get the most current of in-memory replication stats and data usage info from crawler. 437 func getLatestReplicationStats(bucket string, u madmin.BucketUsageInfo) (s BucketReplicationStats) { 438 bucketStats := GlobalNotificationSys.GetClusterBucketStats(GlobalContext, bucket) 439 440 replStats := BucketReplicationStats{} 441 for _, bucketStat := range bucketStats { 442 replStats.FailedCount += bucketStat.ReplicationStats.FailedCount 443 replStats.FailedSize += bucketStat.ReplicationStats.FailedSize 444 replStats.PendingCount += bucketStat.ReplicationStats.PendingCount 445 replStats.PendingSize += bucketStat.ReplicationStats.PendingSize 446 replStats.ReplicaSize += bucketStat.ReplicationStats.ReplicaSize 447 replStats.ReplicatedSize += bucketStat.ReplicationStats.ReplicatedSize 448 } 449 usageStat := globalReplicationStats.GetInitialUsage(bucket) 450 replStats.FailedCount += usageStat.FailedCount 451 replStats.FailedSize += usageStat.FailedSize 452 replStats.PendingCount += usageStat.PendingCount 453 replStats.PendingSize += usageStat.PendingSize 454 replStats.ReplicaSize += usageStat.ReplicaSize 455 replStats.ReplicatedSize += usageStat.ReplicatedSize 456 457 // use in memory replication stats if it is ahead of usage info. 458 if replStats.ReplicatedSize >= u.ReplicatedSize { 459 s.ReplicatedSize = replStats.ReplicatedSize 460 } else { 461 s.ReplicatedSize = u.ReplicatedSize 462 } 463 464 if replStats.PendingSize > u.ReplicationPendingSize { 465 s.PendingSize = replStats.PendingSize 466 } else { 467 s.PendingSize = u.ReplicationPendingSize 468 } 469 470 if replStats.FailedSize > u.ReplicationFailedSize { 471 s.FailedSize = replStats.FailedSize 472 } else { 473 s.FailedSize = u.ReplicationFailedSize 474 } 475 476 if replStats.ReplicaSize > u.ReplicaSize { 477 s.ReplicaSize = replStats.ReplicaSize 478 } else { 479 s.ReplicaSize = u.ReplicaSize 480 } 481 482 if replStats.PendingCount > u.ReplicationPendingCount { 483 s.PendingCount = replStats.PendingCount 484 } else { 485 s.PendingCount = u.ReplicationPendingCount 486 } 487 488 if replStats.FailedCount > u.ReplicationFailedCount { 489 s.FailedCount = replStats.FailedCount 490 } else { 491 s.FailedCount = u.ReplicationFailedCount 492 } 493 494 return s 495 } 496 497 // Populates prometheus with bucket usage metrics, this metrics 498 // is only enabled if scanner is enabled. 499 func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) { 500 objLayer := newObjectLayerFn() 501 // Service not initialized yet 502 if objLayer == nil { 503 return 504 } 505 506 if GlobalIsGateway { 507 return 508 } 509 510 dataUsageInfo, err := loadDataUsageFromBackend(GlobalContext, objLayer) 511 if err != nil { 512 return 513 } 514 // data usage has not captured any data yet. 515 if dataUsageInfo.LastUpdate.IsZero() { 516 return 517 } 518 519 for bucket, usageInfo := range dataUsageInfo.BucketsUsage { 520 stat := getLatestReplicationStats(bucket, usageInfo) 521 // Total space used by bucket 522 ch <- prometheus.MustNewConstMetric( 523 prometheus.NewDesc( 524 prometheus.BuildFQName(bucketNamespace, "usage", "size"), 525 "Total bucket size", 526 []string{"bucket"}, nil), 527 prometheus.GaugeValue, 528 float64(usageInfo.Size), 529 bucket, 530 ) 531 ch <- prometheus.MustNewConstMetric( 532 prometheus.NewDesc( 533 prometheus.BuildFQName(bucketNamespace, "objects", "count"), 534 "Total number of objects in a bucket", 535 []string{"bucket"}, nil), 536 prometheus.GaugeValue, 537 float64(usageInfo.ObjectsCount), 538 bucket, 539 ) 540 ch <- prometheus.MustNewConstMetric( 541 prometheus.NewDesc( 542 prometheus.BuildFQName("bucket", "replication", "pending_size"), 543 "Total capacity pending to be replicated", 544 []string{"bucket"}, nil), 545 prometheus.GaugeValue, 546 float64(stat.PendingSize), 547 bucket, 548 ) 549 ch <- prometheus.MustNewConstMetric( 550 prometheus.NewDesc( 551 prometheus.BuildFQName("bucket", "replication", "failed_size"), 552 "Total capacity failed to replicate at least once", 553 []string{"bucket"}, nil), 554 prometheus.GaugeValue, 555 float64(stat.FailedSize), 556 bucket, 557 ) 558 ch <- prometheus.MustNewConstMetric( 559 prometheus.NewDesc( 560 prometheus.BuildFQName("bucket", "replication", "successful_size"), 561 "Total capacity replicated to destination", 562 []string{"bucket"}, nil), 563 prometheus.GaugeValue, 564 float64(stat.ReplicatedSize), 565 bucket, 566 ) 567 ch <- prometheus.MustNewConstMetric( 568 prometheus.NewDesc( 569 prometheus.BuildFQName("bucket", "replication", "received_size"), 570 "Total capacity replicated to this instance", 571 []string{"bucket"}, nil), 572 prometheus.GaugeValue, 573 float64(stat.ReplicaSize), 574 bucket, 575 ) 576 ch <- prometheus.MustNewConstMetric( 577 prometheus.NewDesc( 578 prometheus.BuildFQName("bucket", "replication", "pending_count"), 579 "Total replication operations pending", 580 []string{"bucket"}, nil), 581 prometheus.GaugeValue, 582 float64(stat.PendingCount), 583 bucket, 584 ) 585 ch <- prometheus.MustNewConstMetric( 586 prometheus.NewDesc( 587 prometheus.BuildFQName("bucket", "replication", "failed_count"), 588 "Total replication operations failed", 589 []string{"bucket"}, nil), 590 prometheus.GaugeValue, 591 float64(stat.FailedCount), 592 bucket, 593 ) 594 for k, v := range usageInfo.ObjectSizesHistogram { 595 ch <- prometheus.MustNewConstMetric( 596 prometheus.NewDesc( 597 prometheus.BuildFQName(bucketNamespace, "objects", "histogram"), 598 "Total number of objects of different sizes in a bucket", 599 []string{"bucket", "object_size"}, nil), 600 prometheus.GaugeValue, 601 float64(v), 602 bucket, 603 k, 604 ) 605 } 606 } 607 } 608 609 // collects storage metrics for MinIO server in Prometheus specific format 610 // and sends to given channel 611 func storageMetricsPrometheus(ch chan<- prometheus.Metric) { 612 objLayer := newObjectLayerFn() 613 // Service not initialized yet 614 if objLayer == nil { 615 return 616 } 617 618 if GlobalIsGateway { 619 return 620 } 621 622 server := getLocalServerProperty(globalEndpoints, &http.Request{ 623 Host: globalLocalNodeName, 624 }) 625 626 onlineDisks, offlineDisks := getOnlineOfflineDisksStats(server.Disks) 627 totalDisks := offlineDisks.Merge(onlineDisks) 628 629 // Report total capacity 630 ch <- prometheus.MustNewConstMetric( 631 prometheus.NewDesc( 632 prometheus.BuildFQName(minioNamespace, "capacity_raw", "total"), 633 "Total capacity online in the cluster", 634 nil, nil), 635 prometheus.GaugeValue, 636 float64(GetTotalCapacity(server.Disks)), 637 ) 638 639 // Report total capacity free 640 ch <- prometheus.MustNewConstMetric( 641 prometheus.NewDesc( 642 prometheus.BuildFQName(minioNamespace, "capacity_raw_free", "total"), 643 "Total free capacity online in the cluster", 644 nil, nil), 645 prometheus.GaugeValue, 646 float64(GetTotalCapacityFree(server.Disks)), 647 ) 648 649 s, _ := objLayer.StorageInfo(GlobalContext) 650 // Report total usable capacity 651 ch <- prometheus.MustNewConstMetric( 652 prometheus.NewDesc( 653 prometheus.BuildFQName(minioNamespace, "capacity_usable", "total"), 654 "Total usable capacity online in the cluster", 655 nil, nil), 656 prometheus.GaugeValue, 657 GetTotalUsableCapacity(server.Disks, s), 658 ) 659 // Report total usable capacity free 660 ch <- prometheus.MustNewConstMetric( 661 prometheus.NewDesc( 662 prometheus.BuildFQName(minioNamespace, "capacity_usable_free", "total"), 663 "Total free usable capacity online in the cluster", 664 nil, nil), 665 prometheus.GaugeValue, 666 GetTotalUsableCapacityFree(server.Disks, s), 667 ) 668 669 // MinIO Offline Disks per node 670 ch <- prometheus.MustNewConstMetric( 671 prometheus.NewDesc( 672 prometheus.BuildFQName(minioNamespace, "disks", "offline"), 673 "Total number of offline disks in current MinIO server instance", 674 nil, nil), 675 prometheus.GaugeValue, 676 float64(offlineDisks.Sum()), 677 ) 678 679 // MinIO Total Disks per node 680 ch <- prometheus.MustNewConstMetric( 681 prometheus.NewDesc( 682 prometheus.BuildFQName(minioNamespace, "disks", "total"), 683 "Total number of disks for current MinIO server instance", 684 nil, nil), 685 prometheus.GaugeValue, 686 float64(totalDisks.Sum()), 687 ) 688 689 for _, disk := range server.Disks { 690 // Total disk usage by the disk 691 ch <- prometheus.MustNewConstMetric( 692 prometheus.NewDesc( 693 prometheus.BuildFQName(diskNamespace, "storage", "used"), 694 "Total disk storage used on the disk", 695 []string{"disk"}, nil), 696 prometheus.GaugeValue, 697 float64(disk.UsedSpace), 698 disk.DrivePath, 699 ) 700 701 // Total available space in the disk 702 ch <- prometheus.MustNewConstMetric( 703 prometheus.NewDesc( 704 prometheus.BuildFQName(diskNamespace, "storage", "available"), 705 "Total available space left on the disk", 706 []string{"disk"}, nil), 707 prometheus.GaugeValue, 708 float64(disk.AvailableSpace), 709 disk.DrivePath, 710 ) 711 712 // Total storage space of the disk 713 ch <- prometheus.MustNewConstMetric( 714 prometheus.NewDesc( 715 prometheus.BuildFQName(diskNamespace, "storage", "total"), 716 "Total space on the disk", 717 []string{"disk"}, nil), 718 prometheus.GaugeValue, 719 float64(disk.TotalSpace), 720 disk.DrivePath, 721 ) 722 } 723 } 724 725 func metricsHandler() http.Handler { 726 727 registry := prometheus.NewRegistry() 728 729 err := registry.Register(minioVersionInfo) 730 logger.LogIf(GlobalContext, err) 731 732 err = registry.Register(httpRequestsDuration) 733 logger.LogIf(GlobalContext, err) 734 735 err = registry.Register(newMinioCollector()) 736 logger.LogIf(GlobalContext, err) 737 738 gatherers := prometheus.Gatherers{ 739 prometheus.DefaultGatherer, 740 registry, 741 } 742 // Delegate http serving to Prometheus client library, which will call collector.Collect. 743 return promhttp.InstrumentMetricHandler( 744 registry, 745 promhttp.HandlerFor(gatherers, 746 promhttp.HandlerOpts{ 747 ErrorHandling: promhttp.ContinueOnError, 748 }), 749 ) 750 751 } 752 753 // AuthMiddleware checks if the bearer token is valid and authorized. 754 func AuthMiddleware(h http.Handler) http.Handler { 755 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 756 claims, owner, authErr := webRequestAuthenticate(r) 757 if authErr != nil || !claims.VerifyIssuer("prometheus", true) { 758 w.WriteHeader(http.StatusForbidden) 759 return 760 } 761 // For authenticated users apply IAM policy. 762 if !GlobalIAMSys.IsAllowed(iampolicy.Args{ 763 AccountName: claims.AccessKey, 764 Action: iampolicy.PrometheusAdminAction, 765 ConditionValues: getConditionValues(r, "", claims.AccessKey, claims.Map()), 766 IsOwner: owner, 767 Claims: claims.Map(), 768 }) { 769 w.WriteHeader(http.StatusForbidden) 770 return 771 } 772 h.ServeHTTP(w, r) 773 }) 774 }