storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/metrics-v2.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2018-2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net/http"
    23  	"runtime"
    24  	"strings"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/prometheus/client_golang/prometheus"
    29  	"github.com/prometheus/client_golang/prometheus/promhttp"
    30  	dto "github.com/prometheus/client_model/go"
    31  	"github.com/prometheus/procfs"
    32  
    33  	"storj.io/minio/cmd/logger"
    34  )
    35  
// MetricNamespace is the top-level grouping of metrics; it is one of the
// parts used to create the metric name (see MetricDescription.Namespace).
type MetricNamespace string
    38  
// MetricSubsystem is the sub-grouping for metrics within a namespace
// (see MetricDescription.Subsystem).
type MetricSubsystem string
    41  
// Namespaces available to the metric descriptions declared in this file.
const (
	bucketMetricNamespace    MetricNamespace = "minio_bucket"
	clusterMetricNamespace   MetricNamespace = "minio_cluster"
	healMetricNamespace      MetricNamespace = "minio_heal"
	interNodeMetricNamespace MetricNamespace = "minio_inter_node"
	nodeMetricNamespace      MetricNamespace = "minio_node"
	minioMetricNamespace     MetricNamespace = "minio"
	s3MetricNamespace        MetricNamespace = "minio_s3"
)
    51  
// Subsystems available to the metric descriptions declared in this file.
const (
	cacheSubsystem            MetricSubsystem = "cache"
	capacityRawSubsystem      MetricSubsystem = "capacity_raw"
	capacityUsableSubsystem   MetricSubsystem = "capacity_usable"
	diskSubsystem             MetricSubsystem = "disk"
	fileDescriptorSubsystem   MetricSubsystem = "file_descriptor"
	goRoutines                MetricSubsystem = "go_routine"
	ioSubsystem               MetricSubsystem = "io"
	nodesSubsystem            MetricSubsystem = "nodes"
	objectsSubsystem          MetricSubsystem = "objects"
	processSubsystem          MetricSubsystem = "process"
	replicationSubsystem      MetricSubsystem = "replication"
	requestsSubsystem         MetricSubsystem = "requests"
	requestsRejectedSubsystem MetricSubsystem = "requests_rejected"
	timeSubsystem             MetricSubsystem = "time"
	trafficSubsystem          MetricSubsystem = "traffic"
	softwareSubsystem         MetricSubsystem = "software"
	sysCallSubsystem          MetricSubsystem = "syscall"
	usageSubsystem            MetricSubsystem = "usage"
)
    72  
// MetricName is the individual name of a metric within its namespace and
// subsystem (see MetricDescription.Name).
type MetricName string
    75  
// Metric names available to the metric descriptions declared in this file.
const (
	// Names ending in "_total".
	authTotal      MetricName = "auth_total"
	canceledTotal  MetricName = "canceled_total"
	errorsTotal    MetricName = "errors_total"
	headerTotal    MetricName = "header_total"
	healTotal      MetricName = "heal_total"
	hitsTotal      MetricName = "hits_total"
	inflightTotal  MetricName = "inflight_total"
	invalidTotal   MetricName = "invalid_total"
	limitTotal     MetricName = "limit_total"
	missedTotal    MetricName = "missed_total"
	waitingTotal   MetricName = "waiting_total"
	objectTotal    MetricName = "object_total"
	offlineTotal   MetricName = "offline_total"
	onlineTotal    MetricName = "online_total"
	openTotal      MetricName = "open_total"
	readTotal      MetricName = "read_total"
	timestampTotal MetricName = "timestamp_total"
	writeTotal     MetricName = "write_total"
	total          MetricName = "total"

	// Names ending in "_count" or "_bytes".
	failedCount   MetricName = "failed_count"
	failedBytes   MetricName = "failed_bytes"
	freeBytes     MetricName = "free_bytes"
	pendingBytes  MetricName = "pending_bytes"
	pendingCount  MetricName = "pending_count"
	readBytes     MetricName = "read_bytes"
	rcharBytes    MetricName = "rchar_bytes"
	receivedBytes MetricName = "received_bytes"
	sentBytes     MetricName = "sent_bytes"
	totalBytes    MetricName = "total_bytes"
	usedBytes     MetricName = "used_bytes"
	writeBytes    MetricName = "write_bytes"
	wcharBytes    MetricName = "wchar_bytes"

	// NOTE(review): the identifier says "usage" but the value is
	// "update_percent"; this looks like a typo for "usage_percent". Changing it
	// would rename an exported metric, so confirm before touching it.
	usagePercent MetricName = "update_percent"

	// "_info" names.
	commitInfo  MetricName = "commit_info"
	usageInfo   MetricName = "usage_info"
	versionInfo MetricName = "version_info"

	// The constants below carry no explicit type, so they are untyped string
	// constants rather than MetricName values; they are still assignable to a
	// MetricName field.
	sizeDistribution = "size_distribution"
	ttfbDistribution = "ttfb_seconds_distribution"

	lastActivityTime = "last_activity_nano_seconds"
	startTime        = "starttime_seconds"
	upTime           = "uptime_seconds"
)
   124  
const (
	// serverName is the untyped string constant "server".
	// NOTE(review): presumably used as a label key identifying the reporting
	// server; its uses are outside this part of the file - confirm.
	serverName = "server"
)
   128  
// MetricType names the kind of metric a MetricDescription refers to
// (see the gaugeMetric, counterMetric and histogramMetric constants).
type MetricType string
   131  
// Supported metric kinds. These are untyped string constants (no explicit
// MetricType), which remain assignable to MetricDescription.Type.
const (
	gaugeMetric     = "gaugeMetric"
	counterMetric   = "counterMetric"
	histogramMetric = "histogramMetric"
)
   137  
// MetricDescription describes the metric: where it is grouped (namespace and
// subsystem), its name, its help text and its kind.
type MetricDescription struct {
	// Namespace is the top-level grouping. Note the JSON key is
	// "MetricNamespace", not "Namespace".
	Namespace MetricNamespace `json:"MetricNamespace"`
	// Subsystem is the sub-grouping within the namespace.
	Subsystem MetricSubsystem `json:"Subsystem"`
	// Name is the individual metric name. Note the JSON key is "MetricName".
	Name MetricName `json:"MetricName"`
	// Help is the human-readable explanation of the metric.
	Help string `json:"Help"`
	// Type is the metric kind (gauge, counter or histogram).
	Type MetricType `json:"Type"`
}
   146  
// Metric captures the details for a metric: its description, two label maps,
// a scalar value and histogram data. Use copyMetric to obtain a copy whose
// maps are independent of the original.
type Metric struct {
	// Description identifies the metric (namespace, subsystem, name, help, type).
	Description MetricDescription `json:"Description"`
	// StaticLabels is a label map.
	// NOTE(review): presumably labels that are constant for the metric - confirm.
	StaticLabels map[string]string `json:"StaticLabels"`
	// Value is the scalar value of the metric.
	Value float64 `json:"Value"`
	// VariableLabels is a label map.
	// NOTE(review): presumably labels that vary per sample - confirm.
	VariableLabels map[string]string `json:"VariableLabels"`
	// HistogramBucketLabel is a label name associated with Histogram.
	// NOTE(review): presumably the label under which each bucket key is
	// reported - confirm against the collector.
	HistogramBucketLabel string `json:"HistogramBucketLabel"`
	// Histogram maps a string key to a count.
	// NOTE(review): presumably bucket name -> observations - confirm.
	Histogram map[string]uint64 `json:"Histogram"`
}
   156  
   157  func (m *Metric) copyMetric() Metric {
   158  	metric := Metric{
   159  		Description:          m.Description,
   160  		Value:                m.Value,
   161  		HistogramBucketLabel: m.HistogramBucketLabel,
   162  		StaticLabels:         make(map[string]string),
   163  		VariableLabels:       make(map[string]string),
   164  		Histogram:            make(map[string]uint64),
   165  	}
   166  	for k, v := range m.StaticLabels {
   167  		metric.StaticLabels[k] = v
   168  	}
   169  	for k, v := range m.VariableLabels {
   170  		metric.VariableLabels[k] = v
   171  	}
   172  	for k, v := range m.Histogram {
   173  		metric.Histogram[k] = v
   174  	}
   175  	return metric
   176  }
   177  
// MetricsGroup is a group of metrics that are initialized together.
type MetricsGroup struct {
	// id identifies the group; cachedRead uses it as the key into
	// metricsGroupCache.
	id string
	// cacheInterval is the TTL cachedRead applies to the group's cached
	// metrics; a zero value makes cachedRead fall back to 30 seconds.
	cacheInterval time.Duration
	// cachedRead returns the group's metrics through a cache. It has the same
	// signature as the package-level cachedRead function, which
	// getMinioProcMetrics assigns to it.
	cachedRead func(ctx context.Context, mg *MetricsGroup) []Metric
	// read produces the group's metrics; the package-level cachedRead calls it
	// to refresh the cached value.
	read func(ctx context.Context) []Metric
}
   185  
// metricsGroupCache holds one timedValue per MetricsGroup, keyed by the
// group's id. It is populated lazily by cachedRead.
var metricsGroupCache = make(map[string]*timedValue)

// cacheLock is held by cachedRead for its entire duration, covering both the
// metricsGroupCache lookup/insert and the read of the cached value.
var cacheLock sync.Mutex
   188  
   189  func cachedRead(ctx context.Context, mg *MetricsGroup) (metrics []Metric) {
   190  	cacheLock.Lock()
   191  	defer cacheLock.Unlock()
   192  	v, ok := metricsGroupCache[mg.id]
   193  	if !ok {
   194  		interval := mg.cacheInterval
   195  		if interval == 0 {
   196  			interval = 30 * time.Second
   197  		}
   198  		v = &timedValue{}
   199  		v.Once.Do(func() {
   200  			v.Update = func() (interface{}, error) {
   201  				c := mg.read(ctx)
   202  				return c, nil
   203  			}
   204  			v.TTL = interval
   205  		})
   206  		metricsGroupCache[mg.id] = v
   207  	}
   208  	c, err := v.Get()
   209  	if err != nil {
   210  		return []Metric{}
   211  	}
   212  	m := c.([]Metric)
   213  	for i := range m {
   214  		metrics = append(metrics, m[i].copyMetric())
   215  	}
   216  	return metrics
   217  }
   218  
// MetricsGenerator is a function that generates a metric group. The
// Get*Generators functions in this file return lists of such functions.
type MetricsGenerator func() MetricsGroup
   221  
   222  // GetGlobalGenerators gets all the generators the report global metrics pre calculated.
   223  func GetGlobalGenerators() []MetricsGenerator {
   224  	g := []MetricsGenerator{
   225  		getBucketUsageMetrics,
   226  		getMinioHealingMetrics,
   227  		getNodeHealthMetrics,
   228  		getClusterStorageMetrics,
   229  	}
   230  	return g
   231  }
   232  
   233  // GetAllGenerators gets all the metric generators.
   234  func GetAllGenerators() []MetricsGenerator {
   235  	g := GetGlobalGenerators()
   236  	g = append(g, GetGeneratorsForPeer()...)
   237  	return g
   238  }
   239  
   240  // GetGeneratorsForPeer - gets the generators to report to peer.
   241  func GetGeneratorsForPeer() []MetricsGenerator {
   242  	g := []MetricsGenerator{
   243  		getCacheMetrics,
   244  		getGoMetrics,
   245  		getHTTPMetrics,
   246  		getLocalStorageMetrics,
   247  		getMinioProcMetrics,
   248  		getMinioVersionMetrics,
   249  		getNetworkMetrics,
   250  		getS3TTFBMetric,
   251  	}
   252  	return g
   253  }
   254  
   255  // GetSingleNodeGenerators gets the metrics that are local
   256  func GetSingleNodeGenerators() []MetricsGenerator {
   257  	g := []MetricsGenerator{
   258  		getNodeHealthMetrics,
   259  		getCacheMetrics,
   260  		getHTTPMetrics,
   261  		getNetworkMetrics,
   262  		getMinioVersionMetrics,
   263  		getS3TTFBMetric,
   264  	}
   265  	return g
   266  }
   267  
   268  func getClusterCapacityTotalBytesMD() MetricDescription {
   269  	return MetricDescription{
   270  		Namespace: clusterMetricNamespace,
   271  		Subsystem: capacityRawSubsystem,
   272  		Name:      totalBytes,
   273  		Help:      "Total capacity online in the cluster.",
   274  		Type:      gaugeMetric,
   275  	}
   276  }
   277  func getClusterCapacityFreeBytesMD() MetricDescription {
   278  	return MetricDescription{
   279  		Namespace: clusterMetricNamespace,
   280  		Subsystem: capacityRawSubsystem,
   281  		Name:      freeBytes,
   282  		Help:      "Total free capacity online in the cluster.",
   283  		Type:      gaugeMetric,
   284  	}
   285  }
   286  func getClusterCapacityUsageBytesMD() MetricDescription {
   287  	return MetricDescription{
   288  		Namespace: clusterMetricNamespace,
   289  		Subsystem: capacityUsableSubsystem,
   290  		Name:      totalBytes,
   291  		Help:      "Total usable capacity online in the cluster.",
   292  		Type:      gaugeMetric,
   293  	}
   294  }
   295  func getClusterCapacityUsageFreeBytesMD() MetricDescription {
   296  	return MetricDescription{
   297  		Namespace: clusterMetricNamespace,
   298  		Subsystem: capacityUsableSubsystem,
   299  		Name:      freeBytes,
   300  		Help:      "Total free usable capacity online in the cluster.",
   301  		Type:      gaugeMetric,
   302  	}
   303  }
   304  
   305  func getNodeDiskUsedBytesMD() MetricDescription {
   306  	return MetricDescription{
   307  		Namespace: nodeMetricNamespace,
   308  		Subsystem: diskSubsystem,
   309  		Name:      usedBytes,
   310  		Help:      "Total storage used on a disk.",
   311  		Type:      gaugeMetric,
   312  	}
   313  }
   314  func getNodeDiskFreeBytesMD() MetricDescription {
   315  	return MetricDescription{
   316  		Namespace: nodeMetricNamespace,
   317  		Subsystem: diskSubsystem,
   318  		Name:      freeBytes,
   319  		Help:      "Total storage available on a disk.",
   320  		Type:      gaugeMetric,
   321  	}
   322  }
   323  func getClusterDisksOfflineTotalMD() MetricDescription {
   324  	return MetricDescription{
   325  		Namespace: clusterMetricNamespace,
   326  		Subsystem: diskSubsystem,
   327  		Name:      offlineTotal,
   328  		Help:      "Total disks offline.",
   329  		Type:      gaugeMetric,
   330  	}
   331  }
   332  
   333  func getClusterDisksOnlineTotalMD() MetricDescription {
   334  	return MetricDescription{
   335  		Namespace: clusterMetricNamespace,
   336  		Subsystem: diskSubsystem,
   337  		Name:      onlineTotal,
   338  		Help:      "Total disks online.",
   339  		Type:      gaugeMetric,
   340  	}
   341  }
   342  
   343  func getClusterDisksTotalMD() MetricDescription {
   344  	return MetricDescription{
   345  		Namespace: clusterMetricNamespace,
   346  		Subsystem: diskSubsystem,
   347  		Name:      total,
   348  		Help:      "Total disks.",
   349  		Type:      gaugeMetric,
   350  	}
   351  }
   352  
   353  func getNodeDiskTotalBytesMD() MetricDescription {
   354  	return MetricDescription{
   355  		Namespace: nodeMetricNamespace,
   356  		Subsystem: diskSubsystem,
   357  		Name:      totalBytes,
   358  		Help:      "Total storage on a disk.",
   359  		Type:      gaugeMetric,
   360  	}
   361  }
   362  func getUsageLastScanActivityMD() MetricDescription {
   363  	return MetricDescription{
   364  		Namespace: minioMetricNamespace,
   365  		Subsystem: usageSubsystem,
   366  		Name:      lastActivityTime,
   367  		Help:      "Time elapsed (in nano seconds) since last scan activity. This is set to 0 until first scan cycle",
   368  		Type:      gaugeMetric,
   369  	}
   370  }
   371  
   372  func getBucketUsageTotalBytesMD() MetricDescription {
   373  	return MetricDescription{
   374  		Namespace: bucketMetricNamespace,
   375  		Subsystem: usageSubsystem,
   376  		Name:      totalBytes,
   377  		Help:      "Total bucket size in bytes",
   378  		Type:      gaugeMetric,
   379  	}
   380  }
   381  func getBucketUsageObjectsTotalMD() MetricDescription {
   382  	return MetricDescription{
   383  		Namespace: bucketMetricNamespace,
   384  		Subsystem: usageSubsystem,
   385  		Name:      objectTotal,
   386  		Help:      "Total number of objects",
   387  		Type:      gaugeMetric,
   388  	}
   389  }
   390  func getBucketRepPendingBytesMD() MetricDescription {
   391  	return MetricDescription{
   392  		Namespace: bucketMetricNamespace,
   393  		Subsystem: replicationSubsystem,
   394  		Name:      pendingBytes,
   395  		Help:      "Total bytes pending to replicate.",
   396  		Type:      gaugeMetric,
   397  	}
   398  }
   399  func getBucketRepFailedBytesMD() MetricDescription {
   400  	return MetricDescription{
   401  		Namespace: bucketMetricNamespace,
   402  		Subsystem: replicationSubsystem,
   403  		Name:      failedBytes,
   404  		Help:      "Total number of bytes failed at least once to replicate.",
   405  		Type:      gaugeMetric,
   406  	}
   407  }
   408  func getBucketRepSentBytesMD() MetricDescription {
   409  	return MetricDescription{
   410  		Namespace: bucketMetricNamespace,
   411  		Subsystem: replicationSubsystem,
   412  		Name:      sentBytes,
   413  		Help:      "Total number of bytes replicated to the target bucket.",
   414  		Type:      gaugeMetric,
   415  	}
   416  }
   417  func getBucketRepReceivedBytesMD() MetricDescription {
   418  	return MetricDescription{
   419  		Namespace: bucketMetricNamespace,
   420  		Subsystem: replicationSubsystem,
   421  		Name:      receivedBytes,
   422  		Help:      "Total number of bytes replicated to this bucket from another source bucket.",
   423  		Type:      gaugeMetric,
   424  	}
   425  }
   426  func getBucketRepPendingOperationsMD() MetricDescription {
   427  	return MetricDescription{
   428  		Namespace: bucketMetricNamespace,
   429  		Subsystem: replicationSubsystem,
   430  		Name:      pendingCount,
   431  		Help:      "Total number of objects pending replication",
   432  		Type:      gaugeMetric,
   433  	}
   434  }
   435  func getBucketRepFailedOperationsMD() MetricDescription {
   436  	return MetricDescription{
   437  		Namespace: bucketMetricNamespace,
   438  		Subsystem: replicationSubsystem,
   439  		Name:      failedCount,
   440  		Help:      "Total number of objects which failed replication",
   441  		Type:      gaugeMetric,
   442  	}
   443  }
   444  func getBucketObjectDistributionMD() MetricDescription {
   445  	return MetricDescription{
   446  		Namespace: bucketMetricNamespace,
   447  		Subsystem: objectsSubsystem,
   448  		Name:      sizeDistribution,
   449  		Help:      "Distribution of object sizes in the bucket, includes label for the bucket name.",
   450  		Type:      histogramMetric,
   451  	}
   452  }
   453  func getInternodeFailedRequests() MetricDescription {
   454  	return MetricDescription{
   455  		Namespace: interNodeMetricNamespace,
   456  		Subsystem: trafficSubsystem,
   457  		Name:      errorsTotal,
   458  		Help:      "Total number of failed internode calls.",
   459  		Type:      counterMetric,
   460  	}
   461  }
   462  
   463  func getInterNodeSentBytesMD() MetricDescription {
   464  	return MetricDescription{
   465  		Namespace: interNodeMetricNamespace,
   466  		Subsystem: trafficSubsystem,
   467  		Name:      sentBytes,
   468  		Help:      "Total number of bytes sent to the other peer nodes.",
   469  		Type:      counterMetric,
   470  	}
   471  }
   472  func getInterNodeReceivedBytesMD() MetricDescription {
   473  	return MetricDescription{
   474  		Namespace: interNodeMetricNamespace,
   475  		Subsystem: trafficSubsystem,
   476  		Name:      receivedBytes,
   477  		Help:      "Total number of bytes received from other peer nodes.",
   478  		Type:      counterMetric,
   479  	}
   480  }
   481  func getS3SentBytesMD() MetricDescription {
   482  	return MetricDescription{
   483  		Namespace: s3MetricNamespace,
   484  		Subsystem: trafficSubsystem,
   485  		Name:      sentBytes,
   486  		Help:      "Total number of s3 bytes sent",
   487  		Type:      counterMetric,
   488  	}
   489  }
   490  func getS3ReceivedBytesMD() MetricDescription {
   491  	return MetricDescription{
   492  		Namespace: s3MetricNamespace,
   493  		Subsystem: trafficSubsystem,
   494  		Name:      receivedBytes,
   495  		Help:      "Total number of s3 bytes received.",
   496  		Type:      counterMetric,
   497  	}
   498  }
   499  func getS3RequestsInFlightMD() MetricDescription {
   500  	return MetricDescription{
   501  		Namespace: s3MetricNamespace,
   502  		Subsystem: requestsSubsystem,
   503  		Name:      inflightTotal,
   504  		Help:      "Total number of S3 requests currently in flight",
   505  		Type:      gaugeMetric,
   506  	}
   507  }
   508  func getS3RequestsInQueueMD() MetricDescription {
   509  	return MetricDescription{
   510  		Namespace: s3MetricNamespace,
   511  		Subsystem: requestsSubsystem,
   512  		Name:      waitingTotal,
   513  		Help:      "Number of S3 requests in the waiting queue",
   514  		Type:      gaugeMetric,
   515  	}
   516  }
   517  func getS3RequestsTotalMD() MetricDescription {
   518  	return MetricDescription{
   519  		Namespace: s3MetricNamespace,
   520  		Subsystem: requestsSubsystem,
   521  		Name:      total,
   522  		Help:      "Total number S3 requests",
   523  		Type:      counterMetric,
   524  	}
   525  }
   526  func getS3RequestsErrorsMD() MetricDescription {
   527  	return MetricDescription{
   528  		Namespace: s3MetricNamespace,
   529  		Subsystem: requestsSubsystem,
   530  		Name:      errorsTotal,
   531  		Help:      "Total number S3 requests with errors",
   532  		Type:      counterMetric,
   533  	}
   534  }
   535  func getS3RequestsCanceledMD() MetricDescription {
   536  	return MetricDescription{
   537  		Namespace: s3MetricNamespace,
   538  		Subsystem: requestsSubsystem,
   539  		Name:      canceledTotal,
   540  		Help:      "Total number S3 requests that were canceled from the client while processing",
   541  		Type:      counterMetric,
   542  	}
   543  }
   544  func getS3RejectedAuthRequestsTotalMD() MetricDescription {
   545  	return MetricDescription{
   546  		Namespace: s3MetricNamespace,
   547  		Subsystem: requestsRejectedSubsystem,
   548  		Name:      authTotal,
   549  		Help:      "Total number S3 requests rejected for auth failure.",
   550  		Type:      counterMetric,
   551  	}
   552  }
   553  func getS3RejectedHeaderRequestsTotalMD() MetricDescription {
   554  	return MetricDescription{
   555  		Namespace: s3MetricNamespace,
   556  		Subsystem: requestsRejectedSubsystem,
   557  		Name:      headerTotal,
   558  		Help:      "Total number S3 requests rejected for invalid header.",
   559  		Type:      counterMetric,
   560  	}
   561  }
   562  func getS3RejectedTimestampRequestsTotalMD() MetricDescription {
   563  	return MetricDescription{
   564  		Namespace: s3MetricNamespace,
   565  		Subsystem: requestsRejectedSubsystem,
   566  		Name:      timestampTotal,
   567  		Help:      "Total number S3 requests rejected for invalid timestamp.",
   568  		Type:      counterMetric,
   569  	}
   570  }
   571  func getS3RejectedInvalidRequestsTotalMD() MetricDescription {
   572  	return MetricDescription{
   573  		Namespace: s3MetricNamespace,
   574  		Subsystem: requestsRejectedSubsystem,
   575  		Name:      invalidTotal,
   576  		Help:      "Total number S3 invalid requests.",
   577  		Type:      counterMetric,
   578  	}
   579  }
   580  func getCacheHitsTotalMD() MetricDescription {
   581  	return MetricDescription{
   582  		Namespace: minioNamespace,
   583  		Subsystem: cacheSubsystem,
   584  		Name:      hitsTotal,
   585  		Help:      "Total number of disk cache hits",
   586  		Type:      counterMetric,
   587  	}
   588  }
   589  func getCacheHitsMissedTotalMD() MetricDescription {
   590  	return MetricDescription{
   591  		Namespace: minioNamespace,
   592  		Subsystem: cacheSubsystem,
   593  		Name:      missedTotal,
   594  		Help:      "Total number of disk cache misses",
   595  		Type:      counterMetric,
   596  	}
   597  }
   598  func getCacheUsagePercentMD() MetricDescription {
   599  	return MetricDescription{
   600  		Namespace: minioNamespace,
   601  		Subsystem: minioNamespace,
   602  		Name:      usagePercent,
   603  		Help:      "Total percentage cache usage",
   604  		Type:      gaugeMetric,
   605  	}
   606  }
   607  func getCacheUsageInfoMD() MetricDescription {
   608  	return MetricDescription{
   609  		Namespace: minioNamespace,
   610  		Subsystem: cacheSubsystem,
   611  		Name:      usageInfo,
   612  		Help:      "Total percentage cache usage, value of 1 indicates high and 0 low, label level is set as well",
   613  		Type:      gaugeMetric,
   614  	}
   615  }
   616  func getCacheUsedBytesMD() MetricDescription {
   617  	return MetricDescription{
   618  		Namespace: minioNamespace,
   619  		Subsystem: cacheSubsystem,
   620  		Name:      usedBytes,
   621  		Help:      "Current cache usage in bytes",
   622  		Type:      gaugeMetric,
   623  	}
   624  }
   625  func getCacheTotalBytesMD() MetricDescription {
   626  	return MetricDescription{
   627  		Namespace: minioNamespace,
   628  		Subsystem: cacheSubsystem,
   629  		Name:      totalBytes,
   630  		Help:      "Total size of cache disk in bytes",
   631  		Type:      gaugeMetric,
   632  	}
   633  }
   634  func getCacheSentBytesMD() MetricDescription {
   635  	return MetricDescription{
   636  		Namespace: minioNamespace,
   637  		Subsystem: cacheSubsystem,
   638  		Name:      sentBytes,
   639  		Help:      "Total number of bytes served from cache",
   640  		Type:      counterMetric,
   641  	}
   642  }
   643  func getHealObjectsTotalMD() MetricDescription {
   644  	return MetricDescription{
   645  		Namespace: healMetricNamespace,
   646  		Subsystem: objectsSubsystem,
   647  		Name:      total,
   648  		Help:      "Objects scanned in current self healing run",
   649  		Type:      gaugeMetric,
   650  	}
   651  }
   652  func getHealObjectsHealTotalMD() MetricDescription {
   653  	return MetricDescription{
   654  		Namespace: healMetricNamespace,
   655  		Subsystem: objectsSubsystem,
   656  		Name:      healTotal,
   657  		Help:      "Objects healed in current self healing run",
   658  		Type:      gaugeMetric,
   659  	}
   660  }
   661  
   662  func getHealObjectsFailTotalMD() MetricDescription {
   663  	return MetricDescription{
   664  		Namespace: healMetricNamespace,
   665  		Subsystem: objectsSubsystem,
   666  		Name:      errorsTotal,
   667  		Help:      "Objects for which healing failed in current self healing run",
   668  		Type:      gaugeMetric,
   669  	}
   670  }
   671  func getHealLastActivityTimeMD() MetricDescription {
   672  	return MetricDescription{
   673  		Namespace: healMetricNamespace,
   674  		Subsystem: timeSubsystem,
   675  		Name:      lastActivityTime,
   676  		Help:      "Time elapsed (in nano seconds) since last self healing activity. This is set to -1 until initial self heal activity",
   677  		Type:      gaugeMetric,
   678  	}
   679  }
   680  func getNodeOnlineTotalMD() MetricDescription {
   681  	return MetricDescription{
   682  		Namespace: clusterMetricNamespace,
   683  		Subsystem: nodesSubsystem,
   684  		Name:      onlineTotal,
   685  		Help:      "Total number of MinIO nodes online.",
   686  		Type:      gaugeMetric,
   687  	}
   688  }
   689  func getNodeOfflineTotalMD() MetricDescription {
   690  	return MetricDescription{
   691  		Namespace: clusterMetricNamespace,
   692  		Subsystem: nodesSubsystem,
   693  		Name:      offlineTotal,
   694  		Help:      "Total number of MinIO nodes offline.",
   695  		Type:      gaugeMetric,
   696  	}
   697  }
   698  func getMinIOVersionMD() MetricDescription {
   699  	return MetricDescription{
   700  		Namespace: minioMetricNamespace,
   701  		Subsystem: softwareSubsystem,
   702  		Name:      versionInfo,
   703  		Help:      "MinIO Release tag for the server",
   704  		Type:      gaugeMetric,
   705  	}
   706  }
   707  func getMinIOCommitMD() MetricDescription {
   708  	return MetricDescription{
   709  		Namespace: minioMetricNamespace,
   710  		Subsystem: softwareSubsystem,
   711  		Name:      commitInfo,
   712  		Help:      "Git commit hash for the MinIO release.",
   713  		Type:      gaugeMetric,
   714  	}
   715  }
   716  func getS3TTFBDistributionMD() MetricDescription {
   717  	return MetricDescription{
   718  		Namespace: s3MetricNamespace,
   719  		Subsystem: timeSubsystem,
   720  		Name:      ttfbDistribution,
   721  		Help:      "Distribution of the time to first byte across API calls.",
   722  		Type:      gaugeMetric,
   723  	}
   724  }
   725  func getMinioFDOpenMD() MetricDescription {
   726  	return MetricDescription{
   727  		Namespace: nodeMetricNamespace,
   728  		Subsystem: fileDescriptorSubsystem,
   729  		Name:      openTotal,
   730  		Help:      "Total number of open file descriptors by the MinIO Server process.",
   731  		Type:      gaugeMetric,
   732  	}
   733  }
   734  func getMinioFDLimitMD() MetricDescription {
   735  	return MetricDescription{
   736  		Namespace: nodeMetricNamespace,
   737  		Subsystem: fileDescriptorSubsystem,
   738  		Name:      limitTotal,
   739  		Help:      "Limit on total number of open file descriptors for the MinIO Server process.",
   740  		Type:      gaugeMetric,
   741  	}
   742  }
   743  func getMinioProcessIOWriteBytesMD() MetricDescription {
   744  	return MetricDescription{
   745  		Namespace: nodeMetricNamespace,
   746  		Subsystem: ioSubsystem,
   747  		Name:      writeBytes,
   748  		Help:      "Total bytes written by the process to the underlying storage system, /proc/[pid]/io write_bytes",
   749  		Type:      counterMetric,
   750  	}
   751  }
   752  func getMinioProcessIOReadBytesMD() MetricDescription {
   753  	return MetricDescription{
   754  		Namespace: nodeMetricNamespace,
   755  		Subsystem: ioSubsystem,
   756  		Name:      readBytes,
   757  		Help:      "Total bytes read by the process from the underlying storage system, /proc/[pid]/io read_bytes",
   758  		Type:      counterMetric,
   759  	}
   760  }
   761  func getMinioProcessIOWriteCachedBytesMD() MetricDescription {
   762  	return MetricDescription{
   763  		Namespace: nodeMetricNamespace,
   764  		Subsystem: ioSubsystem,
   765  		Name:      wcharBytes,
   766  		Help:      "Total bytes written by the process to the underlying storage system including page cache, /proc/[pid]/io wchar",
   767  		Type:      counterMetric,
   768  	}
   769  }
   770  func getMinioProcessIOReadCachedBytesMD() MetricDescription {
   771  	return MetricDescription{
   772  		Namespace: nodeMetricNamespace,
   773  		Subsystem: ioSubsystem,
   774  		Name:      rcharBytes,
   775  		Help:      "Total bytes read by the process from the underlying storage system including cache, /proc/[pid]/io rchar",
   776  		Type:      counterMetric,
   777  	}
   778  }
   779  func getMinIOProcessSysCallRMD() MetricDescription {
   780  	return MetricDescription{
   781  		Namespace: nodeMetricNamespace,
   782  		Subsystem: sysCallSubsystem,
   783  		Name:      readTotal,
   784  		Help:      "Total read SysCalls to the kernel. /proc/[pid]/io syscr",
   785  		Type:      counterMetric,
   786  	}
   787  }
   788  func getMinIOProcessSysCallWMD() MetricDescription {
   789  	return MetricDescription{
   790  		Namespace: nodeMetricNamespace,
   791  		Subsystem: sysCallSubsystem,
   792  		Name:      writeTotal,
   793  		Help:      "Total write SysCalls to the kernel. /proc/[pid]/io syscw",
   794  		Type:      counterMetric,
   795  	}
   796  }
   797  func getMinIOGORoutineCountMD() MetricDescription {
   798  	return MetricDescription{
   799  		Namespace: nodeMetricNamespace,
   800  		Subsystem: goRoutines,
   801  		Name:      total,
   802  		Help:      "Total number of go routines running.",
   803  		Type:      gaugeMetric,
   804  	}
   805  }
   806  func getMinIOProcessStartTimeMD() MetricDescription {
   807  	return MetricDescription{
   808  		Namespace: nodeMetricNamespace,
   809  		Subsystem: processSubsystem,
   810  		Name:      startTime,
   811  		Help:      "Start time for MinIO process per node, time in seconds since Unix epoc.",
   812  		Type:      gaugeMetric,
   813  	}
   814  }
   815  func getMinIOProcessUptimeMD() MetricDescription {
   816  	return MetricDescription{
   817  		Namespace: nodeMetricNamespace,
   818  		Subsystem: processSubsystem,
   819  		Name:      upTime,
   820  		Help:      "Uptime for MinIO process per node in seconds.",
   821  		Type:      gaugeMetric,
   822  	}
   823  }
   824  func getMinioProcMetrics() MetricsGroup {
   825  	return MetricsGroup{
   826  		id:         "MinioProcMetrics",
   827  		cachedRead: cachedRead,
   828  		read: func(ctx context.Context) (metrics []Metric) {
   829  			if runtime.GOOS == "windows" {
   830  				return nil
   831  			}
   832  			metrics = make([]Metric, 0, 20)
   833  			p, err := procfs.Self()
   834  			if err != nil {
   835  				logger.LogOnceIf(ctx, err, nodeMetricNamespace)
   836  				return
   837  			}
   838  			var openFDs int
   839  			openFDs, err = p.FileDescriptorsLen()
   840  			if err != nil {
   841  				logger.LogOnceIf(ctx, err, getMinioFDOpenMD())
   842  				return
   843  			}
   844  			l, err := p.Limits()
   845  			if err != nil {
   846  				logger.LogOnceIf(ctx, err, getMinioFDLimitMD())
   847  				return
   848  			}
   849  			io, err := p.IO()
   850  			if err != nil {
   851  				logger.LogOnceIf(ctx, err, ioSubsystem)
   852  				return
   853  			}
   854  			stat, err := p.Stat()
   855  			if err != nil {
   856  				logger.LogOnceIf(ctx, err, processSubsystem)
   857  				return
   858  			}
   859  			startTime, err := stat.StartTime()
   860  			if err != nil {
   861  				logger.LogOnceIf(ctx, err, startTime)
   862  				return
   863  			}
   864  
   865  			metrics = append(metrics,
   866  				Metric{
   867  					Description: getMinioFDOpenMD(),
   868  					Value:       float64(openFDs),
   869  				},
   870  			)
   871  			metrics = append(metrics,
   872  				Metric{
   873  					Description: getMinioFDLimitMD(),
   874  					Value:       float64(l.OpenFiles),
   875  				})
   876  			metrics = append(metrics,
   877  				Metric{
   878  					Description: getMinIOProcessSysCallRMD(),
   879  					Value:       float64(io.SyscR),
   880  				})
   881  			metrics = append(metrics,
   882  				Metric{
   883  					Description: getMinIOProcessSysCallWMD(),
   884  					Value:       float64(io.SyscW),
   885  				})
   886  			metrics = append(metrics,
   887  				Metric{
   888  					Description: getMinioProcessIOReadBytesMD(),
   889  					Value:       float64(io.ReadBytes),
   890  				})
   891  			metrics = append(metrics,
   892  				Metric{
   893  					Description: getMinioProcessIOWriteBytesMD(),
   894  					Value:       float64(io.WriteBytes),
   895  				})
   896  			metrics = append(metrics,
   897  				Metric{
   898  					Description: getMinioProcessIOReadCachedBytesMD(),
   899  					Value:       float64(io.RChar),
   900  				})
   901  			metrics = append(metrics,
   902  				Metric{
   903  					Description: getMinioProcessIOWriteCachedBytesMD(),
   904  					Value:       float64(io.WChar),
   905  				})
   906  			metrics = append(metrics,
   907  				Metric{
   908  					Description: getMinIOProcessStartTimeMD(),
   909  					Value:       startTime,
   910  				})
   911  			metrics = append(metrics,
   912  				Metric{
   913  					Description: getMinIOProcessUptimeMD(),
   914  					Value:       time.Since(globalBootTime).Seconds(),
   915  				})
   916  			return
   917  		},
   918  	}
   919  }
   920  func getGoMetrics() MetricsGroup {
   921  	return MetricsGroup{
   922  		id:         "GoMetrics",
   923  		cachedRead: cachedRead,
   924  		read: func(ctx context.Context) (metrics []Metric) {
   925  			metrics = append(metrics, Metric{
   926  				Description: getMinIOGORoutineCountMD(),
   927  				Value:       float64(runtime.NumGoroutine()),
   928  			})
   929  			return
   930  		},
   931  	}
   932  }
   933  func getS3TTFBMetric() MetricsGroup {
   934  	return MetricsGroup{
   935  		id:         "s3TTFBMetric",
   936  		cachedRead: cachedRead,
   937  		read: func(ctx context.Context) (metrics []Metric) {
   938  
   939  			// Read prometheus metric on this channel
   940  			ch := make(chan prometheus.Metric)
   941  			var wg sync.WaitGroup
   942  			wg.Add(1)
   943  
   944  			// Read prometheus histogram data and convert it to internal metric data
   945  			go func() {
   946  				defer wg.Done()
   947  				for promMetric := range ch {
   948  					dtoMetric := &dto.Metric{}
   949  					err := promMetric.Write(dtoMetric)
   950  					if err != nil {
   951  						logger.LogIf(GlobalContext, err)
   952  						return
   953  					}
   954  					h := dtoMetric.GetHistogram()
   955  					for _, b := range h.Bucket {
   956  						labels := make(map[string]string)
   957  						for _, lp := range dtoMetric.GetLabel() {
   958  							labels[*lp.Name] = *lp.Value
   959  						}
   960  						labels["le"] = fmt.Sprintf("%.3f", *b.UpperBound)
   961  						metric := Metric{
   962  							Description:    getS3TTFBDistributionMD(),
   963  							VariableLabels: labels,
   964  							Value:          float64(b.GetCumulativeCount()),
   965  						}
   966  						metrics = append(metrics, metric)
   967  					}
   968  				}
   969  
   970  			}()
   971  
   972  			httpRequestsDuration.Collect(ch)
   973  			close(ch)
   974  			wg.Wait()
   975  			return
   976  		},
   977  	}
   978  }
   979  
   980  func getMinioVersionMetrics() MetricsGroup {
   981  	return MetricsGroup{
   982  		id:         "MinioVersionMetrics",
   983  		cachedRead: cachedRead,
   984  		read: func(_ context.Context) (metrics []Metric) {
   985  			metrics = append(metrics, Metric{
   986  				Description:    getMinIOCommitMD(),
   987  				VariableLabels: map[string]string{"commit": CommitID},
   988  			})
   989  			metrics = append(metrics, Metric{
   990  				Description:    getMinIOVersionMD(),
   991  				VariableLabels: map[string]string{"version": Version},
   992  			})
   993  			return
   994  		},
   995  	}
   996  }
   997  
   998  func getNodeHealthMetrics() MetricsGroup {
   999  	return MetricsGroup{
  1000  		id:         "NodeHealthMetrics",
  1001  		cachedRead: cachedRead,
  1002  		read: func(_ context.Context) (metrics []Metric) {
  1003  			nodesUp, nodesDown := GetPeerOnlineCount()
  1004  			metrics = append(metrics, Metric{
  1005  				Description: getNodeOnlineTotalMD(),
  1006  				Value:       float64(nodesUp),
  1007  			})
  1008  			metrics = append(metrics, Metric{
  1009  				Description: getNodeOfflineTotalMD(),
  1010  				Value:       float64(nodesDown),
  1011  			})
  1012  			return
  1013  		},
  1014  	}
  1015  }
  1016  
  1017  func getMinioHealingMetrics() MetricsGroup {
  1018  	return MetricsGroup{
  1019  		id:         "minioHealingMetrics",
  1020  		cachedRead: cachedRead,
  1021  		read: func(_ context.Context) (metrics []Metric) {
  1022  			metrics = make([]Metric, 0, 5)
  1023  			if !globalIsErasure {
  1024  				return
  1025  			}
  1026  			bgSeq, exists := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
  1027  			if !exists {
  1028  				return
  1029  			}
  1030  
  1031  			if bgSeq.lastHealActivity.IsZero() {
  1032  				return
  1033  			}
  1034  
  1035  			metrics = append(metrics, Metric{
  1036  				Description: getHealLastActivityTimeMD(),
  1037  				Value:       float64(time.Since(bgSeq.lastHealActivity)),
  1038  			})
  1039  			metrics = append(metrics, getObjectsScanned(bgSeq)...)
  1040  			metrics = append(metrics, getScannedItems(bgSeq)...)
  1041  			metrics = append(metrics, getFailedItems(bgSeq)...)
  1042  			return
  1043  		},
  1044  	}
  1045  }
  1046  
  1047  func getFailedItems(seq *healSequence) (m []Metric) {
  1048  	m = make([]Metric, 0, 1)
  1049  	for k, v := range seq.gethealFailedItemsMap() {
  1050  		s := strings.Split(k, ",")
  1051  		m = append(m, Metric{
  1052  			Description: getHealObjectsFailTotalMD(),
  1053  			VariableLabels: map[string]string{
  1054  				"mount_path":    s[0],
  1055  				"volume_status": s[1],
  1056  			},
  1057  			Value: float64(v),
  1058  		})
  1059  	}
  1060  	return
  1061  }
  1062  
  1063  func getScannedItems(seq *healSequence) (m []Metric) {
  1064  	items := seq.getHealedItemsMap()
  1065  	m = make([]Metric, 0, len(items))
  1066  	for k, v := range items {
  1067  		m = append(m, Metric{
  1068  			Description:    getHealObjectsHealTotalMD(),
  1069  			VariableLabels: map[string]string{"type": string(k)},
  1070  			Value:          float64(v),
  1071  		})
  1072  	}
  1073  	return
  1074  }
  1075  
  1076  func getObjectsScanned(seq *healSequence) (m []Metric) {
  1077  	items := seq.getHealedItemsMap()
  1078  	m = make([]Metric, 0, len(items))
  1079  	for k, v := range seq.getScannedItemsMap() {
  1080  		m = append(m, Metric{
  1081  			Description:    getHealObjectsTotalMD(),
  1082  			VariableLabels: map[string]string{"type": string(k)},
  1083  			Value:          float64(v),
  1084  		})
  1085  	}
  1086  	return
  1087  }
  1088  func getCacheMetrics() MetricsGroup {
  1089  	return MetricsGroup{
  1090  		id:         "CacheMetrics",
  1091  		cachedRead: cachedRead,
  1092  		read: func(ctx context.Context) (metrics []Metric) {
  1093  			metrics = make([]Metric, 0, 20)
  1094  			cacheObjLayer := newCachedObjectLayerFn()
  1095  			// Service not initialized yet
  1096  			if cacheObjLayer == nil {
  1097  				return
  1098  			}
  1099  			metrics = append(metrics, Metric{
  1100  				Description: getCacheHitsTotalMD(),
  1101  				Value:       float64(cacheObjLayer.CacheStats().getHits()),
  1102  			})
  1103  			metrics = append(metrics, Metric{
  1104  				Description: getCacheHitsMissedTotalMD(),
  1105  				Value:       float64(cacheObjLayer.CacheStats().getMisses()),
  1106  			})
  1107  			metrics = append(metrics, Metric{
  1108  				Description: getCacheSentBytesMD(),
  1109  				Value:       float64(cacheObjLayer.CacheStats().getBytesServed()),
  1110  			})
  1111  			for _, cdStats := range cacheObjLayer.CacheStats().GetDiskStats() {
  1112  				metrics = append(metrics, Metric{
  1113  					Description:    getCacheUsagePercentMD(),
  1114  					Value:          float64(cdStats.UsagePercent),
  1115  					VariableLabels: map[string]string{"disk": cdStats.Dir},
  1116  				})
  1117  				metrics = append(metrics, Metric{
  1118  					Description:    getCacheUsageInfoMD(),
  1119  					Value:          float64(cdStats.UsageState),
  1120  					VariableLabels: map[string]string{"disk": cdStats.Dir, "level": cdStats.GetUsageLevelString()},
  1121  				})
  1122  				metrics = append(metrics, Metric{
  1123  					Description:    getCacheUsedBytesMD(),
  1124  					Value:          float64(cdStats.UsageSize),
  1125  					VariableLabels: map[string]string{"disk": cdStats.Dir},
  1126  				})
  1127  				metrics = append(metrics, Metric{
  1128  					Description:    getCacheTotalBytesMD(),
  1129  					Value:          float64(cdStats.TotalCapacity),
  1130  					VariableLabels: map[string]string{"disk": cdStats.Dir},
  1131  				})
  1132  			}
  1133  			return
  1134  		},
  1135  	}
  1136  }
  1137  
  1138  func getHTTPMetrics() MetricsGroup {
  1139  	return MetricsGroup{
  1140  		id:         "httpMetrics",
  1141  		cachedRead: cachedRead,
  1142  		read: func(ctx context.Context) (metrics []Metric) {
  1143  			httpStats := globalHTTPStats.toServerHTTPStats()
  1144  			metrics = make([]Metric, 0, 3+
  1145  				len(httpStats.CurrentS3Requests.APIStats)+
  1146  				len(httpStats.TotalS3Requests.APIStats)+
  1147  				len(httpStats.TotalS3Errors.APIStats))
  1148  			metrics = append(metrics, Metric{
  1149  				Description: getS3RejectedAuthRequestsTotalMD(),
  1150  				Value:       float64(httpStats.TotalS3RejectedAuth),
  1151  			})
  1152  			metrics = append(metrics, Metric{
  1153  				Description: getS3RejectedTimestampRequestsTotalMD(),
  1154  				Value:       float64(httpStats.TotalS3RejectedTime),
  1155  			})
  1156  			metrics = append(metrics, Metric{
  1157  				Description: getS3RejectedHeaderRequestsTotalMD(),
  1158  				Value:       float64(httpStats.TotalS3RejectedHeader),
  1159  			})
  1160  			metrics = append(metrics, Metric{
  1161  				Description: getS3RejectedInvalidRequestsTotalMD(),
  1162  				Value:       float64(httpStats.TotalS3RejectedInvalid),
  1163  			})
  1164  			metrics = append(metrics, Metric{
  1165  				Description: getS3RequestsInQueueMD(),
  1166  				Value:       float64(httpStats.S3RequestsInQueue),
  1167  			})
  1168  			for api, value := range httpStats.CurrentS3Requests.APIStats {
  1169  				metrics = append(metrics, Metric{
  1170  					Description:    getS3RequestsInFlightMD(),
  1171  					Value:          float64(value),
  1172  					VariableLabels: map[string]string{"api": api},
  1173  				})
  1174  			}
  1175  			for api, value := range httpStats.TotalS3Requests.APIStats {
  1176  				metrics = append(metrics, Metric{
  1177  					Description:    getS3RequestsTotalMD(),
  1178  					Value:          float64(value),
  1179  					VariableLabels: map[string]string{"api": api},
  1180  				})
  1181  			}
  1182  			for api, value := range httpStats.TotalS3Errors.APIStats {
  1183  				metrics = append(metrics, Metric{
  1184  					Description:    getS3RequestsErrorsMD(),
  1185  					Value:          float64(value),
  1186  					VariableLabels: map[string]string{"api": api},
  1187  				})
  1188  			}
  1189  			for api, value := range httpStats.TotalS3Canceled.APIStats {
  1190  				metrics = append(metrics, Metric{
  1191  					Description:    getS3RequestsCanceledMD(),
  1192  					Value:          float64(value),
  1193  					VariableLabels: map[string]string{"api": api},
  1194  				})
  1195  			}
  1196  			return
  1197  		},
  1198  	}
  1199  }
  1200  
  1201  func getNetworkMetrics() MetricsGroup {
  1202  	return MetricsGroup{
  1203  		id:         "networkMetrics",
  1204  		cachedRead: cachedRead,
  1205  		read: func(ctx context.Context) (metrics []Metric) {
  1206  			metrics = make([]Metric, 0, 10)
  1207  			metrics = append(metrics, Metric{
  1208  				Description: getInternodeFailedRequests(),
  1209  				Value:       float64(loadAndResetRPCNetworkErrsCounter()),
  1210  			})
  1211  			connStats := globalConnStats.toServerConnStats()
  1212  			metrics = append(metrics, Metric{
  1213  				Description: getInterNodeSentBytesMD(),
  1214  				Value:       float64(connStats.TotalOutputBytes),
  1215  			})
  1216  			metrics = append(metrics, Metric{
  1217  				Description: getInterNodeReceivedBytesMD(),
  1218  				Value:       float64(connStats.TotalInputBytes),
  1219  			})
  1220  			metrics = append(metrics, Metric{
  1221  				Description: getS3SentBytesMD(),
  1222  				Value:       float64(connStats.S3OutputBytes),
  1223  			})
  1224  			metrics = append(metrics, Metric{
  1225  				Description: getS3ReceivedBytesMD(),
  1226  				Value:       float64(connStats.S3InputBytes),
  1227  			})
  1228  			return
  1229  		},
  1230  	}
  1231  }
  1232  
  1233  func getBucketUsageMetrics() MetricsGroup {
  1234  	return MetricsGroup{
  1235  		id:         "BucketUsageMetrics",
  1236  		cachedRead: cachedRead,
  1237  		read: func(ctx context.Context) (metrics []Metric) {
  1238  			metrics = make([]Metric, 0, 50)
  1239  			objLayer := newObjectLayerFn()
  1240  			// Service not initialized yet
  1241  			if objLayer == nil {
  1242  				return
  1243  			}
  1244  
  1245  			if GlobalIsGateway {
  1246  				return
  1247  			}
  1248  
  1249  			dataUsageInfo, err := loadDataUsageFromBackend(ctx, objLayer)
  1250  			if err != nil {
  1251  				return
  1252  			}
  1253  
  1254  			// data usage has not captured any data yet.
  1255  			if dataUsageInfo.LastUpdate.IsZero() {
  1256  				return
  1257  			}
  1258  
  1259  			metrics = append(metrics, Metric{
  1260  				Description: getUsageLastScanActivityMD(),
  1261  				Value:       float64(time.Since(dataUsageInfo.LastUpdate)),
  1262  			})
  1263  
  1264  			for bucket, usage := range dataUsageInfo.BucketsUsage {
  1265  				stat := getLatestReplicationStats(bucket, usage)
  1266  
  1267  				metrics = append(metrics, Metric{
  1268  					Description:    getBucketUsageTotalBytesMD(),
  1269  					Value:          float64(usage.Size),
  1270  					VariableLabels: map[string]string{"bucket": bucket},
  1271  				})
  1272  
  1273  				metrics = append(metrics, Metric{
  1274  					Description:    getBucketUsageObjectsTotalMD(),
  1275  					Value:          float64(usage.ObjectsCount),
  1276  					VariableLabels: map[string]string{"bucket": bucket},
  1277  				})
  1278  
  1279  				if stat.hasReplicationUsage() {
  1280  					metrics = append(metrics, Metric{
  1281  						Description:    getBucketRepPendingBytesMD(),
  1282  						Value:          float64(stat.PendingSize),
  1283  						VariableLabels: map[string]string{"bucket": bucket},
  1284  					})
  1285  					metrics = append(metrics, Metric{
  1286  						Description:    getBucketRepFailedBytesMD(),
  1287  						Value:          float64(stat.FailedSize),
  1288  						VariableLabels: map[string]string{"bucket": bucket},
  1289  					})
  1290  					metrics = append(metrics, Metric{
  1291  						Description:    getBucketRepSentBytesMD(),
  1292  						Value:          float64(stat.ReplicatedSize),
  1293  						VariableLabels: map[string]string{"bucket": bucket},
  1294  					})
  1295  					metrics = append(metrics, Metric{
  1296  						Description:    getBucketRepReceivedBytesMD(),
  1297  						Value:          float64(stat.ReplicaSize),
  1298  						VariableLabels: map[string]string{"bucket": bucket},
  1299  					})
  1300  					metrics = append(metrics, Metric{
  1301  						Description:    getBucketRepPendingOperationsMD(),
  1302  						Value:          float64(stat.PendingCount),
  1303  						VariableLabels: map[string]string{"bucket": bucket},
  1304  					})
  1305  					metrics = append(metrics, Metric{
  1306  						Description:    getBucketRepFailedOperationsMD(),
  1307  						Value:          float64(stat.FailedCount),
  1308  						VariableLabels: map[string]string{"bucket": bucket},
  1309  					})
  1310  				}
  1311  
  1312  				metrics = append(metrics, Metric{
  1313  					Description:          getBucketObjectDistributionMD(),
  1314  					Histogram:            usage.ObjectSizesHistogram,
  1315  					HistogramBucketLabel: "range",
  1316  					VariableLabels:       map[string]string{"bucket": bucket},
  1317  				})
  1318  
  1319  			}
  1320  			return
  1321  		},
  1322  	}
  1323  }
  1324  func getLocalStorageMetrics() MetricsGroup {
  1325  	return MetricsGroup{
  1326  		id:         "localStorageMetrics",
  1327  		cachedRead: cachedRead,
  1328  		read: func(ctx context.Context) (metrics []Metric) {
  1329  			objLayer := newObjectLayerFn()
  1330  			// Service not initialized yet
  1331  			if objLayer == nil {
  1332  				return
  1333  			}
  1334  
  1335  			if GlobalIsGateway {
  1336  				return
  1337  			}
  1338  
  1339  			metrics = make([]Metric, 0, 50)
  1340  			storageInfo, _ := objLayer.LocalStorageInfo(ctx)
  1341  			for _, disk := range storageInfo.Disks {
  1342  				metrics = append(metrics, Metric{
  1343  					Description:    getNodeDiskUsedBytesMD(),
  1344  					Value:          float64(disk.UsedSpace),
  1345  					VariableLabels: map[string]string{"disk": disk.DrivePath},
  1346  				})
  1347  
  1348  				metrics = append(metrics, Metric{
  1349  					Description:    getNodeDiskFreeBytesMD(),
  1350  					Value:          float64(disk.AvailableSpace),
  1351  					VariableLabels: map[string]string{"disk": disk.DrivePath},
  1352  				})
  1353  
  1354  				metrics = append(metrics, Metric{
  1355  					Description:    getNodeDiskTotalBytesMD(),
  1356  					Value:          float64(disk.TotalSpace),
  1357  					VariableLabels: map[string]string{"disk": disk.DrivePath},
  1358  				})
  1359  			}
  1360  			return
  1361  		},
  1362  	}
  1363  }
  1364  func getClusterStorageMetrics() MetricsGroup {
  1365  	return MetricsGroup{
  1366  		id:         "ClusterStorageMetrics",
  1367  		cachedRead: cachedRead,
  1368  		read: func(ctx context.Context) (metrics []Metric) {
  1369  			objLayer := newObjectLayerFn()
  1370  			// Service not initialized yet
  1371  			if objLayer == nil {
  1372  				return
  1373  			}
  1374  
  1375  			if GlobalIsGateway {
  1376  				return
  1377  			}
  1378  
  1379  			// Fetch disk space info, ignore errors
  1380  			metrics = make([]Metric, 0, 10)
  1381  			storageInfo, _ := objLayer.StorageInfo(ctx)
  1382  			onlineDisks, offlineDisks := getOnlineOfflineDisksStats(storageInfo.Disks)
  1383  			totalDisks := onlineDisks.Merge(offlineDisks)
  1384  
  1385  			metrics = append(metrics, Metric{
  1386  				Description: getClusterCapacityTotalBytesMD(),
  1387  				Value:       float64(GetTotalCapacity(storageInfo.Disks)),
  1388  			})
  1389  
  1390  			metrics = append(metrics, Metric{
  1391  				Description: getClusterCapacityFreeBytesMD(),
  1392  				Value:       float64(GetTotalCapacityFree(storageInfo.Disks)),
  1393  			})
  1394  
  1395  			metrics = append(metrics, Metric{
  1396  				Description: getClusterCapacityUsageBytesMD(),
  1397  				Value:       GetTotalUsableCapacity(storageInfo.Disks, storageInfo),
  1398  			})
  1399  
  1400  			metrics = append(metrics, Metric{
  1401  				Description: getClusterCapacityUsageFreeBytesMD(),
  1402  				Value:       GetTotalUsableCapacityFree(storageInfo.Disks, storageInfo),
  1403  			})
  1404  
  1405  			metrics = append(metrics, Metric{
  1406  				Description: getClusterDisksOfflineTotalMD(),
  1407  				Value:       float64(offlineDisks.Sum()),
  1408  			})
  1409  
  1410  			metrics = append(metrics, Metric{
  1411  				Description: getClusterDisksOnlineTotalMD(),
  1412  				Value:       float64(onlineDisks.Sum()),
  1413  			})
  1414  
  1415  			metrics = append(metrics, Metric{
  1416  				Description: getClusterDisksTotalMD(),
  1417  				Value:       float64(totalDisks.Sum()),
  1418  			})
  1419  			return
  1420  		},
  1421  	}
  1422  }
  1423  
  1424  type minioClusterCollector struct {
  1425  	desc *prometheus.Desc
  1426  }
  1427  
  1428  func newMinioClusterCollector() *minioClusterCollector {
  1429  	return &minioClusterCollector{
  1430  		desc: prometheus.NewDesc("minio_stats", "Statistics exposed by MinIO server", nil, nil),
  1431  	}
  1432  }
  1433  
  1434  // Describe sends the super-set of all possible descriptors of metrics
  1435  func (c *minioClusterCollector) Describe(ch chan<- *prometheus.Desc) {
  1436  	ch <- c.desc
  1437  }
  1438  
  1439  // Collect is called by the Prometheus registry when collecting metrics.
  1440  func (c *minioClusterCollector) Collect(out chan<- prometheus.Metric) {
  1441  
  1442  	var wg sync.WaitGroup
  1443  	publish := func(in <-chan Metric) {
  1444  		defer wg.Done()
  1445  		for metric := range in {
  1446  			labels, values := getOrderedLabelValueArrays(metric.VariableLabels)
  1447  			if metric.Description.Type == histogramMetric {
  1448  				if metric.Histogram == nil {
  1449  					continue
  1450  				}
  1451  				for k, v := range metric.Histogram {
  1452  					l := append(labels, metric.HistogramBucketLabel)
  1453  					lv := append(values, k)
  1454  					out <- prometheus.MustNewConstMetric(
  1455  						prometheus.NewDesc(
  1456  							prometheus.BuildFQName(string(metric.Description.Namespace),
  1457  								string(metric.Description.Subsystem),
  1458  								string(metric.Description.Name)),
  1459  							metric.Description.Help,
  1460  							l,
  1461  							metric.StaticLabels,
  1462  						),
  1463  						prometheus.GaugeValue,
  1464  						float64(v),
  1465  						lv...)
  1466  				}
  1467  				continue
  1468  			}
  1469  			metricType := prometheus.GaugeValue
  1470  			switch metric.Description.Type {
  1471  			case counterMetric:
  1472  				metricType = prometheus.CounterValue
  1473  			}
  1474  			toPost := prometheus.MustNewConstMetric(
  1475  				prometheus.NewDesc(
  1476  					prometheus.BuildFQName(string(metric.Description.Namespace),
  1477  						string(metric.Description.Subsystem),
  1478  						string(metric.Description.Name)),
  1479  					metric.Description.Help,
  1480  					labels,
  1481  					metric.StaticLabels,
  1482  				),
  1483  				metricType,
  1484  				metric.Value,
  1485  				values...)
  1486  			out <- toPost
  1487  		}
  1488  	}
  1489  
  1490  	// Call peer api to fetch metrics
  1491  	peerCh := GlobalNotificationSys.GetClusterMetrics(GlobalContext)
  1492  	selfCh := ReportMetrics(GlobalContext, GetAllGenerators)
  1493  	wg.Add(2)
  1494  	go publish(peerCh)
  1495  	go publish(selfCh)
  1496  	wg.Wait()
  1497  }
  1498  
  1499  // ReportMetrics reports serialized metrics to the channel passed for the metrics generated.
  1500  func ReportMetrics(ctx context.Context, generators func() []MetricsGenerator) <-chan Metric {
  1501  	ch := make(chan Metric)
  1502  	go func() {
  1503  		defer close(ch)
  1504  		populateAndPublish(generators, func(m Metric) bool {
  1505  			if m.VariableLabels == nil {
  1506  				m.VariableLabels = make(map[string]string)
  1507  			}
  1508  			m.VariableLabels[serverName] = globalLocalNodeName
  1509  			for {
  1510  				select {
  1511  				case ch <- m:
  1512  					return true
  1513  				case <-ctx.Done():
  1514  					return false
  1515  				}
  1516  			}
  1517  		})
  1518  	}()
  1519  	return ch
  1520  }
  1521  
  1522  // minioCollectorV2 is the Custom Collector
  1523  type minioCollectorV2 struct {
  1524  	generator func() []MetricsGenerator
  1525  	desc      *prometheus.Desc
  1526  }
  1527  
  1528  // Describe sends the super-set of all possible descriptors of metrics
  1529  func (c *minioCollectorV2) Describe(ch chan<- *prometheus.Desc) {
  1530  	ch <- c.desc
  1531  }
  1532  
  1533  // populateAndPublish populates and then publishes the metrics generated by the generator function.
  1534  func populateAndPublish(generatorFn func() []MetricsGenerator, publish func(m Metric) bool) {
  1535  	generators := generatorFn()
  1536  	for _, g := range generators {
  1537  		metricsGroup := g()
  1538  		metrics := metricsGroup.cachedRead(GlobalContext, &metricsGroup)
  1539  		for _, metric := range metrics {
  1540  			if !publish(metric) {
  1541  				return
  1542  			}
  1543  		}
  1544  	}
  1545  }
  1546  
  1547  // Collect is called by the Prometheus registry when collecting metrics.
  1548  func (c *minioCollectorV2) Collect(ch chan<- prometheus.Metric) {
  1549  
  1550  	// Expose MinIO's version information
  1551  	minioVersionInfo.WithLabelValues(Version, CommitID).Set(1.0)
  1552  
  1553  	populateAndPublish(c.generator, func(metric Metric) bool {
  1554  		labels, values := getOrderedLabelValueArrays(metric.VariableLabels)
  1555  		values = append(values, globalLocalNodeName)
  1556  		labels = append(labels, serverName)
  1557  
  1558  		if metric.Description.Type == histogramMetric {
  1559  			if metric.Histogram == nil {
  1560  				return true
  1561  			}
  1562  			for k, v := range metric.Histogram {
  1563  				labels = append(labels, metric.HistogramBucketLabel)
  1564  				values = append(values, k)
  1565  				ch <- prometheus.MustNewConstMetric(
  1566  					prometheus.NewDesc(
  1567  						prometheus.BuildFQName(string(metric.Description.Namespace),
  1568  							string(metric.Description.Subsystem),
  1569  							string(metric.Description.Name)),
  1570  						metric.Description.Help,
  1571  						labels,
  1572  						metric.StaticLabels,
  1573  					),
  1574  					prometheus.GaugeValue,
  1575  					float64(v),
  1576  					values...)
  1577  			}
  1578  			return true
  1579  		}
  1580  
  1581  		metricType := prometheus.GaugeValue
  1582  		switch metric.Description.Type {
  1583  		case counterMetric:
  1584  			metricType = prometheus.CounterValue
  1585  		}
  1586  		ch <- prometheus.MustNewConstMetric(
  1587  			prometheus.NewDesc(
  1588  				prometheus.BuildFQName(string(metric.Description.Namespace),
  1589  					string(metric.Description.Subsystem),
  1590  					string(metric.Description.Name)),
  1591  				metric.Description.Help,
  1592  				labels,
  1593  				metric.StaticLabels,
  1594  			),
  1595  			metricType,
  1596  			metric.Value,
  1597  			values...)
  1598  		return true
  1599  	})
  1600  }
  1601  
  1602  func getOrderedLabelValueArrays(labelsWithValue map[string]string) (labels, values []string) {
  1603  	labels = make([]string, 0)
  1604  	values = make([]string, 0)
  1605  	for l, v := range labelsWithValue {
  1606  		labels = append(labels, l)
  1607  		values = append(values, v)
  1608  	}
  1609  	return
  1610  }
  1611  
  1612  // newMinioCollectorV2 describes the collector
  1613  // and returns reference of minioCollector for version 2
  1614  // It creates the Prometheus Description which is used
  1615  // to define Metric and  help string
  1616  func newMinioCollectorV2(generator func() []MetricsGenerator) *minioCollectorV2 {
  1617  	return &minioCollectorV2{
  1618  		generator: generator,
  1619  		desc:      prometheus.NewDesc("minio_stats", "Statistics exposed by MinIO server", nil, nil),
  1620  	}
  1621  }
  1622  
  1623  func metricsServerHandler() http.Handler {
  1624  
  1625  	registry := prometheus.NewRegistry()
  1626  
  1627  	// Report all other metrics
  1628  	err := registry.Register(newMinioClusterCollector())
  1629  	if err != nil {
  1630  		logger.CriticalIf(GlobalContext, err)
  1631  	}
  1632  	// DefaultGatherers include golang metrics and process metrics.
  1633  	gatherers := prometheus.Gatherers{
  1634  		registry,
  1635  	}
  1636  	// Delegate http serving to Prometheus client library, which will call collector.Collect.
  1637  	return promhttp.InstrumentMetricHandler(
  1638  		registry,
  1639  		promhttp.HandlerFor(gatherers,
  1640  			promhttp.HandlerOpts{
  1641  				ErrorHandling: promhttp.ContinueOnError,
  1642  			}),
  1643  	)
  1644  }
  1645  
  1646  func metricsNodeHandler() http.Handler {
  1647  	registry := prometheus.NewRegistry()
  1648  
  1649  	err := registry.Register(newMinioCollectorV2(GetSingleNodeGenerators))
  1650  	if err != nil {
  1651  		logger.CriticalIf(GlobalContext, err)
  1652  	}
  1653  	err = registry.Register(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{
  1654  		Namespace:    minioNamespace,
  1655  		ReportErrors: true,
  1656  	}))
  1657  	if err != nil {
  1658  		logger.CriticalIf(GlobalContext, err)
  1659  	}
  1660  	err = registry.Register(prometheus.NewGoCollector())
  1661  	if err != nil {
  1662  		logger.CriticalIf(GlobalContext, err)
  1663  	}
  1664  	gatherers := prometheus.Gatherers{
  1665  		registry,
  1666  	}
  1667  	// Delegate http serving to Prometheus client library, which will call collector.Collect.
  1668  	return promhttp.InstrumentMetricHandler(
  1669  		registry,
  1670  		promhttp.HandlerFor(gatherers,
  1671  			promhttp.HandlerOpts{
  1672  				ErrorHandling: promhttp.ContinueOnError,
  1673  			}),
  1674  	)
  1675  }