storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/metrics.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2018-2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"net/http"
    21  	"strings"
    22  	"sync/atomic"
    23  	"time"
    24  
    25  	"github.com/prometheus/client_golang/prometheus"
    26  	"github.com/prometheus/client_golang/prometheus/promhttp"
    27  
    28  	"storj.io/minio/cmd/logger"
    29  	iampolicy "storj.io/minio/pkg/iam/policy"
    30  	"storj.io/minio/pkg/madmin"
    31  )
    32  
var (
	// httpRequestsDuration is a histogram vector exported under the name
	// "s3_ttfb_seconds" and partitioned by the "api" label. Its bucket upper
	// bounds run from 0.05 to 10.
	httpRequestsDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "s3_ttfb_seconds",
			Help:    "Time taken by requests served by current MinIO server instance",
			Buckets: []float64{.05, .1, .25, .5, 1, 2.5, 5, 10},
		},
		[]string{"api"},
	)
	// minioVersionInfo is a gauge vector in the "minio" namespace named
	// "version_info". The version and commit are carried as label values;
	// minioCollector.Collect sets the sample value to 1.
	minioVersionInfo = prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "minio",
			Name:      "version_info",
			Help:      "Version of current MinIO server instance",
		},
		[]string{
			// current version
			"version",
			// commit-id of the current version
			"commit",
		},
	)
)
    56  
// Namespaces passed as the first component to prometheus.BuildFQName by the
// metric emitters in this file; each becomes the leading part of the fully
// qualified metric name.
const (
	healMetricsNamespace = "self_heal"
	gatewayNamespace     = "gateway"
	cacheNamespace       = "cache"
	s3Namespace          = "s3"
	bucketNamespace      = "bucket"
	minioNamespace       = "minio"
	diskNamespace        = "disk"
	interNodeNamespace   = "internode"
)
    67  
    68  func init() {
    69  	prometheus.MustRegister(httpRequestsDuration)
    70  	prometheus.MustRegister(newMinioCollector())
    71  	prometheus.MustRegister(minioVersionInfo)
    72  }
    73  
    74  // newMinioCollector describes the collector
    75  // and returns reference of minioCollector
    76  // It creates the Prometheus Description which is used
    77  // to define metric and  help string
    78  func newMinioCollector() *minioCollector {
    79  	return &minioCollector{
    80  		desc: prometheus.NewDesc("minio_stats", "Statistics exposed by MinIO server", nil, nil),
    81  	}
    82  }
    83  
// minioCollector is the custom Prometheus collector for MinIO. Its Describe
// method reports desc, and its Collect method emits the server's metrics.
type minioCollector struct {
	// desc is the descriptor sent from Describe; it is built by
	// newMinioCollector.
	desc *prometheus.Desc
}
    88  
// Describe sends the collector's single descriptor (c.desc) on ch.
// The metrics emitted by Collect are built with their own descriptors and are
// not individually listed here.
func (c *minioCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- c.desc
}
    93  
    94  // Collect is called by the Prometheus registry when collecting metrics.
    95  func (c *minioCollector) Collect(ch chan<- prometheus.Metric) {
    96  
    97  	// Expose MinIO's version information
    98  	minioVersionInfo.WithLabelValues(Version, CommitID).Set(1.0)
    99  
   100  	storageMetricsPrometheus(ch)
   101  	nodeHealthMetricsPrometheus(ch)
   102  	bucketUsageMetricsPrometheus(ch)
   103  	networkMetricsPrometheus(ch)
   104  	httpMetricsPrometheus(ch)
   105  	cacheMetricsPrometheus(ch)
   106  	gatewayMetricsPrometheus(ch)
   107  	healingMetricsPrometheus(ch)
   108  }
   109  
   110  func nodeHealthMetricsPrometheus(ch chan<- prometheus.Metric) {
   111  	nodesUp, nodesDown := GetPeerOnlineCount()
   112  	ch <- prometheus.MustNewConstMetric(
   113  		prometheus.NewDesc(
   114  			prometheus.BuildFQName(minioNamespace, "nodes", "online"),
   115  			"Total number of MinIO nodes online",
   116  			nil, nil),
   117  		prometheus.GaugeValue,
   118  		float64(nodesUp),
   119  	)
   120  	ch <- prometheus.MustNewConstMetric(
   121  		prometheus.NewDesc(
   122  			prometheus.BuildFQName(minioNamespace, "nodes", "offline"),
   123  			"Total number of MinIO nodes offline",
   124  			nil, nil),
   125  		prometheus.GaugeValue,
   126  		float64(nodesDown),
   127  	)
   128  }
   129  
   130  // collects healing specific metrics for MinIO instance in Prometheus specific format
   131  // and sends to given channel
   132  func healingMetricsPrometheus(ch chan<- prometheus.Metric) {
   133  	if !globalIsErasure {
   134  		return
   135  	}
   136  	bgSeq, exists := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
   137  	if !exists {
   138  		return
   139  	}
   140  
   141  	var dur time.Duration
   142  	if !bgSeq.lastHealActivity.IsZero() {
   143  		dur = time.Since(bgSeq.lastHealActivity)
   144  	}
   145  
   146  	ch <- prometheus.MustNewConstMetric(
   147  		prometheus.NewDesc(
   148  			prometheus.BuildFQName(healMetricsNamespace, "time", "since_last_activity"),
   149  			"Time elapsed (in nano seconds) since last self healing activity. This is set to -1 until initial self heal activity",
   150  			nil, nil),
   151  		prometheus.GaugeValue,
   152  		float64(dur),
   153  	)
   154  	for k, v := range bgSeq.getScannedItemsMap() {
   155  		ch <- prometheus.MustNewConstMetric(
   156  			prometheus.NewDesc(
   157  				prometheus.BuildFQName(healMetricsNamespace, "objects", "scanned"),
   158  				"Objects scanned in current self healing run",
   159  				[]string{"type"}, nil),
   160  			prometheus.GaugeValue,
   161  			float64(v), string(k),
   162  		)
   163  	}
   164  	for k, v := range bgSeq.getHealedItemsMap() {
   165  		ch <- prometheus.MustNewConstMetric(
   166  			prometheus.NewDesc(
   167  				prometheus.BuildFQName(healMetricsNamespace, "objects", "healed"),
   168  				"Objects healed in current self healing run",
   169  				[]string{"type"}, nil),
   170  			prometheus.GaugeValue,
   171  			float64(v), string(k),
   172  		)
   173  	}
   174  	for k, v := range bgSeq.gethealFailedItemsMap() {
   175  		// healFailedItemsMap stores the endpoint and volume state separated by comma,
   176  		// split the fields and pass to channel at correct index
   177  		s := strings.Split(k, ",")
   178  		ch <- prometheus.MustNewConstMetric(
   179  			prometheus.NewDesc(
   180  				prometheus.BuildFQName(healMetricsNamespace, "objects", "heal_failed"),
   181  				"Objects for which healing failed in current self healing run",
   182  				[]string{"mount_path", "volume_status"}, nil),
   183  			prometheus.GaugeValue,
   184  			float64(v), string(s[0]), string(s[1]),
   185  		)
   186  	}
   187  }
   188  
   189  // collects gateway specific metrics for MinIO instance in Prometheus specific format
   190  // and sends to given channel
   191  func gatewayMetricsPrometheus(ch chan<- prometheus.Metric) {
   192  	if !GlobalIsGateway || (globalGatewayName != S3BackendGateway && globalGatewayName != AzureBackendGateway && globalGatewayName != GCSBackendGateway) {
   193  		return
   194  	}
   195  
   196  	objLayer := newObjectLayerFn()
   197  	// Service not initialized yet
   198  	if objLayer == nil {
   199  		return
   200  	}
   201  
   202  	m, err := objLayer.GetMetrics(GlobalContext)
   203  	if err != nil {
   204  		return
   205  	}
   206  
   207  	ch <- prometheus.MustNewConstMetric(
   208  		prometheus.NewDesc(
   209  			prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_received"),
   210  			"Total number of bytes received by current MinIO Gateway "+globalGatewayName+" backend",
   211  			nil, nil),
   212  		prometheus.CounterValue,
   213  		float64(m.GetBytesReceived()),
   214  	)
   215  	ch <- prometheus.MustNewConstMetric(
   216  		prometheus.NewDesc(
   217  			prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "bytes_sent"),
   218  			"Total number of bytes sent by current MinIO Gateway to "+globalGatewayName+" backend",
   219  			nil, nil),
   220  		prometheus.CounterValue,
   221  		float64(m.GetBytesSent()),
   222  	)
   223  	s := m.GetRequests()
   224  	ch <- prometheus.MustNewConstMetric(
   225  		prometheus.NewDesc(
   226  			prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
   227  			"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
   228  			[]string{"method"}, nil),
   229  		prometheus.CounterValue,
   230  		float64(atomic.LoadUint64(&s.Get)),
   231  		http.MethodGet,
   232  	)
   233  	ch <- prometheus.MustNewConstMetric(
   234  		prometheus.NewDesc(
   235  			prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
   236  			"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
   237  			[]string{"method"}, nil),
   238  		prometheus.CounterValue,
   239  		float64(atomic.LoadUint64(&s.Head)),
   240  		http.MethodHead,
   241  	)
   242  	ch <- prometheus.MustNewConstMetric(
   243  		prometheus.NewDesc(
   244  			prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
   245  			"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
   246  			[]string{"method"}, nil),
   247  		prometheus.CounterValue,
   248  		float64(atomic.LoadUint64(&s.Put)),
   249  		http.MethodPut,
   250  	)
   251  	ch <- prometheus.MustNewConstMetric(
   252  		prometheus.NewDesc(
   253  			prometheus.BuildFQName(gatewayNamespace, globalGatewayName, "requests"),
   254  			"Total number of requests made to "+globalGatewayName+" by current MinIO Gateway",
   255  			[]string{"method"}, nil),
   256  		prometheus.CounterValue,
   257  		float64(atomic.LoadUint64(&s.Post)),
   258  		http.MethodPost,
   259  	)
   260  }
   261  
   262  // collects cache metrics for MinIO server in Prometheus specific format
   263  // and sends to given channel
   264  func cacheMetricsPrometheus(ch chan<- prometheus.Metric) {
   265  	cacheObjLayer := newCachedObjectLayerFn()
   266  	// Service not initialized yet
   267  	if cacheObjLayer == nil {
   268  		return
   269  	}
   270  
   271  	ch <- prometheus.MustNewConstMetric(
   272  		prometheus.NewDesc(
   273  			prometheus.BuildFQName(cacheNamespace, "hits", "total"),
   274  			"Total number of disk cache hits in current MinIO instance",
   275  			nil, nil),
   276  		prometheus.CounterValue,
   277  		float64(cacheObjLayer.CacheStats().getHits()),
   278  	)
   279  	ch <- prometheus.MustNewConstMetric(
   280  		prometheus.NewDesc(
   281  			prometheus.BuildFQName(cacheNamespace, "misses", "total"),
   282  			"Total number of disk cache misses in current MinIO instance",
   283  			nil, nil),
   284  		prometheus.CounterValue,
   285  		float64(cacheObjLayer.CacheStats().getMisses()),
   286  	)
   287  	ch <- prometheus.MustNewConstMetric(
   288  		prometheus.NewDesc(
   289  			prometheus.BuildFQName(cacheNamespace, "data", "served"),
   290  			"Total number of bytes served from cache of current MinIO instance",
   291  			nil, nil),
   292  		prometheus.CounterValue,
   293  		float64(cacheObjLayer.CacheStats().getBytesServed()),
   294  	)
   295  	for _, cdStats := range cacheObjLayer.CacheStats().GetDiskStats() {
   296  		// Cache disk usage percentage
   297  		ch <- prometheus.MustNewConstMetric(
   298  			prometheus.NewDesc(
   299  				prometheus.BuildFQName(cacheNamespace, "usage", "percent"),
   300  				"Total percentage cache usage",
   301  				[]string{"disk"}, nil),
   302  			prometheus.GaugeValue,
   303  			float64(cdStats.UsagePercent),
   304  			cdStats.Dir,
   305  		)
   306  		ch <- prometheus.MustNewConstMetric(
   307  			prometheus.NewDesc(
   308  				prometheus.BuildFQName(cacheNamespace, "usage", "high"),
   309  				"Indicates cache usage is high or low, relative to current cache 'quota' settings",
   310  				[]string{"disk"}, nil),
   311  			prometheus.GaugeValue,
   312  			float64(cdStats.UsageState),
   313  			cdStats.Dir,
   314  		)
   315  
   316  		ch <- prometheus.MustNewConstMetric(
   317  			prometheus.NewDesc(
   318  				prometheus.BuildFQName("cache", "usage", "size"),
   319  				"Indicates current cache usage in bytes",
   320  				[]string{"disk"}, nil),
   321  			prometheus.GaugeValue,
   322  			float64(cdStats.UsageSize),
   323  			cdStats.Dir,
   324  		)
   325  
   326  		ch <- prometheus.MustNewConstMetric(
   327  			prometheus.NewDesc(
   328  				prometheus.BuildFQName("cache", "total", "size"),
   329  				"Indicates total size of cache disk",
   330  				[]string{"disk"}, nil),
   331  			prometheus.GaugeValue,
   332  			float64(cdStats.TotalCapacity),
   333  			cdStats.Dir,
   334  		)
   335  	}
   336  }
   337  
   338  // collects http metrics for MinIO server in Prometheus specific format
   339  // and sends to given channel
   340  func httpMetricsPrometheus(ch chan<- prometheus.Metric) {
   341  	httpStats := globalHTTPStats.toServerHTTPStats()
   342  
   343  	for api, value := range httpStats.CurrentS3Requests.APIStats {
   344  		ch <- prometheus.MustNewConstMetric(
   345  			prometheus.NewDesc(
   346  				prometheus.BuildFQName(s3Namespace, "requests", "current"),
   347  				"Total number of running s3 requests in current MinIO server instance",
   348  				[]string{"api"}, nil),
   349  			prometheus.CounterValue,
   350  			float64(value),
   351  			api,
   352  		)
   353  	}
   354  
   355  	for api, value := range httpStats.TotalS3Requests.APIStats {
   356  		ch <- prometheus.MustNewConstMetric(
   357  			prometheus.NewDesc(
   358  				prometheus.BuildFQName(s3Namespace, "requests", "total"),
   359  				"Total number of s3 requests in current MinIO server instance",
   360  				[]string{"api"}, nil),
   361  			prometheus.CounterValue,
   362  			float64(value),
   363  			api,
   364  		)
   365  	}
   366  
   367  	for api, value := range httpStats.TotalS3Errors.APIStats {
   368  		ch <- prometheus.MustNewConstMetric(
   369  			prometheus.NewDesc(
   370  				prometheus.BuildFQName(s3Namespace, "errors", "total"),
   371  				"Total number of s3 errors in current MinIO server instance",
   372  				[]string{"api"}, nil),
   373  			prometheus.CounterValue,
   374  			float64(value),
   375  			api,
   376  		)
   377  	}
   378  
   379  	for api, value := range httpStats.TotalS3Canceled.APIStats {
   380  		ch <- prometheus.MustNewConstMetric(
   381  			prometheus.NewDesc(
   382  				prometheus.BuildFQName(s3Namespace, "canceled", "total"),
   383  				"Total number of client canceled s3 request in current MinIO server instance",
   384  				[]string{"api"}, nil),
   385  			prometheus.CounterValue,
   386  			float64(value),
   387  			api,
   388  		)
   389  	}
   390  }
   391  
   392  // collects network metrics for MinIO server in Prometheus specific format
   393  // and sends to given channel
   394  func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
   395  	connStats := globalConnStats.toServerConnStats()
   396  
   397  	// Network Sent/Received Bytes (internode)
   398  	ch <- prometheus.MustNewConstMetric(
   399  		prometheus.NewDesc(
   400  			prometheus.BuildFQName(interNodeNamespace, "tx", "bytes_total"),
   401  			"Total number of bytes sent to the other peer nodes by current MinIO server instance",
   402  			nil, nil),
   403  		prometheus.CounterValue,
   404  		float64(connStats.TotalOutputBytes),
   405  	)
   406  
   407  	ch <- prometheus.MustNewConstMetric(
   408  		prometheus.NewDesc(
   409  			prometheus.BuildFQName(interNodeNamespace, "rx", "bytes_total"),
   410  			"Total number of internode bytes received by current MinIO server instance",
   411  			nil, nil),
   412  		prometheus.CounterValue,
   413  		float64(connStats.TotalInputBytes),
   414  	)
   415  
   416  	// Network Sent/Received Bytes (Outbound)
   417  	ch <- prometheus.MustNewConstMetric(
   418  		prometheus.NewDesc(
   419  			prometheus.BuildFQName(s3Namespace, "tx", "bytes_total"),
   420  			"Total number of s3 bytes sent by current MinIO server instance",
   421  			nil, nil),
   422  		prometheus.CounterValue,
   423  		float64(connStats.S3OutputBytes),
   424  	)
   425  
   426  	ch <- prometheus.MustNewConstMetric(
   427  		prometheus.NewDesc(
   428  			prometheus.BuildFQName(s3Namespace, "rx", "bytes_total"),
   429  			"Total number of s3 bytes received by current MinIO server instance",
   430  			nil, nil),
   431  		prometheus.CounterValue,
   432  		float64(connStats.S3InputBytes),
   433  	)
   434  }
   435  
   436  // get the most current of in-memory replication stats  and data usage info from crawler.
   437  func getLatestReplicationStats(bucket string, u madmin.BucketUsageInfo) (s BucketReplicationStats) {
   438  	bucketStats := GlobalNotificationSys.GetClusterBucketStats(GlobalContext, bucket)
   439  
   440  	replStats := BucketReplicationStats{}
   441  	for _, bucketStat := range bucketStats {
   442  		replStats.FailedCount += bucketStat.ReplicationStats.FailedCount
   443  		replStats.FailedSize += bucketStat.ReplicationStats.FailedSize
   444  		replStats.PendingCount += bucketStat.ReplicationStats.PendingCount
   445  		replStats.PendingSize += bucketStat.ReplicationStats.PendingSize
   446  		replStats.ReplicaSize += bucketStat.ReplicationStats.ReplicaSize
   447  		replStats.ReplicatedSize += bucketStat.ReplicationStats.ReplicatedSize
   448  	}
   449  	usageStat := globalReplicationStats.GetInitialUsage(bucket)
   450  	replStats.FailedCount += usageStat.FailedCount
   451  	replStats.FailedSize += usageStat.FailedSize
   452  	replStats.PendingCount += usageStat.PendingCount
   453  	replStats.PendingSize += usageStat.PendingSize
   454  	replStats.ReplicaSize += usageStat.ReplicaSize
   455  	replStats.ReplicatedSize += usageStat.ReplicatedSize
   456  
   457  	// use in memory replication stats if it is ahead of usage info.
   458  	if replStats.ReplicatedSize >= u.ReplicatedSize {
   459  		s.ReplicatedSize = replStats.ReplicatedSize
   460  	} else {
   461  		s.ReplicatedSize = u.ReplicatedSize
   462  	}
   463  
   464  	if replStats.PendingSize > u.ReplicationPendingSize {
   465  		s.PendingSize = replStats.PendingSize
   466  	} else {
   467  		s.PendingSize = u.ReplicationPendingSize
   468  	}
   469  
   470  	if replStats.FailedSize > u.ReplicationFailedSize {
   471  		s.FailedSize = replStats.FailedSize
   472  	} else {
   473  		s.FailedSize = u.ReplicationFailedSize
   474  	}
   475  
   476  	if replStats.ReplicaSize > u.ReplicaSize {
   477  		s.ReplicaSize = replStats.ReplicaSize
   478  	} else {
   479  		s.ReplicaSize = u.ReplicaSize
   480  	}
   481  
   482  	if replStats.PendingCount > u.ReplicationPendingCount {
   483  		s.PendingCount = replStats.PendingCount
   484  	} else {
   485  		s.PendingCount = u.ReplicationPendingCount
   486  	}
   487  
   488  	if replStats.FailedCount > u.ReplicationFailedCount {
   489  		s.FailedCount = replStats.FailedCount
   490  	} else {
   491  		s.FailedCount = u.ReplicationFailedCount
   492  	}
   493  
   494  	return s
   495  }
   496  
   497  // Populates prometheus with bucket usage metrics, this metrics
   498  // is only enabled if scanner is enabled.
   499  func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) {
   500  	objLayer := newObjectLayerFn()
   501  	// Service not initialized yet
   502  	if objLayer == nil {
   503  		return
   504  	}
   505  
   506  	if GlobalIsGateway {
   507  		return
   508  	}
   509  
   510  	dataUsageInfo, err := loadDataUsageFromBackend(GlobalContext, objLayer)
   511  	if err != nil {
   512  		return
   513  	}
   514  	// data usage has not captured any data yet.
   515  	if dataUsageInfo.LastUpdate.IsZero() {
   516  		return
   517  	}
   518  
   519  	for bucket, usageInfo := range dataUsageInfo.BucketsUsage {
   520  		stat := getLatestReplicationStats(bucket, usageInfo)
   521  		// Total space used by bucket
   522  		ch <- prometheus.MustNewConstMetric(
   523  			prometheus.NewDesc(
   524  				prometheus.BuildFQName(bucketNamespace, "usage", "size"),
   525  				"Total bucket size",
   526  				[]string{"bucket"}, nil),
   527  			prometheus.GaugeValue,
   528  			float64(usageInfo.Size),
   529  			bucket,
   530  		)
   531  		ch <- prometheus.MustNewConstMetric(
   532  			prometheus.NewDesc(
   533  				prometheus.BuildFQName(bucketNamespace, "objects", "count"),
   534  				"Total number of objects in a bucket",
   535  				[]string{"bucket"}, nil),
   536  			prometheus.GaugeValue,
   537  			float64(usageInfo.ObjectsCount),
   538  			bucket,
   539  		)
   540  		ch <- prometheus.MustNewConstMetric(
   541  			prometheus.NewDesc(
   542  				prometheus.BuildFQName("bucket", "replication", "pending_size"),
   543  				"Total capacity pending to be replicated",
   544  				[]string{"bucket"}, nil),
   545  			prometheus.GaugeValue,
   546  			float64(stat.PendingSize),
   547  			bucket,
   548  		)
   549  		ch <- prometheus.MustNewConstMetric(
   550  			prometheus.NewDesc(
   551  				prometheus.BuildFQName("bucket", "replication", "failed_size"),
   552  				"Total capacity failed to replicate at least once",
   553  				[]string{"bucket"}, nil),
   554  			prometheus.GaugeValue,
   555  			float64(stat.FailedSize),
   556  			bucket,
   557  		)
   558  		ch <- prometheus.MustNewConstMetric(
   559  			prometheus.NewDesc(
   560  				prometheus.BuildFQName("bucket", "replication", "successful_size"),
   561  				"Total capacity replicated to destination",
   562  				[]string{"bucket"}, nil),
   563  			prometheus.GaugeValue,
   564  			float64(stat.ReplicatedSize),
   565  			bucket,
   566  		)
   567  		ch <- prometheus.MustNewConstMetric(
   568  			prometheus.NewDesc(
   569  				prometheus.BuildFQName("bucket", "replication", "received_size"),
   570  				"Total capacity replicated to this instance",
   571  				[]string{"bucket"}, nil),
   572  			prometheus.GaugeValue,
   573  			float64(stat.ReplicaSize),
   574  			bucket,
   575  		)
   576  		ch <- prometheus.MustNewConstMetric(
   577  			prometheus.NewDesc(
   578  				prometheus.BuildFQName("bucket", "replication", "pending_count"),
   579  				"Total replication operations pending",
   580  				[]string{"bucket"}, nil),
   581  			prometheus.GaugeValue,
   582  			float64(stat.PendingCount),
   583  			bucket,
   584  		)
   585  		ch <- prometheus.MustNewConstMetric(
   586  			prometheus.NewDesc(
   587  				prometheus.BuildFQName("bucket", "replication", "failed_count"),
   588  				"Total replication operations failed",
   589  				[]string{"bucket"}, nil),
   590  			prometheus.GaugeValue,
   591  			float64(stat.FailedCount),
   592  			bucket,
   593  		)
   594  		for k, v := range usageInfo.ObjectSizesHistogram {
   595  			ch <- prometheus.MustNewConstMetric(
   596  				prometheus.NewDesc(
   597  					prometheus.BuildFQName(bucketNamespace, "objects", "histogram"),
   598  					"Total number of objects of different sizes in a bucket",
   599  					[]string{"bucket", "object_size"}, nil),
   600  				prometheus.GaugeValue,
   601  				float64(v),
   602  				bucket,
   603  				k,
   604  			)
   605  		}
   606  	}
   607  }
   608  
   609  // collects storage metrics for MinIO server in Prometheus specific format
   610  // and sends to given channel
   611  func storageMetricsPrometheus(ch chan<- prometheus.Metric) {
   612  	objLayer := newObjectLayerFn()
   613  	// Service not initialized yet
   614  	if objLayer == nil {
   615  		return
   616  	}
   617  
   618  	if GlobalIsGateway {
   619  		return
   620  	}
   621  
   622  	server := getLocalServerProperty(globalEndpoints, &http.Request{
   623  		Host: globalLocalNodeName,
   624  	})
   625  
   626  	onlineDisks, offlineDisks := getOnlineOfflineDisksStats(server.Disks)
   627  	totalDisks := offlineDisks.Merge(onlineDisks)
   628  
   629  	// Report total capacity
   630  	ch <- prometheus.MustNewConstMetric(
   631  		prometheus.NewDesc(
   632  			prometheus.BuildFQName(minioNamespace, "capacity_raw", "total"),
   633  			"Total capacity online in the cluster",
   634  			nil, nil),
   635  		prometheus.GaugeValue,
   636  		float64(GetTotalCapacity(server.Disks)),
   637  	)
   638  
   639  	// Report total capacity free
   640  	ch <- prometheus.MustNewConstMetric(
   641  		prometheus.NewDesc(
   642  			prometheus.BuildFQName(minioNamespace, "capacity_raw_free", "total"),
   643  			"Total free capacity online in the cluster",
   644  			nil, nil),
   645  		prometheus.GaugeValue,
   646  		float64(GetTotalCapacityFree(server.Disks)),
   647  	)
   648  
   649  	s, _ := objLayer.StorageInfo(GlobalContext)
   650  	// Report total usable capacity
   651  	ch <- prometheus.MustNewConstMetric(
   652  		prometheus.NewDesc(
   653  			prometheus.BuildFQName(minioNamespace, "capacity_usable", "total"),
   654  			"Total usable capacity online in the cluster",
   655  			nil, nil),
   656  		prometheus.GaugeValue,
   657  		GetTotalUsableCapacity(server.Disks, s),
   658  	)
   659  	// Report total usable capacity free
   660  	ch <- prometheus.MustNewConstMetric(
   661  		prometheus.NewDesc(
   662  			prometheus.BuildFQName(minioNamespace, "capacity_usable_free", "total"),
   663  			"Total free usable capacity online in the cluster",
   664  			nil, nil),
   665  		prometheus.GaugeValue,
   666  		GetTotalUsableCapacityFree(server.Disks, s),
   667  	)
   668  
   669  	// MinIO Offline Disks per node
   670  	ch <- prometheus.MustNewConstMetric(
   671  		prometheus.NewDesc(
   672  			prometheus.BuildFQName(minioNamespace, "disks", "offline"),
   673  			"Total number of offline disks in current MinIO server instance",
   674  			nil, nil),
   675  		prometheus.GaugeValue,
   676  		float64(offlineDisks.Sum()),
   677  	)
   678  
   679  	// MinIO Total Disks per node
   680  	ch <- prometheus.MustNewConstMetric(
   681  		prometheus.NewDesc(
   682  			prometheus.BuildFQName(minioNamespace, "disks", "total"),
   683  			"Total number of disks for current MinIO server instance",
   684  			nil, nil),
   685  		prometheus.GaugeValue,
   686  		float64(totalDisks.Sum()),
   687  	)
   688  
   689  	for _, disk := range server.Disks {
   690  		// Total disk usage by the disk
   691  		ch <- prometheus.MustNewConstMetric(
   692  			prometheus.NewDesc(
   693  				prometheus.BuildFQName(diskNamespace, "storage", "used"),
   694  				"Total disk storage used on the disk",
   695  				[]string{"disk"}, nil),
   696  			prometheus.GaugeValue,
   697  			float64(disk.UsedSpace),
   698  			disk.DrivePath,
   699  		)
   700  
   701  		// Total available space in the disk
   702  		ch <- prometheus.MustNewConstMetric(
   703  			prometheus.NewDesc(
   704  				prometheus.BuildFQName(diskNamespace, "storage", "available"),
   705  				"Total available space left on the disk",
   706  				[]string{"disk"}, nil),
   707  			prometheus.GaugeValue,
   708  			float64(disk.AvailableSpace),
   709  			disk.DrivePath,
   710  		)
   711  
   712  		// Total storage space of the disk
   713  		ch <- prometheus.MustNewConstMetric(
   714  			prometheus.NewDesc(
   715  				prometheus.BuildFQName(diskNamespace, "storage", "total"),
   716  				"Total space on the disk",
   717  				[]string{"disk"}, nil),
   718  			prometheus.GaugeValue,
   719  			float64(disk.TotalSpace),
   720  			disk.DrivePath,
   721  		)
   722  	}
   723  }
   724  
   725  func metricsHandler() http.Handler {
   726  
   727  	registry := prometheus.NewRegistry()
   728  
   729  	err := registry.Register(minioVersionInfo)
   730  	logger.LogIf(GlobalContext, err)
   731  
   732  	err = registry.Register(httpRequestsDuration)
   733  	logger.LogIf(GlobalContext, err)
   734  
   735  	err = registry.Register(newMinioCollector())
   736  	logger.LogIf(GlobalContext, err)
   737  
   738  	gatherers := prometheus.Gatherers{
   739  		prometheus.DefaultGatherer,
   740  		registry,
   741  	}
   742  	// Delegate http serving to Prometheus client library, which will call collector.Collect.
   743  	return promhttp.InstrumentMetricHandler(
   744  		registry,
   745  		promhttp.HandlerFor(gatherers,
   746  			promhttp.HandlerOpts{
   747  				ErrorHandling: promhttp.ContinueOnError,
   748  			}),
   749  	)
   750  
   751  }
   752  
   753  // AuthMiddleware checks if the bearer token is valid and authorized.
   754  func AuthMiddleware(h http.Handler) http.Handler {
   755  	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   756  		claims, owner, authErr := webRequestAuthenticate(r)
   757  		if authErr != nil || !claims.VerifyIssuer("prometheus", true) {
   758  			w.WriteHeader(http.StatusForbidden)
   759  			return
   760  		}
   761  		// For authenticated users apply IAM policy.
   762  		if !GlobalIAMSys.IsAllowed(iampolicy.Args{
   763  			AccountName:     claims.AccessKey,
   764  			Action:          iampolicy.PrometheusAdminAction,
   765  			ConditionValues: getConditionValues(r, "", claims.AccessKey, claims.Map()),
   766  			IsOwner:         owner,
   767  			Claims:          claims.Map(),
   768  		}) {
   769  			w.WriteHeader(http.StatusForbidden)
   770  			return
   771  		}
   772  		h.ServeHTTP(w, r)
   773  	})
   774  }