
     1  // Copyright (c) 2015-2023 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  package cmd
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"math"
    24  	"net/http"
    25  	"sync"
    26  	"time"
    28  	""
    29  	""
    30  )
// Collection settings and the names of the resource metrics handled in
// this file.
const (
	// resourceMetricsCollectionInterval is the delay between two runs of
	// the background collection loop; it is also quoted in the help text
	// of the network interface metrics.
	resourceMetricsCollectionInterval = time.Minute
	// resourceMetricsCacheInterval is the cache interval configured on
	// the resource metrics group.
	resourceMetricsCacheInterval      = time.Minute

	// drive stats
	totalInodes    MetricName = "total_inodes"
	readsPerSec    MetricName = "reads_per_sec"
	writesPerSec   MetricName = "writes_per_sec"
	readsKBPerSec  MetricName = "reads_kb_per_sec"
	writesKBPerSec MetricName = "writes_kb_per_sec"
	readsAwait     MetricName = "reads_await"
	writesAwait    MetricName = "writes_await"
	percUtil       MetricName = "perc_util"
	usedInodes     MetricName = "used_inodes"

	// network stats
	interfaceRxBytes  MetricName = "rx_bytes"
	interfaceRxErrors MetricName = "rx_errors"
	interfaceTxBytes  MetricName = "tx_bytes"
	interfaceTxErrors MetricName = "tx_errors"

	// memory stats
	memUsed      MetricName = "used"
	memUsedPerc  MetricName = "used_perc"
	memFree      MetricName = "free"
	memShared    MetricName = "shared"
	memBuffers   MetricName = "buffers"
	memCache     MetricName = "cache"
	memAvailable MetricName = "available"

	// cpu stats
	cpuUser       MetricName = "user"
	cpuSystem     MetricName = "system"
	cpuIOWait     MetricName = "iowait"
	cpuIdle       MetricName = "idle"
	cpuNice       MetricName = "nice"
	cpuSteal      MetricName = "steal"
	cpuLoad1      MetricName = "load1"
	cpuLoad5      MetricName = "load5"
	cpuLoad15     MetricName = "load15"
	cpuLoad1Perc  MetricName = "load1_perc"
	cpuLoad5Perc  MetricName = "load5_perc"
	cpuLoad15Perc MetricName = "load15_perc"
)
// Package-level state shared by the resource metrics collector.
var (
	// resourceCollector is the collector created in init and served by
	// metricsResourceHandler.
	resourceCollector *minioResourceCollector
	// resourceMetricsMap is a map of subsystem to its metrics.
	// It is guarded by resourceMetricsMapMu.
	resourceMetricsMap   map[MetricSubsystem]ResourceMetrics
	resourceMetricsMapMu sync.RWMutex
	// resourceMetricsHelpMap maps metric name to its help string
	resourceMetricsHelpMap map[MetricName]string
	// resourceMetricsGroups holds the metrics groups handed to the
	// resource collector; it is populated in init.
	resourceMetricsGroups  []*MetricsGroupV2
	// latestDriveStats holds the most recently seen IO counters of each
	// drive. It is seeded by initLatestValues and overwritten on every
	// collection cycle, and serves as the baseline against which the next
	// cycle's per-interval drive metrics are computed.
	// latestDriveStatsMu guards both latestDriveStats and
	// lastDriveStatsRefresh, the UTC time of the last refresh.
	latestDriveStats      map[string]madmin.DiskIOStats
	latestDriveStatsMu    sync.RWMutex
	lastDriveStatsRefresh time.Time
)
// PeerResourceMetrics represents the resource metrics
// retrieved from a peer, along with errors if any
type PeerResourceMetrics struct {
	// Metrics holds the peer's resource metrics, grouped by subsystem.
	Metrics map[MetricSubsystem]ResourceMetrics
	// Errors holds the errors, if any, as strings.
	Errors  []string
}
// ResourceMetrics is a map of unique key identifying
// a resource metric (e.g. reads_per_sec_{node}_{drive})
// to its data. The key is built by updateResourceMetrics from
// the metric name followed by the metric's label values.
type ResourceMetrics map[string]ResourceMetric
// ResourceMetric represents a single resource metric
// The metrics are collected from all servers periodically
// and stored in the resource metrics map.
// It also maintains the count of number of times this metric
// was collected since the server started, and the sum,
// average and max values across the same.
type ResourceMetric struct {
	// Name is the metric name; Labels are the labels the metric was
	// first recorded with.
	Name   MetricName
	Labels map[string]string

	// value captured in current cycle
	Current float64

	// Used when system provides cumulative (since uptime) values
	// helps in calculating the current value by comparing the new
	// cumulative value with previous one
	Cumulative float64

	// Max is the peak value tracked by updateResourceMetrics, Sum is the
	// running total of Current over all updates, Count is the number of
	// updates, and Avg is Sum divided by Count.
	Max   float64
	Avg   float64
	Sum   float64
	Count uint64
}
   128  func init() {
   129  	interval := fmt.Sprintf("%ds", int(resourceMetricsCollectionInterval.Seconds()))
   130  	resourceMetricsHelpMap = map[MetricName]string{
   131  		interfaceRxBytes:  "Bytes received on the interface in " + interval,
   132  		interfaceRxErrors: "Receive errors in " + interval,
   133  		interfaceTxBytes:  "Bytes transmitted in " + interval,
   134  		interfaceTxErrors: "Transmit errors in " + interval,
   135  		total:             "Total memory on the node",
   136  		memUsed:           "Used memory on the node",
   137  		memUsedPerc:       "Used memory percentage on the node",
   138  		memFree:           "Free memory on the node",
   139  		memShared:         "Shared memory on the node",
   140  		memBuffers:        "Buffers memory on the node",
   141  		memCache:          "Cache memory on the node",
   142  		memAvailable:      "Available memory on the node",
   143  		readsPerSec:       "Reads per second on a drive",
   144  		writesPerSec:      "Writes per second on a drive",
   145  		readsKBPerSec:     "Kilobytes read per second on a drive",
   146  		writesKBPerSec:    "Kilobytes written per second on a drive",
   147  		readsAwait:        "Average time for read requests to be served on a drive",
   148  		writesAwait:       "Average time for write requests to be served on a drive",
   149  		percUtil:          "Percentage of time the disk was busy",
   150  		usedBytes:         "Used bytes on a drive",
   151  		totalBytes:        "Total bytes on a drive",
   152  		usedInodes:        "Total inodes used on a drive",
   153  		totalInodes:       "Total inodes on a drive",
   154  		cpuUser:           "CPU user time",
   155  		cpuSystem:         "CPU system time",
   156  		cpuIdle:           "CPU idle time",
   157  		cpuIOWait:         "CPU ioWait time",
   158  		cpuSteal:          "CPU steal time",
   159  		cpuNice:           "CPU nice time",
   160  		cpuLoad1:          "CPU load average 1min",
   161  		cpuLoad5:          "CPU load average 5min",
   162  		cpuLoad15:         "CPU load average 15min",
   163  		cpuLoad1Perc:      "CPU load average 1min (perentage)",
   164  		cpuLoad5Perc:      "CPU load average 5min (percentage)",
   165  		cpuLoad15Perc:     "CPU load average 15min (percentage)",
   166  	}
   167  	resourceMetricsGroups = []*MetricsGroupV2{
   168  		getResourceMetrics(),
   169  	}
   171  	resourceCollector = newMinioResourceCollector(resourceMetricsGroups)
   172  }
   174  func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64, labels map[string]string, isCumulative bool) {
   175  	resourceMetricsMapMu.Lock()
   176  	defer resourceMetricsMapMu.Unlock()
   177  	subsysMetrics, found := resourceMetricsMap[subSys]
   178  	if !found {
   179  		subsysMetrics = ResourceMetrics{}
   180  	}
   182  	// labels are used to uniquely identify a metric
   183  	// e.g. reads_per_sec_{drive} inside the map
   184  	sfx := ""
   185  	for _, v := range labels {
   186  		if len(sfx) > 0 {
   187  			sfx += "_"
   188  		}
   189  		sfx += v
   190  	}
   192  	key := string(name) + "_" + sfx
   193  	metric, found := subsysMetrics[key]
   194  	if !found {
   195  		metric = ResourceMetric{
   196  			Name:   name,
   197  			Labels: labels,
   198  		}
   199  	}
   201  	if isCumulative {
   202  		metric.Current = val - metric.Cumulative
   203  		metric.Cumulative = val
   204  	} else {
   205  		metric.Current = val
   206  	}
   208  	if metric.Current > metric.Max {
   209  		metric.Max = val
   210  	}
   212  	metric.Sum += metric.Current
   213  	metric.Count++
   215  	metric.Avg = metric.Sum / float64(metric.Count)
   216  	subsysMetrics[key] = metric
   218  	resourceMetricsMap[subSys] = subsysMetrics
   219  }
   221  // updateDriveIOStats - Updates the drive IO stats by calculating the difference between the current and latest updated values.
   222  func updateDriveIOStats(currentStats madmin.DiskIOStats, latestStats madmin.DiskIOStats, labels map[string]string) {
   223  	sectorSize := uint64(512)
   224  	kib := float64(1 << 10)
   225  	diffInSeconds := time.Now().UTC().Sub(lastDriveStatsRefresh).Seconds()
   226  	if diffInSeconds == 0 {
   227  		// too soon to update the stats
   228  		return
   229  	}
   230  	diffStats := madmin.DiskIOStats{
   231  		ReadIOs:      currentStats.ReadIOs - latestStats.ReadIOs,
   232  		WriteIOs:     currentStats.WriteIOs - latestStats.WriteIOs,
   233  		ReadTicks:    currentStats.ReadTicks - latestStats.ReadTicks,
   234  		WriteTicks:   currentStats.WriteTicks - latestStats.WriteTicks,
   235  		TotalTicks:   currentStats.TotalTicks - latestStats.TotalTicks,
   236  		ReadSectors:  currentStats.ReadSectors - latestStats.ReadSectors,
   237  		WriteSectors: currentStats.WriteSectors - latestStats.WriteSectors,
   238  	}
   240  	updateResourceMetrics(driveSubsystem, readsPerSec, float64(diffStats.ReadIOs)/diffInSeconds, labels, false)
   241  	readKib := float64(diffStats.ReadSectors*sectorSize) / kib
   242  	updateResourceMetrics(driveSubsystem, readsKBPerSec, readKib/diffInSeconds, labels, false)
   244  	updateResourceMetrics(driveSubsystem, writesPerSec, float64(diffStats.WriteIOs)/diffInSeconds, labels, false)
   245  	writeKib := float64(diffStats.WriteSectors*sectorSize) / kib
   246  	updateResourceMetrics(driveSubsystem, writesKBPerSec, writeKib/diffInSeconds, labels, false)
   248  	rdAwait := 0.0
   249  	if diffStats.ReadIOs > 0 {
   250  		rdAwait = float64(diffStats.ReadTicks) / float64(diffStats.ReadIOs)
   251  	}
   252  	updateResourceMetrics(driveSubsystem, readsAwait, rdAwait, labels, false)
   254  	wrAwait := 0.0
   255  	if diffStats.WriteIOs > 0 {
   256  		wrAwait = float64(diffStats.WriteTicks) / float64(diffStats.WriteIOs)
   257  	}
   258  	updateResourceMetrics(driveSubsystem, writesAwait, wrAwait, labels, false)
   259  	updateResourceMetrics(driveSubsystem, percUtil, float64(diffStats.TotalTicks)/(diffInSeconds*10), labels, false)
   260  }
   262  func collectDriveMetrics(m madmin.RealtimeMetrics) {
   263  	latestDriveStatsMu.Lock()
   264  	for d, dm := range m.ByDisk {
   265  		labels := map[string]string{"drive": d}
   266  		latestStats, ok := latestDriveStats[d]
   267  		if !ok {
   268  			latestDriveStats[d] = dm.IOStats
   269  			continue
   270  		}
   271  		updateDriveIOStats(dm.IOStats, latestStats, labels)
   272  		latestDriveStats[d] = dm.IOStats
   273  	}
   274  	lastDriveStatsRefresh = time.Now().UTC()
   275  	latestDriveStatsMu.Unlock()
   277  	globalLocalDrivesMu.RLock()
   278  	localDrives := cloneDrives(globalLocalDrives)
   279  	globalLocalDrivesMu.RUnlock()
   281  	for _, d := range localDrives {
   282  		di, err := d.DiskInfo(GlobalContext, DiskInfoOptions{})
   283  		labels := map[string]string{"drive": di.Endpoint}
   284  		if err == nil {
   285  			updateResourceMetrics(driveSubsystem, usedBytes, float64(di.Used), labels, false)
   286  			updateResourceMetrics(driveSubsystem, totalBytes, float64(di.Total), labels, false)
   287  			updateResourceMetrics(driveSubsystem, usedInodes, float64(di.UsedInodes), labels, false)
   288  			updateResourceMetrics(driveSubsystem, totalInodes, float64(di.FreeInodes+di.UsedInodes), labels, false)
   289  		}
   290  	}
   291  }
   293  func collectLocalResourceMetrics() {
   294  	var types madmin.MetricType = madmin.MetricsDisk | madmin.MetricNet | madmin.MetricsMem | madmin.MetricsCPU
   296  	m := collectLocalMetrics(types, collectMetricsOpts{
   297  		hosts: map[string]struct{}{
   298  			globalLocalNodeName: {},
   299  		},
   300  	})
   302  	for host, hm := range m.ByHost {
   303  		if len(host) > 0 {
   304  			if hm.Net != nil && len(hm.Net.NetStats.Name) > 0 {
   305  				stats := hm.Net.NetStats
   306  				labels := map[string]string{"interface": stats.Name}
   307  				updateResourceMetrics(interfaceSubsystem, interfaceRxBytes, float64(stats.RxBytes), labels, true)
   308  				updateResourceMetrics(interfaceSubsystem, interfaceRxErrors, float64(stats.RxErrors), labels, true)
   309  				updateResourceMetrics(interfaceSubsystem, interfaceTxBytes, float64(stats.TxBytes), labels, true)
   310  				updateResourceMetrics(interfaceSubsystem, interfaceTxErrors, float64(stats.TxErrors), labels, true)
   311  			}
   312  			if hm.Mem != nil && len(hm.Mem.Info.Addr) > 0 {
   313  				labels := map[string]string{}
   314  				stats := hm.Mem.Info
   315  				updateResourceMetrics(memSubsystem, total, float64(stats.Total), labels, false)
   316  				updateResourceMetrics(memSubsystem, memUsed, float64(stats.Used), labels, false)
   317  				perc := math.Round(float64(stats.Used*100*100)/float64(stats.Total)) / 100
   318  				updateResourceMetrics(memSubsystem, memUsedPerc, perc, labels, false)
   319  				updateResourceMetrics(memSubsystem, memFree, float64(stats.Free), labels, false)
   320  				updateResourceMetrics(memSubsystem, memShared, float64(stats.Shared), labels, false)
   321  				updateResourceMetrics(memSubsystem, memBuffers, float64(stats.Buffers), labels, false)
   322  				updateResourceMetrics(memSubsystem, memAvailable, float64(stats.Available), labels, false)
   323  				updateResourceMetrics(memSubsystem, memCache, float64(stats.Cache), labels, false)
   324  			}
   325  			if hm.CPU != nil {
   326  				labels := map[string]string{}
   327  				ts := hm.CPU.TimesStat
   328  				if ts != nil {
   329  					tot := ts.User + ts.System + ts.Idle + ts.Iowait + ts.Nice + ts.Steal
   330  					cpuUserVal := math.Round(ts.User/tot*100*100) / 100
   331  					updateResourceMetrics(cpuSubsystem, cpuUser, cpuUserVal, labels, false)
   332  					cpuSystemVal := math.Round(ts.System/tot*100*100) / 100
   333  					updateResourceMetrics(cpuSubsystem, cpuSystem, cpuSystemVal, labels, false)
   334  					cpuIdleVal := math.Round(ts.Idle/tot*100*100) / 100
   335  					updateResourceMetrics(cpuSubsystem, cpuIdle, cpuIdleVal, labels, false)
   336  					cpuIOWaitVal := math.Round(ts.Iowait/tot*100*100) / 100
   337  					updateResourceMetrics(cpuSubsystem, cpuIOWait, cpuIOWaitVal, labels, false)
   338  					cpuNiceVal := math.Round(ts.Nice/tot*100*100) / 100
   339  					updateResourceMetrics(cpuSubsystem, cpuNice, cpuNiceVal, labels, false)
   340  					cpuStealVal := math.Round(ts.Steal/tot*100*100) / 100
   341  					updateResourceMetrics(cpuSubsystem, cpuSteal, cpuStealVal, labels, false)
   342  				}
   343  				ls := hm.CPU.LoadStat
   344  				if ls != nil {
   345  					updateResourceMetrics(cpuSubsystem, cpuLoad1, ls.Load1, labels, false)
   346  					updateResourceMetrics(cpuSubsystem, cpuLoad5, ls.Load5, labels, false)
   347  					updateResourceMetrics(cpuSubsystem, cpuLoad15, ls.Load15, labels, false)
   348  					if hm.CPU.CPUCount > 0 {
   349  						perc := math.Round(ls.Load1*100*100/float64(hm.CPU.CPUCount)) / 100
   350  						updateResourceMetrics(cpuSubsystem, cpuLoad1Perc, perc, labels, false)
   351  						perc = math.Round(ls.Load5*100*100/float64(hm.CPU.CPUCount)) / 100
   352  						updateResourceMetrics(cpuSubsystem, cpuLoad5Perc, perc, labels, false)
   353  						perc = math.Round(ls.Load15*100*100/float64(hm.CPU.CPUCount)) / 100
   354  						updateResourceMetrics(cpuSubsystem, cpuLoad15Perc, perc, labels, false)
   355  					}
   356  				}
   357  			}
   358  			break // only one host expected
   359  		}
   360  	}
   362  	collectDriveMetrics(m)
   363  }
   365  func initLatestValues() {
   366  	m := collectLocalMetrics(madmin.MetricsDisk, collectMetricsOpts{
   367  		hosts: map[string]struct{}{
   368  			globalLocalNodeName: {},
   369  		},
   370  	})
   372  	latestDriveStatsMu.Lock()
   373  	latestDriveStats = map[string]madmin.DiskIOStats{}
   374  	for d, dm := range m.ByDisk {
   375  		latestDriveStats[d] = dm.IOStats
   376  	}
   377  	lastDriveStatsRefresh = time.Now().UTC()
   378  	latestDriveStatsMu.Unlock()
   379  }
   381  // startResourceMetricsCollection - starts the job for collecting resource metrics
   382  func startResourceMetricsCollection() {
   383  	initLatestValues()
   385  	resourceMetricsMapMu.Lock()
   386  	resourceMetricsMap = map[MetricSubsystem]ResourceMetrics{}
   387  	resourceMetricsMapMu.Unlock()
   388  	metricsTimer := time.NewTimer(resourceMetricsCollectionInterval)
   389  	defer metricsTimer.Stop()
   391  	collectLocalResourceMetrics()
   393  	for {
   394  		select {
   395  		case <-GlobalContext.Done():
   396  			return
   397  		case <-metricsTimer.C:
   398  			collectLocalResourceMetrics()
   400  			// Reset the timer for next cycle.
   401  			metricsTimer.Reset(resourceMetricsCollectionInterval)
   402  		}
   403  	}
   404  }
// minioResourceCollector is the Collector for resource metrics
type minioResourceCollector struct {
	// metricsGroups are the groups whose metrics Collect reports.
	metricsGroups []*MetricsGroupV2
	// desc is the single descriptor sent by Describe.
	desc          *prometheus.Desc
}
// Describe sends the super-set of all possible descriptors of metrics.
// Only the collector's single descriptor is sent on ch.
func (c *minioResourceCollector) Describe(ch chan<- *prometheus.Desc) {
	ch <- c.desc
}
   417  // Collect is called by the Prometheus registry when collecting metrics.
   418  func (c *minioResourceCollector) Collect(out chan<- prometheus.Metric) {
   419  	var wg sync.WaitGroup
   420  	publish := func(in <-chan MetricV2) {
   421  		defer wg.Done()
   422  		for metric := range in {
   423  			labels, values := getOrderedLabelValueArrays(metric.VariableLabels)
   424  			collectMetric(metric, labels, values, "resource", out)
   425  		}
   426  	}
   428  	// Call peer api to fetch metrics
   429  	wg.Add(2)
   430  	go publish(ReportMetrics(GlobalContext, c.metricsGroups))
   431  	go publish(globalNotificationSys.GetResourceMetrics(GlobalContext))
   432  	wg.Wait()
   433  }
   435  // newMinioResourceCollector describes the collector
   436  // and returns reference of minio resource Collector
   437  // It creates the Prometheus Description which is used
   438  // to define Metric and  help string
   439  func newMinioResourceCollector(metricsGroups []*MetricsGroupV2) *minioResourceCollector {
   440  	return &minioResourceCollector{
   441  		metricsGroups: metricsGroups,
   442  		desc:          prometheus.NewDesc("minio_resource_stats", "Resource statistics exposed by MinIO server", nil, nil),
   443  	}
   444  }
   446  func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []MetricV2 {
   447  	help := resourceMetricsHelpMap[rm.Name]
   448  	name := rm.Name
   449  	metrics := make([]MetricV2, 0, 3)
   450  	metrics = append(metrics, MetricV2{
   451  		Description:    getResourceMetricDescription(subSys, name, help),
   452  		Value:          rm.Current,
   453  		VariableLabels: cloneMSS(rm.Labels),
   454  	})
   456  	if requireAvgMax {
   457  		avgName := MetricName(fmt.Sprintf("%s_avg", name))
   458  		avgHelp := fmt.Sprintf("%s (avg)", help)
   459  		metrics = append(metrics, MetricV2{
   460  			Description:    getResourceMetricDescription(subSys, avgName, avgHelp),
   461  			Value:          math.Round(rm.Avg*100) / 100,
   462  			VariableLabels: cloneMSS(rm.Labels),
   463  		})
   465  		maxName := MetricName(fmt.Sprintf("%s_max", name))
   466  		maxHelp := fmt.Sprintf("%s (max)", help)
   467  		metrics = append(metrics, MetricV2{
   468  			Description:    getResourceMetricDescription(subSys, maxName, maxHelp),
   469  			Value:          rm.Max,
   470  			VariableLabels: cloneMSS(rm.Labels),
   471  		})
   472  	}
   474  	return metrics
   475  }
   477  func getResourceMetricDescription(subSys MetricSubsystem, name MetricName, help string) MetricDescription {
   478  	return MetricDescription{
   479  		Namespace: nodeMetricNamespace,
   480  		Subsystem: subSys,
   481  		Name:      name,
   482  		Help:      help,
   483  		Type:      gaugeMetric,
   484  	}
   485  }
   487  func getResourceMetrics() *MetricsGroupV2 {
   488  	mg := &MetricsGroupV2{
   489  		cacheInterval: resourceMetricsCacheInterval,
   490  	}
   491  	mg.RegisterRead(func(ctx context.Context) []MetricV2 {
   492  		metrics := []MetricV2{}
   494  		subSystems := []MetricSubsystem{interfaceSubsystem, memSubsystem, driveSubsystem, cpuSubsystem}
   495  		resourceMetricsMapMu.RLock()
   496  		defer resourceMetricsMapMu.RUnlock()
   497  		for _, subSys := range subSystems {
   498  			stats, found := resourceMetricsMap[subSys]
   499  			if found {
   500  				requireAvgMax := true
   501  				if subSys == driveSubsystem {
   502  					requireAvgMax = false
   503  				}
   504  				for _, m := range stats {
   505  					metrics = append(metrics, prepareResourceMetrics(m, subSys, requireAvgMax)...)
   506  				}
   507  			}
   508  		}
   510  		return metrics
   511  	})
   512  	return mg
   513  }
// metricsResourceHandler is the prometheus handler for resource metrics.
// It wraps the package-level resourceCollector with metricsHTTPHandler.
func metricsResourceHandler() http.Handler {
	return metricsHTTPHandler(resourceCollector, "handler.MetricsResource")
}