github.com/openebs/node-disk-manager@v1.9.1-0.20230225014141-4531f06ffa1e/pkg/metrics/smart/metrics.go (about)

     1  /*
     2  Copyright 2019 The OpenEBS Authors
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package smart
    18  
    19  import (
    20  	"strings"
    21  
    22  	"github.com/prometheus/client_golang/prometheus"
    23  )
    24  
    25  // MetricsData is the prometheus metrics that are exposed by the exporter. This includes
    26  // all the metrics that are available via SMART
    27  // TODO additional smart metrics need to be added here
    28  type MetricsData struct {
    29  	// blockDeviceCurrentTemperatureValid tells whether the current temperature data is valid
    30  	blockDeviceCurrentTemperatureValid *prometheus.GaugeVec
    31  	// blockDeviceTemperature is the temperature of the the blockdevice if it is reported
    32  	blockDeviceCurrentTemperature *prometheus.GaugeVec
    33  
    34  	// blockDevicehighestTemperature is the highest temperature of the the blockdevice if it is reported
    35  	blockDeviceHighestTemperature *prometheus.GaugeVec
    36  
    37  	// blockDeviceHighestTemperatureValid tells whether the highest temperature data is valid
    38  	blockDeviceHighestTemperatureValid *prometheus.GaugeVec
    39  
    40  	// blockDevicelowestTemperature is the lowest temperature of the the blockdevice if it is reported
    41  	blockDeviceLowestTemperature *prometheus.GaugeVec
    42  
    43  	// blockDeviceLowestTemperatureValid tells whether the lowest temperature data is valid
    44  	blockDeviceLowestTemperatureValid *prometheus.GaugeVec
    45  
    46  	// blockDeviceCapacity is capacity of block device
    47  	blockDeviceCapacity *prometheus.GaugeVec
    48  
    49  	// blockDeviceTotalReadBytes is the total number of bytes read from the block device
    50  	blockDeviceTotalReadBytes *prometheus.CounterVec
    51  
    52  	// blockDeviceTotalWrittenBytes is the total number of bytes written from the block device
    53  	blockDeviceTotalWrittenBytes *prometheus.CounterVec
    54  
    55  	// blockDeviceUtilizationRate is utilization rate of the block device
    56  	blockDeviceUtilizationRate *prometheus.GaugeVec
    57  
    58  	// blockDevicePercentEnduranceUsed  is percentage of endurance used by a block device
    59  	blockDevicePercentEnduranceUsed *prometheus.GaugeVec
    60  
    61  	// errors and rejected requests
    62  	rejectRequestCount prometheus.Counter
    63  	errorRequestCount  prometheus.Counter
    64  }
    65  
    66  //MetricsLabels are the labels that are available on the prometheus metrics
    67  type MetricsLabels struct {
    68  	UUID     string
    69  	Path     string
    70  	HostName string
    71  	NodeName string
    72  }
    73  
    74  // Metrics defines the metrics data along with the labels present on those metrics.
    75  // The collector(currently seachest/smart) used to fetch the metrics is also defined
    76  type Metrics struct {
    77  	CollectorType string
    78  	MetricsData
    79  	MetricsLabels
    80  }
    81  
    82  // NewMetrics creates a new Metrics with the given collector type
    83  func NewMetrics(collector string) *Metrics {
    84  	return &Metrics{
    85  		CollectorType: collector,
    86  	}
    87  }
    88  
    89  // Collectors lists out all the collectors for which the metrics is exposed
    90  func (m *Metrics) Collectors() []prometheus.Collector {
    91  	return []prometheus.Collector{
    92  		m.blockDeviceCurrentTemperatureValid,
    93  		m.blockDeviceHighestTemperatureValid,
    94  		m.blockDeviceLowestTemperatureValid,
    95  		m.blockDeviceCurrentTemperature,
    96  		m.blockDeviceHighestTemperature,
    97  		m.blockDeviceLowestTemperature,
    98  		m.blockDeviceTotalReadBytes,
    99  		m.blockDeviceTotalWrittenBytes,
   100  		m.blockDeviceUtilizationRate,
   101  		m.blockDevicePercentEnduranceUsed,
   102  		m.rejectRequestCount,
   103  		m.errorRequestCount,
   104  	}
   105  }
   106  
   107  var labels []string = []string{"blockdevicename", "path", "hostname", "nodename"}
   108  
   109  // ErrorCollectors lists out all collectors for metrics related to error
   110  func (m *Metrics) ErrorCollectors() []prometheus.Collector {
   111  	return []prometheus.Collector{
   112  		m.rejectRequestCount,
   113  		m.errorRequestCount,
   114  	}
   115  }
   116  
   117  // IncRejectRequestCounter increments the reject request error counter
   118  func (m *Metrics) IncRejectRequestCounter() {
   119  	m.rejectRequestCount.Inc()
   120  }
   121  
   122  // IncErrorRequestCounter increments the no of requests errored out.
   123  func (m *Metrics) IncErrorRequestCounter() {
   124  	m.errorRequestCount.Inc()
   125  }
   126  
   127  // WithBlockDeviceCurrentTemperature declares the metric current temperature
   128  // as a prometheus metric
   129  func (m *Metrics) WithBlockDeviceCurrentTemperature() *Metrics {
   130  	m.blockDeviceCurrentTemperature = prometheus.NewGaugeVec(
   131  		prometheus.GaugeOpts{
   132  			Namespace: m.CollectorType,
   133  			Name:      "block_device_current_temperature_celsius",
   134  			Help:      `Current reported temperature of the blockdevice. -1 if not reported`,
   135  		},
   136  		labels,
   137  	)
   138  	return m
   139  }
   140  
   141  // WithBlockDeviceHighestTemperature declares the metric highest temperature
   142  // as a prometheus metric
   143  func (m *Metrics) WithBlockDeviceHighestTemperature() *Metrics {
   144  	m.blockDeviceHighestTemperature = prometheus.NewGaugeVec(
   145  		prometheus.GaugeOpts{
   146  			Namespace: m.CollectorType,
   147  			Name:      "block_device_highest_temperature_celsius",
   148  			Help:      `Highest reported temperature of the blockdevice. -1 if not reported`,
   149  		},
   150  		labels,
   151  	)
   152  	return m
   153  }
   154  
   155  // WithBlockDeviceLowestTemperature declares the metric lowest temperature
   156  // as a prometheus metric
   157  func (m *Metrics) WithBlockDeviceLowestTemperature() *Metrics {
   158  	m.blockDeviceLowestTemperature = prometheus.NewGaugeVec(
   159  		prometheus.GaugeOpts{
   160  			Namespace: m.CollectorType,
   161  			Name:      "block_device_lowest_temperature_celsius",
   162  			Help:      `Lowest reported temperature of the blockdevice. -1 if not reported`,
   163  		},
   164  		labels,
   165  	)
   166  	return m
   167  }
   168  
   169  // WithBlockDeviceCurrentTemperatureValid declares the metric current temperature valid
   170  // as a prometheus metric
   171  func (m *Metrics) WithBlockDeviceCurrentTemperatureValid() *Metrics {
   172  	m.blockDeviceCurrentTemperatureValid = prometheus.NewGaugeVec(
   173  		prometheus.GaugeOpts{
   174  			Namespace: m.CollectorType,
   175  			Name:      "block_device_current_temperature_valid",
   176  			Help:      `Validity of the current temperature data reported. 0 means not valid, 1 means valid`,
   177  		},
   178  		labels,
   179  	)
   180  	return m
   181  }
   182  
   183  // WithBlockDeviceHighestTemperatureValid declares the metric highest temperature valid
   184  // as a prometheus metric
   185  func (m *Metrics) WithBlockDeviceHighestTemperatureValid() *Metrics {
   186  	m.blockDeviceHighestTemperatureValid = prometheus.NewGaugeVec(
   187  		prometheus.GaugeOpts{
   188  			Namespace: m.CollectorType,
   189  			Name:      "block_device_highest_temperature_valid",
   190  			Help:      `Validity of the highest temperature data reported. 0 means not valid, 1 means valid`,
   191  		},
   192  		labels,
   193  	)
   194  	return m
   195  }
   196  
   197  // WithBlockDeviceLowestTemperatureValid declares the metric lowest temperature valid
   198  // as a prometheus metric
   199  func (m *Metrics) WithBlockDeviceLowestTemperatureValid() *Metrics {
   200  	m.blockDeviceLowestTemperatureValid = prometheus.NewGaugeVec(
   201  		prometheus.GaugeOpts{
   202  			Namespace: m.CollectorType,
   203  			Name:      "block_device_lowest_temperature_valid",
   204  			Help:      `Validity of the lowest temperature data reported. 0 means not valid, 1 means valid`,
   205  		},
   206  		labels,
   207  	)
   208  	return m
   209  }
   210  
   211  // WithBlockDeviceCapacity declares the blockdevice capacity
   212  func (m *Metrics) WithBlockDeviceCapacity() *Metrics {
   213  	m.blockDeviceCapacity = prometheus.NewGaugeVec(
   214  		prometheus.GaugeOpts{
   215  			Namespace: m.CollectorType,
   216  			Name:      "block_device_capacity_bytes",
   217  			Help:      `Capacity of the block device in bytes`,
   218  		},
   219  		labels,
   220  	)
   221  	return m
   222  }
   223  
   224  // WithBlockDeviceTotalBytesRead declares the total number of bytes read by a block device
   225  func (m *Metrics) WithBlockDeviceTotalBytesRead() *Metrics {
   226  	m.blockDeviceTotalReadBytes = prometheus.NewCounterVec(
   227  		prometheus.CounterOpts{
   228  			Namespace: m.CollectorType,
   229  			Name:      "block_device_total_read_bytes",
   230  			Help:      `total number of bytes read by a block device in bytes `,
   231  		},
   232  		labels,
   233  	)
   234  	return m
   235  }
   236  
   237  // WithBlockDeviceTotalBytesWritten declares the total number of bytes written by a block device
   238  func (m *Metrics) WithBlockDeviceTotalBytesWritten() *Metrics {
   239  	m.blockDeviceTotalWrittenBytes = prometheus.NewCounterVec(
   240  		prometheus.CounterOpts{
   241  			Namespace: m.CollectorType,
   242  			Name:      "block_device_total_written_bytes",
   243  			Help:      `total number of bytes written by a block device in bytes `,
   244  		},
   245  		labels,
   246  	)
   247  	return m
   248  }
   249  
   250  // WithBlockDeviceUtilizationRate declares the utilization rate of a block device
   251  func (m *Metrics) WithBlockDeviceUtilizationRate() *Metrics {
   252  	m.blockDeviceUtilizationRate = prometheus.NewGaugeVec(
   253  		prometheus.GaugeOpts{
   254  			Namespace: m.CollectorType,
   255  			Name:      "block_device_utilization_rate_percent",
   256  			Help:      `Ratio of actual workload to manufacturer's designed workload for the device `,
   257  		},
   258  		labels,
   259  	)
   260  	return m
   261  }
   262  
   263  // WithBlockDevicePercentEnduranceUsed declares the percentage of endurance used by a block device
   264  func (m *Metrics) WithBlockDevicePercentEnduranceUsed() *Metrics {
   265  	m.blockDevicePercentEnduranceUsed = prometheus.NewGaugeVec(
   266  		prometheus.GaugeOpts{
   267  			Namespace: m.CollectorType,
   268  			Name:      "block_device_endurance_used_percent",
   269  			Help:      `Estimate of the percentage of the device life that has been used `,
   270  		},
   271  		labels,
   272  	)
   273  	return m
   274  }
   275  
   276  // WithRejectRequest declares the reject request count metric
   277  func (m *Metrics) WithRejectRequest() *Metrics {
   278  	m.rejectRequestCount = prometheus.NewCounter(
   279  		prometheus.CounterOpts{
   280  			Namespace: m.CollectorType,
   281  			Name:      "reject_request_count",
   282  			Help:      `No. of requests rejected by the exporter`,
   283  		},
   284  	)
   285  	return m
   286  }
   287  
   288  // WithErrorRequest declares the error request count metric
   289  func (m *Metrics) WithErrorRequest() *Metrics {
   290  	m.errorRequestCount = prometheus.NewCounter(
   291  		prometheus.CounterOpts{
   292  			Namespace: m.CollectorType,
   293  			Name:      "error_request_count",
   294  			Help:      `No. of requests errored out by the exporter`,
   295  		})
   296  	return m
   297  }
   298  
   299  // WithBlockDeviceUUID sets the blockdevice UUID to the metric label
   300  func (ml *MetricsLabels) WithBlockDeviceUUID(uuid string) *MetricsLabels {
   301  	ml.UUID = uuid
   302  	return ml
   303  }
   304  
   305  // WithBlockDevicePath sets the blockdevice path to the metric label
   306  func (ml *MetricsLabels) WithBlockDevicePath(path string) *MetricsLabels {
   307  	// remove /dev from the device path so that the device path is similar to the
   308  	// path given by node exporter
   309  	ml.Path = strings.ReplaceAll(path, "/dev/", "")
   310  	return ml
   311  }
   312  
   313  // WithBlockDeviceHostName sets the blockdevice hostname to the metric label
   314  func (ml *MetricsLabels) WithBlockDeviceHostName(hostName string) *MetricsLabels {
   315  	ml.HostName = hostName
   316  	return ml
   317  }
   318  
   319  // WithBlockDeviceNodeName sets the blockdevice nodename to the metric label
   320  func (ml *MetricsLabels) WithBlockDeviceNodeName(nodeName string) *MetricsLabels {
   321  	ml.NodeName = nodeName
   322  	return ml
   323  }
   324  
   325  // SetBlockDeviceCurrentTemperature sets the current temperature value to the metric
   326  func (m *Metrics) SetBlockDeviceCurrentTemperature(currentTemp int16) *Metrics {
   327  	m.blockDeviceCurrentTemperature.WithLabelValues(m.UUID,
   328  		m.Path,
   329  		m.HostName,
   330  		m.NodeName,
   331  	).
   332  		Set(float64(currentTemp))
   333  	return m
   334  }
   335  
   336  // SetBlockDeviceHighestTemperature sets the highest temperature value to the metric
   337  func (m *Metrics) SetBlockDeviceHighestTemperature(highTemp int16) *Metrics {
   338  	m.blockDeviceHighestTemperature.WithLabelValues(m.UUID,
   339  		m.Path,
   340  		m.HostName,
   341  		m.NodeName,
   342  	).
   343  		Set(float64(highTemp))
   344  	return m
   345  }
   346  
   347  // SetBlockDeviceLowestTemperature sets the lowest temperature value to the metric
   348  func (m *Metrics) SetBlockDeviceLowestTemperature(lowTemp int16) *Metrics {
   349  	m.blockDeviceLowestTemperature.WithLabelValues(m.UUID,
   350  		m.Path,
   351  		m.HostName,
   352  		m.NodeName,
   353  	).
   354  		Set(float64(lowTemp))
   355  	return m
   356  }
   357  
   358  // SetBlockDeviceCurrentTemperatureValid sets the validity of the exposed current
   359  // temperature metrics
   360  func (m *Metrics) SetBlockDeviceCurrentTemperatureValid(valid bool) *Metrics {
   361  	m.blockDeviceCurrentTemperatureValid.WithLabelValues(m.UUID,
   362  		m.Path,
   363  		m.HostName,
   364  		m.NodeName,
   365  	).
   366  		Set(getTemperatureValidity(valid))
   367  	return m
   368  }
   369  
   370  // SetBlockDeviceHighestTemperatureValid sets the validity of the exposed highest
   371  // temperature metrics
   372  func (m *Metrics) SetBlockDeviceHighestTemperatureValid(valid bool) *Metrics {
   373  	m.blockDeviceCurrentTemperatureValid.WithLabelValues(m.UUID,
   374  		m.Path,
   375  		m.HostName,
   376  		m.NodeName,
   377  	).
   378  		Set(getTemperatureValidity(valid))
   379  	return m
   380  }
   381  
   382  // SetBlockDeviceLowestTemperatureValid sets the validity of the exposed lowest
   383  // temperature metrics
   384  func (m *Metrics) SetBlockDeviceLowestTemperatureValid(valid bool) *Metrics {
   385  	m.blockDeviceCurrentTemperatureValid.WithLabelValues(m.UUID,
   386  		m.Path,
   387  		m.HostName,
   388  		m.NodeName,
   389  	).
   390  		Set(getTemperatureValidity(valid))
   391  	return m
   392  }
   393  
   394  // getTemperatureValidity converts temperature validity
   395  // flag to a metric
   396  func getTemperatureValidity(isValid bool) float64 {
   397  	if isValid {
   398  		return 1
   399  	}
   400  	return 0
   401  }
   402  
   403  // SetBlockDeviceCapacity sets the current block device capacity value to the metric
   404  func (m *Metrics) SetBlockDeviceCapacity(capacity uint64) *Metrics {
   405  	m.blockDeviceCapacity.WithLabelValues(m.UUID,
   406  		m.Path,
   407  		m.HostName,
   408  		m.NodeName,
   409  	).
   410  		Set(float64(capacity))
   411  	return m
   412  }
   413  
   414  // SetBlockDeviceTotalBytesRead sets the total bytes read value to the metric
   415  func (m *Metrics) SetBlockDeviceTotalBytesRead(size uint64) *Metrics {
   416  	m.blockDeviceTotalReadBytes.WithLabelValues(m.UUID,
   417  		m.Path,
   418  		m.HostName,
   419  		m.NodeName,
   420  	)
   421  	return m
   422  }
   423  
   424  // SetBlockDeviceTotalBytesWritten sets the total bytes written value to the metric
   425  func (m *Metrics) SetBlockDeviceTotalBytesWritten(size uint64) *Metrics {
   426  	m.blockDeviceTotalWrittenBytes.WithLabelValues(m.UUID,
   427  		m.Path,
   428  		m.HostName,
   429  		m.NodeName,
   430  	)
   431  	return m
   432  }
   433  
   434  // SetBlockDeviceUtilizationRate sets the utilization rate value to the metric
   435  func (m *Metrics) SetBlockDeviceUtilizationRate(size float64) *Metrics {
   436  	m.blockDeviceUtilizationRate.WithLabelValues(m.UUID,
   437  		m.Path,
   438  		m.HostName,
   439  		m.NodeName,
   440  	).
   441  		Set(float64(size))
   442  	return m
   443  }
   444  
   445  // SetBlockDevicePercentEnduranceUsed sets the percentage of endurance used by a block device to the metric
   446  func (m *Metrics) SetBlockDevicePercentEnduranceUsed(size float64) *Metrics {
   447  	m.blockDevicePercentEnduranceUsed.WithLabelValues(m.UUID,
   448  		m.Path,
   449  		m.HostName,
   450  		m.NodeName,
   451  	).
   452  		Set(float64(size))
   453  	return m
   454  }