github.com/openebs/node-disk-manager@v1.9.1-0.20230225014141-4531f06ffa1e/pkg/metrics/smart/metrics.go (about) 1 /* 2 Copyright 2019 The OpenEBS Authors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package smart 18 19 import ( 20 "strings" 21 22 "github.com/prometheus/client_golang/prometheus" 23 ) 24 25 // MetricsData is the prometheus metrics that are exposed by the exporter. This includes 26 // all the metrics that are available via SMART 27 // TODO additional smart metrics need to be added here 28 type MetricsData struct { 29 // blockDeviceCurrentTemperatureValid tells whether the current temperature data is valid 30 blockDeviceCurrentTemperatureValid *prometheus.GaugeVec 31 // blockDeviceTemperature is the temperature of the the blockdevice if it is reported 32 blockDeviceCurrentTemperature *prometheus.GaugeVec 33 34 // blockDevicehighestTemperature is the highest temperature of the the blockdevice if it is reported 35 blockDeviceHighestTemperature *prometheus.GaugeVec 36 37 // blockDeviceHighestTemperatureValid tells whether the highest temperature data is valid 38 blockDeviceHighestTemperatureValid *prometheus.GaugeVec 39 40 // blockDevicelowestTemperature is the lowest temperature of the the blockdevice if it is reported 41 blockDeviceLowestTemperature *prometheus.GaugeVec 42 43 // blockDeviceLowestTemperatureValid tells whether the lowest temperature data is valid 44 blockDeviceLowestTemperatureValid *prometheus.GaugeVec 45 46 // blockDeviceCapacity is capacity of block device 47 blockDeviceCapacity *prometheus.GaugeVec 48 49 // blockDeviceTotalReadBytes is the total number of bytes read from the block device 50 blockDeviceTotalReadBytes *prometheus.CounterVec 51 52 // blockDeviceTotalWrittenBytes is the total number of bytes written from the block device 53 blockDeviceTotalWrittenBytes *prometheus.CounterVec 54 55 // blockDeviceUtilizationRate is utilization rate of the block device 56 blockDeviceUtilizationRate *prometheus.GaugeVec 57 58 // blockDevicePercentEnduranceUsed is percentage of endurance used by a block device 59 blockDevicePercentEnduranceUsed *prometheus.GaugeVec 60 61 // errors and rejected requests 62 rejectRequestCount prometheus.Counter 63 errorRequestCount prometheus.Counter 64 } 65 66 //MetricsLabels are the labels that are available on the prometheus metrics 67 type MetricsLabels struct { 68 UUID string 69 Path string 70 HostName string 71 NodeName string 72 } 73 74 // Metrics defines the metrics data along with the labels present on those metrics. 75 // The collector(currently seachest/smart) used to fetch the metrics is also defined 76 type Metrics struct { 77 CollectorType string 78 MetricsData 79 MetricsLabels 80 } 81 82 // NewMetrics creates a new Metrics with the given collector type 83 func NewMetrics(collector string) *Metrics { 84 return &Metrics{ 85 CollectorType: collector, 86 } 87 } 88 89 // Collectors lists out all the collectors for which the metrics is exposed 90 func (m *Metrics) Collectors() []prometheus.Collector { 91 return []prometheus.Collector{ 92 m.blockDeviceCurrentTemperatureValid, 93 m.blockDeviceHighestTemperatureValid, 94 m.blockDeviceLowestTemperatureValid, 95 m.blockDeviceCurrentTemperature, 96 m.blockDeviceHighestTemperature, 97 m.blockDeviceLowestTemperature, 98 m.blockDeviceTotalReadBytes, 99 m.blockDeviceTotalWrittenBytes, 100 m.blockDeviceUtilizationRate, 101 m.blockDevicePercentEnduranceUsed, 102 m.rejectRequestCount, 103 m.errorRequestCount, 104 } 105 } 106 107 var labels []string = []string{"blockdevicename", "path", "hostname", "nodename"} 108 109 // ErrorCollectors lists out all collectors for metrics related to error 110 func (m *Metrics) ErrorCollectors() []prometheus.Collector { 111 return []prometheus.Collector{ 112 m.rejectRequestCount, 113 m.errorRequestCount, 114 } 115 } 116 117 // IncRejectRequestCounter increments the reject request error counter 118 func (m *Metrics) IncRejectRequestCounter() { 119 m.rejectRequestCount.Inc() 120 } 121 122 // IncErrorRequestCounter increments the no of requests errored out. 123 func (m *Metrics) IncErrorRequestCounter() { 124 m.errorRequestCount.Inc() 125 } 126 127 // WithBlockDeviceCurrentTemperature declares the metric current temperature 128 // as a prometheus metric 129 func (m *Metrics) WithBlockDeviceCurrentTemperature() *Metrics { 130 m.blockDeviceCurrentTemperature = prometheus.NewGaugeVec( 131 prometheus.GaugeOpts{ 132 Namespace: m.CollectorType, 133 Name: "block_device_current_temperature_celsius", 134 Help: `Current reported temperature of the blockdevice. -1 if not reported`, 135 }, 136 labels, 137 ) 138 return m 139 } 140 141 // WithBlockDeviceHighestTemperature declares the metric highest temperature 142 // as a prometheus metric 143 func (m *Metrics) WithBlockDeviceHighestTemperature() *Metrics { 144 m.blockDeviceHighestTemperature = prometheus.NewGaugeVec( 145 prometheus.GaugeOpts{ 146 Namespace: m.CollectorType, 147 Name: "block_device_highest_temperature_celsius", 148 Help: `Highest reported temperature of the blockdevice. -1 if not reported`, 149 }, 150 labels, 151 ) 152 return m 153 } 154 155 // WithBlockDeviceLowestTemperature declares the metric lowest temperature 156 // as a prometheus metric 157 func (m *Metrics) WithBlockDeviceLowestTemperature() *Metrics { 158 m.blockDeviceLowestTemperature = prometheus.NewGaugeVec( 159 prometheus.GaugeOpts{ 160 Namespace: m.CollectorType, 161 Name: "block_device_lowest_temperature_celsius", 162 Help: `Lowest reported temperature of the blockdevice. -1 if not reported`, 163 }, 164 labels, 165 ) 166 return m 167 } 168 169 // WithBlockDeviceCurrentTemperatureValid declares the metric current temperature valid 170 // as a prometheus metric 171 func (m *Metrics) WithBlockDeviceCurrentTemperatureValid() *Metrics { 172 m.blockDeviceCurrentTemperatureValid = prometheus.NewGaugeVec( 173 prometheus.GaugeOpts{ 174 Namespace: m.CollectorType, 175 Name: "block_device_current_temperature_valid", 176 Help: `Validity of the current temperature data reported. 0 means not valid, 1 means valid`, 177 }, 178 labels, 179 ) 180 return m 181 } 182 183 // WithBlockDeviceHighestTemperatureValid declares the metric highest temperature valid 184 // as a prometheus metric 185 func (m *Metrics) WithBlockDeviceHighestTemperatureValid() *Metrics { 186 m.blockDeviceHighestTemperatureValid = prometheus.NewGaugeVec( 187 prometheus.GaugeOpts{ 188 Namespace: m.CollectorType, 189 Name: "block_device_highest_temperature_valid", 190 Help: `Validity of the highest temperature data reported. 0 means not valid, 1 means valid`, 191 }, 192 labels, 193 ) 194 return m 195 } 196 197 // WithBlockDeviceLowestTemperatureValid declares the metric lowest temperature valid 198 // as a prometheus metric 199 func (m *Metrics) WithBlockDeviceLowestTemperatureValid() *Metrics { 200 m.blockDeviceLowestTemperatureValid = prometheus.NewGaugeVec( 201 prometheus.GaugeOpts{ 202 Namespace: m.CollectorType, 203 Name: "block_device_lowest_temperature_valid", 204 Help: `Validity of the lowest temperature data reported. 0 means not valid, 1 means valid`, 205 }, 206 labels, 207 ) 208 return m 209 } 210 211 // WithBlockDeviceCapacity declares the blockdevice capacity 212 func (m *Metrics) WithBlockDeviceCapacity() *Metrics { 213 m.blockDeviceCapacity = prometheus.NewGaugeVec( 214 prometheus.GaugeOpts{ 215 Namespace: m.CollectorType, 216 Name: "block_device_capacity_bytes", 217 Help: `Capacity of the block device in bytes`, 218 }, 219 labels, 220 ) 221 return m 222 } 223 224 // WithBlockDeviceTotalBytesRead declares the total number of bytes read by a block device 225 func (m *Metrics) WithBlockDeviceTotalBytesRead() *Metrics { 226 m.blockDeviceTotalReadBytes = prometheus.NewCounterVec( 227 prometheus.CounterOpts{ 228 Namespace: m.CollectorType, 229 Name: "block_device_total_read_bytes", 230 Help: `total number of bytes read by a block device in bytes `, 231 }, 232 labels, 233 ) 234 return m 235 } 236 237 // WithBlockDeviceTotalBytesWritten declares the total number of bytes written by a block device 238 func (m *Metrics) WithBlockDeviceTotalBytesWritten() *Metrics { 239 m.blockDeviceTotalWrittenBytes = prometheus.NewCounterVec( 240 prometheus.CounterOpts{ 241 Namespace: m.CollectorType, 242 Name: "block_device_total_written_bytes", 243 Help: `total number of bytes written by a block device in bytes `, 244 }, 245 labels, 246 ) 247 return m 248 } 249 250 // WithBlockDeviceUtilizationRate declares the utilization rate of a block device 251 func (m *Metrics) WithBlockDeviceUtilizationRate() *Metrics { 252 m.blockDeviceUtilizationRate = prometheus.NewGaugeVec( 253 prometheus.GaugeOpts{ 254 Namespace: m.CollectorType, 255 Name: "block_device_utilization_rate_percent", 256 Help: `Ratio of actual workload to manufacturer's designed workload for the device `, 257 }, 258 labels, 259 ) 260 return m 261 } 262 263 // WithBlockDevicePercentEnduranceUsed declares the percentage of endurance used by a block device 264 func (m *Metrics) WithBlockDevicePercentEnduranceUsed() *Metrics { 265 m.blockDevicePercentEnduranceUsed = prometheus.NewGaugeVec( 266 prometheus.GaugeOpts{ 267 Namespace: m.CollectorType, 268 Name: "block_device_endurance_used_percent", 269 Help: `Estimate of the percentage of the device life that has been used `, 270 }, 271 labels, 272 ) 273 return m 274 } 275 276 // WithRejectRequest declares the reject request count metric 277 func (m *Metrics) WithRejectRequest() *Metrics { 278 m.rejectRequestCount = prometheus.NewCounter( 279 prometheus.CounterOpts{ 280 Namespace: m.CollectorType, 281 Name: "reject_request_count", 282 Help: `No. of requests rejected by the exporter`, 283 }, 284 ) 285 return m 286 } 287 288 // WithErrorRequest declares the error request count metric 289 func (m *Metrics) WithErrorRequest() *Metrics { 290 m.errorRequestCount = prometheus.NewCounter( 291 prometheus.CounterOpts{ 292 Namespace: m.CollectorType, 293 Name: "error_request_count", 294 Help: `No. of requests errored out by the exporter`, 295 }) 296 return m 297 } 298 299 // WithBlockDeviceUUID sets the blockdevice UUID to the metric label 300 func (ml *MetricsLabels) WithBlockDeviceUUID(uuid string) *MetricsLabels { 301 ml.UUID = uuid 302 return ml 303 } 304 305 // WithBlockDevicePath sets the blockdevice path to the metric label 306 func (ml *MetricsLabels) WithBlockDevicePath(path string) *MetricsLabels { 307 // remove /dev from the device path so that the device path is similar to the 308 // path given by node exporter 309 ml.Path = strings.ReplaceAll(path, "/dev/", "") 310 return ml 311 } 312 313 // WithBlockDeviceHostName sets the blockdevice hostname to the metric label 314 func (ml *MetricsLabels) WithBlockDeviceHostName(hostName string) *MetricsLabels { 315 ml.HostName = hostName 316 return ml 317 } 318 319 // WithBlockDeviceNodeName sets the blockdevice nodename to the metric label 320 func (ml *MetricsLabels) WithBlockDeviceNodeName(nodeName string) *MetricsLabels { 321 ml.NodeName = nodeName 322 return ml 323 } 324 325 // SetBlockDeviceCurrentTemperature sets the current temperature value to the metric 326 func (m *Metrics) SetBlockDeviceCurrentTemperature(currentTemp int16) *Metrics { 327 m.blockDeviceCurrentTemperature.WithLabelValues(m.UUID, 328 m.Path, 329 m.HostName, 330 m.NodeName, 331 ). 332 Set(float64(currentTemp)) 333 return m 334 } 335 336 // SetBlockDeviceHighestTemperature sets the highest temperature value to the metric 337 func (m *Metrics) SetBlockDeviceHighestTemperature(highTemp int16) *Metrics { 338 m.blockDeviceHighestTemperature.WithLabelValues(m.UUID, 339 m.Path, 340 m.HostName, 341 m.NodeName, 342 ). 343 Set(float64(highTemp)) 344 return m 345 } 346 347 // SetBlockDeviceLowestTemperature sets the lowest temperature value to the metric 348 func (m *Metrics) SetBlockDeviceLowestTemperature(lowTemp int16) *Metrics { 349 m.blockDeviceLowestTemperature.WithLabelValues(m.UUID, 350 m.Path, 351 m.HostName, 352 m.NodeName, 353 ). 354 Set(float64(lowTemp)) 355 return m 356 } 357 358 // SetBlockDeviceCurrentTemperatureValid sets the validity of the exposed current 359 // temperature metrics 360 func (m *Metrics) SetBlockDeviceCurrentTemperatureValid(valid bool) *Metrics { 361 m.blockDeviceCurrentTemperatureValid.WithLabelValues(m.UUID, 362 m.Path, 363 m.HostName, 364 m.NodeName, 365 ). 366 Set(getTemperatureValidity(valid)) 367 return m 368 } 369 370 // SetBlockDeviceHighestTemperatureValid sets the validity of the exposed highest 371 // temperature metrics 372 func (m *Metrics) SetBlockDeviceHighestTemperatureValid(valid bool) *Metrics { 373 m.blockDeviceCurrentTemperatureValid.WithLabelValues(m.UUID, 374 m.Path, 375 m.HostName, 376 m.NodeName, 377 ). 378 Set(getTemperatureValidity(valid)) 379 return m 380 } 381 382 // SetBlockDeviceLowestTemperatureValid sets the validity of the exposed lowest 383 // temperature metrics 384 func (m *Metrics) SetBlockDeviceLowestTemperatureValid(valid bool) *Metrics { 385 m.blockDeviceCurrentTemperatureValid.WithLabelValues(m.UUID, 386 m.Path, 387 m.HostName, 388 m.NodeName, 389 ). 390 Set(getTemperatureValidity(valid)) 391 return m 392 } 393 394 // getTemperatureValidity converts temperature validity 395 // flag to a metric 396 func getTemperatureValidity(isValid bool) float64 { 397 if isValid { 398 return 1 399 } 400 return 0 401 } 402 403 // SetBlockDeviceCapacity sets the current block device capacity value to the metric 404 func (m *Metrics) SetBlockDeviceCapacity(capacity uint64) *Metrics { 405 m.blockDeviceCapacity.WithLabelValues(m.UUID, 406 m.Path, 407 m.HostName, 408 m.NodeName, 409 ). 410 Set(float64(capacity)) 411 return m 412 } 413 414 // SetBlockDeviceTotalBytesRead sets the total bytes read value to the metric 415 func (m *Metrics) SetBlockDeviceTotalBytesRead(size uint64) *Metrics { 416 m.blockDeviceTotalReadBytes.WithLabelValues(m.UUID, 417 m.Path, 418 m.HostName, 419 m.NodeName, 420 ) 421 return m 422 } 423 424 // SetBlockDeviceTotalBytesWritten sets the total bytes written value to the metric 425 func (m *Metrics) SetBlockDeviceTotalBytesWritten(size uint64) *Metrics { 426 m.blockDeviceTotalWrittenBytes.WithLabelValues(m.UUID, 427 m.Path, 428 m.HostName, 429 m.NodeName, 430 ) 431 return m 432 } 433 434 // SetBlockDeviceUtilizationRate sets the utilization rate value to the metric 435 func (m *Metrics) SetBlockDeviceUtilizationRate(size float64) *Metrics { 436 m.blockDeviceUtilizationRate.WithLabelValues(m.UUID, 437 m.Path, 438 m.HostName, 439 m.NodeName, 440 ). 441 Set(float64(size)) 442 return m 443 } 444 445 // SetBlockDevicePercentEnduranceUsed sets the percentage of endurance used by a block device to the metric 446 func (m *Metrics) SetBlockDevicePercentEnduranceUsed(size float64) *Metrics { 447 m.blockDevicePercentEnduranceUsed.WithLabelValues(m.UUID, 448 m.Path, 449 m.HostName, 450 m.NodeName, 451 ). 452 Set(float64(size)) 453 return m 454 }