github.com/netdata/go.d.plugin@v0.58.1/modules/nvme/charts.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package nvme
     4  
     5  import (
     6  	"fmt"
     7  	"strings"
     8  
     9  	"github.com/netdata/go.d.plugin/agent/module"
    10  )
    11  
    12  const (
    13  	_ = 2050 + iota // right after Disks section
    14  	prioDeviceEstimatedEndurancePerc
    15  	prioDeviceAvailableSparePerc
    16  	prioDeviceCompositeTemperature
    17  	prioDeviceIOTransferredCount
    18  	prioDevicePowerCyclesCount
    19  	prioDevicePowerOnTime
    20  	prioDeviceUnsafeShutdownsCount
    21  	prioDeviceCriticalWarningsState
    22  	prioDeviceMediaErrorsRate
    23  	prioDeviceErrorLogEntriesRate
    24  	prioDeviceWarningCompositeTemperatureTime
    25  	prioDeviceCriticalCompositeTemperatureTime
    26  	prioDeviceThmTemp1TransitionsCount
    27  	prioDeviceThmTemp2TransitionsRate
    28  	prioDeviceThmTemp1Time
    29  	prioDeviceThmTemp2Time
    30  )
    31  
    32  var deviceChartsTmpl = module.Charts{
    33  	deviceEstimatedEndurancePercChartTmpl.Copy(),
    34  	deviceAvailableSparePercChartTmpl.Copy(),
    35  	deviceCompositeTemperatureChartTmpl.Copy(),
    36  	deviceIOTransferredCountChartTmpl.Copy(),
    37  	devicePowerCyclesCountChartTmpl.Copy(),
    38  	devicePowerOnTimeChartTmpl.Copy(),
    39  	deviceUnsafeShutdownsCountChartTmpl.Copy(),
    40  	deviceCriticalWarningsStateChartTmpl.Copy(),
    41  	deviceMediaErrorsRateChartTmpl.Copy(),
    42  	deviceErrorLogEntriesRateChartTmpl.Copy(),
    43  	deviceWarnCompositeTemperatureTimeChartTmpl.Copy(),
    44  	deviceCritCompositeTemperatureTimeChartTmpl.Copy(),
    45  	deviceThmTemp1TransitionsRateChartTmpl.Copy(),
    46  	deviceThmTemp2TransitionsRateChartTmpl.Copy(),
    47  	deviceThmTemp1TimeChartTmpl.Copy(),
    48  	deviceThmTemp2TimeChartTmpl.Copy(),
    49  }
    50  
    51  var deviceEstimatedEndurancePercChartTmpl = module.Chart{
    52  	ID:       "device_%s_estimated_endurance_perc",
    53  	Title:    "Estimated endurance",
    54  	Units:    "percentage",
    55  	Fam:      "endurance",
    56  	Ctx:      "nvme.device_estimated_endurance_perc",
    57  	Priority: prioDeviceEstimatedEndurancePerc,
    58  	Dims: module.Dims{
    59  		{ID: "device_%s_percentage_used", Name: "used"},
    60  	},
    61  }
    62  var deviceAvailableSparePercChartTmpl = module.Chart{
    63  	ID:       "device_%s_available_spare_perc",
    64  	Title:    "Remaining spare capacity",
    65  	Units:    "percentage",
    66  	Fam:      "spare",
    67  	Ctx:      "nvme.device_available_spare_perc",
    68  	Priority: prioDeviceAvailableSparePerc,
    69  	Dims: module.Dims{
    70  		{ID: "device_%s_available_spare", Name: "spare"},
    71  	},
    72  }
    73  var deviceCompositeTemperatureChartTmpl = module.Chart{
    74  	ID:       "device_%s_temperature",
    75  	Title:    "Composite temperature",
    76  	Units:    "celsius",
    77  	Fam:      "temperature",
    78  	Ctx:      "nvme.device_composite_temperature",
    79  	Priority: prioDeviceCompositeTemperature,
    80  	Dims: module.Dims{
    81  		{ID: "device_%s_temperature", Name: "temperature"},
    82  	},
    83  }
    84  var deviceIOTransferredCountChartTmpl = module.Chart{
    85  	ID:       "device_%s_io_transferred_count",
    86  	Title:    "Amount of data transferred to and from device",
    87  	Units:    "bytes",
    88  	Fam:      "transferred data",
    89  	Ctx:      "nvme.device_io_transferred_count",
    90  	Priority: prioDeviceIOTransferredCount,
    91  	Type:     module.Area,
    92  	Dims: module.Dims{
    93  		{ID: "device_%s_data_units_read", Name: "read"},
    94  		{ID: "device_%s_data_units_written", Name: "written", Mul: -1},
    95  	},
    96  }
    97  
    98  var devicePowerCyclesCountChartTmpl = module.Chart{
    99  	ID:       "device_%s_power_cycles_count",
   100  	Title:    "Power cycles",
   101  	Units:    "cycles",
   102  	Fam:      "power cycles",
   103  	Ctx:      "nvme.device_power_cycles_count",
   104  	Priority: prioDevicePowerCyclesCount,
   105  	Dims: module.Dims{
   106  		{ID: "device_%s_power_cycles", Name: "power"},
   107  	},
   108  }
   109  var devicePowerOnTimeChartTmpl = module.Chart{
   110  	ID:       "device_%s_power_on_time",
   111  	Title:    "Power-on time",
   112  	Units:    "seconds",
   113  	Fam:      "power-on time",
   114  	Ctx:      "nvme.device_power_on_time",
   115  	Priority: prioDevicePowerOnTime,
   116  	Dims: module.Dims{
   117  		{ID: "device_%s_power_on_time", Name: "power-on"},
   118  	},
   119  }
   120  var deviceCriticalWarningsStateChartTmpl = module.Chart{
   121  	ID:       "device_%s_critical_warnings_state",
   122  	Title:    "Critical warnings state",
   123  	Units:    "state",
   124  	Fam:      "critical warnings",
   125  	Ctx:      "nvme.device_critical_warnings_state",
   126  	Priority: prioDeviceCriticalWarningsState,
   127  	Dims: module.Dims{
   128  		{ID: "device_%s_critical_warning_available_spare", Name: "available_spare"},
   129  		{ID: "device_%s_critical_warning_temp_threshold", Name: "temp_threshold"},
   130  		{ID: "device_%s_critical_warning_nvm_subsystem_reliability", Name: "nvm_subsystem_reliability"},
   131  		{ID: "device_%s_critical_warning_read_only", Name: "read_only"},
   132  		{ID: "device_%s_critical_warning_volatile_mem_backup_failed", Name: "volatile_mem_backup_failed"},
   133  		{ID: "device_%s_critical_warning_persistent_memory_read_only", Name: "persistent_memory_read_only"},
   134  	},
   135  }
   136  var deviceUnsafeShutdownsCountChartTmpl = module.Chart{
   137  	ID:       "device_%s_unsafe_shutdowns_count",
   138  	Title:    "Unsafe shutdowns",
   139  	Units:    "shutdowns",
   140  	Fam:      "shutdowns",
   141  	Ctx:      "nvme.device_unsafe_shutdowns_count",
   142  	Priority: prioDeviceUnsafeShutdownsCount,
   143  	Dims: module.Dims{
   144  		{ID: "device_%s_unsafe_shutdowns", Name: "unsafe"},
   145  	},
   146  }
   147  var deviceMediaErrorsRateChartTmpl = module.Chart{
   148  	ID:       "device_%s_media_errors_rate",
   149  	Title:    "Media and data integrity errors",
   150  	Units:    "errors/s",
   151  	Fam:      "media errors",
   152  	Ctx:      "nvme.device_media_errors_rate",
   153  	Priority: prioDeviceMediaErrorsRate,
   154  	Dims: module.Dims{
   155  		{ID: "device_%s_media_errors", Name: "media", Algo: module.Incremental},
   156  	},
   157  }
   158  var deviceErrorLogEntriesRateChartTmpl = module.Chart{
   159  	ID:       "device_%s_error_log_entries_rate",
   160  	Title:    "Error log entries",
   161  	Units:    "entries/s",
   162  	Fam:      "error log",
   163  	Ctx:      "nvme.device_error_log_entries_rate",
   164  	Priority: prioDeviceErrorLogEntriesRate,
   165  	Dims: module.Dims{
   166  		{ID: "device_%s_num_err_log_entries", Name: "error_log", Algo: module.Incremental},
   167  	},
   168  }
   169  var deviceWarnCompositeTemperatureTimeChartTmpl = module.Chart{
   170  	ID:       "device_%s_warning_composite_temperature_time",
   171  	Title:    "Warning composite temperature time",
   172  	Units:    "seconds",
   173  	Fam:      "warn temp time",
   174  	Ctx:      "nvme.device_warning_composite_temperature_time",
   175  	Priority: prioDeviceWarningCompositeTemperatureTime,
   176  	Dims: module.Dims{
   177  		{ID: "device_%s_warning_temp_time", Name: "wctemp"},
   178  	},
   179  }
   180  var deviceCritCompositeTemperatureTimeChartTmpl = module.Chart{
   181  	ID:       "device_%s_critical_composite_temperature_time",
   182  	Title:    "Critical composite temperature time",
   183  	Units:    "seconds",
   184  	Fam:      "crit temp time",
   185  	Ctx:      "nvme.device_critical_composite_temperature_time",
   186  	Priority: prioDeviceCriticalCompositeTemperatureTime,
   187  	Dims: module.Dims{
   188  		{ID: "device_%s_critical_comp_time", Name: "cctemp"},
   189  	},
   190  }
   191  var (
   192  	deviceThmTemp1TransitionsRateChartTmpl = module.Chart{
   193  		ID:       "device_%s_thm_temp1_transitions_rate",
   194  		Title:    "Thermal management temp1 transitions",
   195  		Units:    "transitions/s",
   196  		Fam:      "thermal mgmt transitions",
   197  		Ctx:      "nvme.device_thermal_mgmt_temp1_transitions_rate",
   198  		Priority: prioDeviceThmTemp1TransitionsCount,
   199  		Dims: module.Dims{
   200  			{ID: "device_%s_thm_temp1_trans_count", Name: "temp1", Algo: module.Incremental},
   201  		},
   202  	}
   203  	deviceThmTemp2TransitionsRateChartTmpl = module.Chart{
   204  		ID:       "device_%s_thm_temp2_transitions_rate",
   205  		Title:    "Thermal management temp2 transitions",
   206  		Units:    "transitions/s",
   207  		Fam:      "thermal mgmt transitions",
   208  		Ctx:      "nvme.device_thermal_mgmt_temp2_transitions_rate",
   209  		Priority: prioDeviceThmTemp2TransitionsRate,
   210  		Dims: module.Dims{
   211  			{ID: "device_%s_thm_temp2_trans_count", Name: "temp2", Algo: module.Incremental},
   212  		},
   213  	}
   214  )
   215  var (
   216  	deviceThmTemp1TimeChartTmpl = module.Chart{
   217  		ID:       "device_%s_thm_temp1_time",
   218  		Title:    "Thermal management temp1 time",
   219  		Units:    "seconds",
   220  		Fam:      "thermal mgmt time",
   221  		Ctx:      "nvme.device_thermal_mgmt_temp1_time",
   222  		Priority: prioDeviceThmTemp1Time,
   223  		Dims: module.Dims{
   224  			{ID: "device_%s_thm_temp1_total_time", Name: "temp1"},
   225  		},
   226  	}
   227  	deviceThmTemp2TimeChartTmpl = module.Chart{
   228  		ID:       "device_%s_thm_temp2_time",
   229  		Title:    "Thermal management temp1 time",
   230  		Units:    "seconds",
   231  		Fam:      "thermal mgmt time",
   232  		Ctx:      "nvme.device_thermal_mgmt_temp2_time",
   233  		Priority: prioDeviceThmTemp2Time,
   234  		Dims: module.Dims{
   235  			{ID: "device_%s_thm_temp2_total_time", Name: "temp2"},
   236  		},
   237  	}
   238  )
   239  
   240  func (n *NVMe) addDeviceCharts(device string) {
   241  	charts := deviceChartsTmpl.Copy()
   242  
   243  	for _, chart := range *charts {
   244  		chart.ID = fmt.Sprintf(chart.ID, device)
   245  		chart.Labels = []module.Label{
   246  			{Key: "device", Value: device},
   247  		}
   248  		for _, dim := range chart.Dims {
   249  			dim.ID = fmt.Sprintf(dim.ID, device)
   250  		}
   251  	}
   252  
   253  	if err := n.Charts().Add(*charts...); err != nil {
   254  		n.Warning(err)
   255  	}
   256  }
   257  
   258  func (n *NVMe) removeDeviceCharts(device string) {
   259  	px := fmt.Sprintf("device_%s", device)
   260  
   261  	for _, chart := range *n.Charts() {
   262  		if strings.HasPrefix(chart.ID, px) {
   263  			chart.MarkRemove()
   264  			chart.MarkNotCreated()
   265  		}
   266  	}
   267  }