github.com/netdata/go.d.plugin@v0.58.1/modules/nvme/charts.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package nvme 4 5 import ( 6 "fmt" 7 "strings" 8 9 "github.com/netdata/go.d.plugin/agent/module" 10 ) 11 12 const ( 13 _ = 2050 + iota // right after Disks section 14 prioDeviceEstimatedEndurancePerc 15 prioDeviceAvailableSparePerc 16 prioDeviceCompositeTemperature 17 prioDeviceIOTransferredCount 18 prioDevicePowerCyclesCount 19 prioDevicePowerOnTime 20 prioDeviceUnsafeShutdownsCount 21 prioDeviceCriticalWarningsState 22 prioDeviceMediaErrorsRate 23 prioDeviceErrorLogEntriesRate 24 prioDeviceWarningCompositeTemperatureTime 25 prioDeviceCriticalCompositeTemperatureTime 26 prioDeviceThmTemp1TransitionsCount 27 prioDeviceThmTemp2TransitionsRate 28 prioDeviceThmTemp1Time 29 prioDeviceThmTemp2Time 30 ) 31 32 var deviceChartsTmpl = module.Charts{ 33 deviceEstimatedEndurancePercChartTmpl.Copy(), 34 deviceAvailableSparePercChartTmpl.Copy(), 35 deviceCompositeTemperatureChartTmpl.Copy(), 36 deviceIOTransferredCountChartTmpl.Copy(), 37 devicePowerCyclesCountChartTmpl.Copy(), 38 devicePowerOnTimeChartTmpl.Copy(), 39 deviceUnsafeShutdownsCountChartTmpl.Copy(), 40 deviceCriticalWarningsStateChartTmpl.Copy(), 41 deviceMediaErrorsRateChartTmpl.Copy(), 42 deviceErrorLogEntriesRateChartTmpl.Copy(), 43 deviceWarnCompositeTemperatureTimeChartTmpl.Copy(), 44 deviceCritCompositeTemperatureTimeChartTmpl.Copy(), 45 deviceThmTemp1TransitionsRateChartTmpl.Copy(), 46 deviceThmTemp2TransitionsRateChartTmpl.Copy(), 47 deviceThmTemp1TimeChartTmpl.Copy(), 48 deviceThmTemp2TimeChartTmpl.Copy(), 49 } 50 51 var deviceEstimatedEndurancePercChartTmpl = module.Chart{ 52 ID: "device_%s_estimated_endurance_perc", 53 Title: "Estimated endurance", 54 Units: "percentage", 55 Fam: "endurance", 56 Ctx: "nvme.device_estimated_endurance_perc", 57 Priority: prioDeviceEstimatedEndurancePerc, 58 Dims: module.Dims{ 59 {ID: "device_%s_percentage_used", Name: "used"}, 60 }, 61 } 62 var deviceAvailableSparePercChartTmpl = module.Chart{ 63 ID: "device_%s_available_spare_perc", 64 Title: "Remaining spare capacity", 65 Units: "percentage", 66 Fam: "spare", 67 Ctx: "nvme.device_available_spare_perc", 68 Priority: prioDeviceAvailableSparePerc, 69 Dims: module.Dims{ 70 {ID: "device_%s_available_spare", Name: "spare"}, 71 }, 72 } 73 var deviceCompositeTemperatureChartTmpl = module.Chart{ 74 ID: "device_%s_temperature", 75 Title: "Composite temperature", 76 Units: "celsius", 77 Fam: "temperature", 78 Ctx: "nvme.device_composite_temperature", 79 Priority: prioDeviceCompositeTemperature, 80 Dims: module.Dims{ 81 {ID: "device_%s_temperature", Name: "temperature"}, 82 }, 83 } 84 var deviceIOTransferredCountChartTmpl = module.Chart{ 85 ID: "device_%s_io_transferred_count", 86 Title: "Amount of data transferred to and from device", 87 Units: "bytes", 88 Fam: "transferred data", 89 Ctx: "nvme.device_io_transferred_count", 90 Priority: prioDeviceIOTransferredCount, 91 Type: module.Area, 92 Dims: module.Dims{ 93 {ID: "device_%s_data_units_read", Name: "read"}, 94 {ID: "device_%s_data_units_written", Name: "written", Mul: -1}, 95 }, 96 } 97 98 var devicePowerCyclesCountChartTmpl = module.Chart{ 99 ID: "device_%s_power_cycles_count", 100 Title: "Power cycles", 101 Units: "cycles", 102 Fam: "power cycles", 103 Ctx: "nvme.device_power_cycles_count", 104 Priority: prioDevicePowerCyclesCount, 105 Dims: module.Dims{ 106 {ID: "device_%s_power_cycles", Name: "power"}, 107 }, 108 } 109 var devicePowerOnTimeChartTmpl = module.Chart{ 110 ID: "device_%s_power_on_time", 111 Title: "Power-on time", 112 Units: "seconds", 113 Fam: "power-on time", 114 Ctx: "nvme.device_power_on_time", 115 Priority: prioDevicePowerOnTime, 116 Dims: module.Dims{ 117 {ID: "device_%s_power_on_time", Name: "power-on"}, 118 }, 119 } 120 var deviceCriticalWarningsStateChartTmpl = module.Chart{ 121 ID: "device_%s_critical_warnings_state", 122 Title: "Critical warnings state", 123 Units: "state", 124 Fam: "critical warnings", 125 Ctx: "nvme.device_critical_warnings_state", 126 Priority: prioDeviceCriticalWarningsState, 127 Dims: module.Dims{ 128 {ID: "device_%s_critical_warning_available_spare", Name: "available_spare"}, 129 {ID: "device_%s_critical_warning_temp_threshold", Name: "temp_threshold"}, 130 {ID: "device_%s_critical_warning_nvm_subsystem_reliability", Name: "nvm_subsystem_reliability"}, 131 {ID: "device_%s_critical_warning_read_only", Name: "read_only"}, 132 {ID: "device_%s_critical_warning_volatile_mem_backup_failed", Name: "volatile_mem_backup_failed"}, 133 {ID: "device_%s_critical_warning_persistent_memory_read_only", Name: "persistent_memory_read_only"}, 134 }, 135 } 136 var deviceUnsafeShutdownsCountChartTmpl = module.Chart{ 137 ID: "device_%s_unsafe_shutdowns_count", 138 Title: "Unsafe shutdowns", 139 Units: "shutdowns", 140 Fam: "shutdowns", 141 Ctx: "nvme.device_unsafe_shutdowns_count", 142 Priority: prioDeviceUnsafeShutdownsCount, 143 Dims: module.Dims{ 144 {ID: "device_%s_unsafe_shutdowns", Name: "unsafe"}, 145 }, 146 } 147 var deviceMediaErrorsRateChartTmpl = module.Chart{ 148 ID: "device_%s_media_errors_rate", 149 Title: "Media and data integrity errors", 150 Units: "errors/s", 151 Fam: "media errors", 152 Ctx: "nvme.device_media_errors_rate", 153 Priority: prioDeviceMediaErrorsRate, 154 Dims: module.Dims{ 155 {ID: "device_%s_media_errors", Name: "media", Algo: module.Incremental}, 156 }, 157 } 158 var deviceErrorLogEntriesRateChartTmpl = module.Chart{ 159 ID: "device_%s_error_log_entries_rate", 160 Title: "Error log entries", 161 Units: "entries/s", 162 Fam: "error log", 163 Ctx: "nvme.device_error_log_entries_rate", 164 Priority: prioDeviceErrorLogEntriesRate, 165 Dims: module.Dims{ 166 {ID: "device_%s_num_err_log_entries", Name: "error_log", Algo: module.Incremental}, 167 }, 168 } 169 var deviceWarnCompositeTemperatureTimeChartTmpl = module.Chart{ 170 ID: "device_%s_warning_composite_temperature_time", 171 Title: "Warning composite temperature time", 172 Units: "seconds", 173 Fam: "warn temp time", 174 Ctx: "nvme.device_warning_composite_temperature_time", 175 Priority: prioDeviceWarningCompositeTemperatureTime, 176 Dims: module.Dims{ 177 {ID: "device_%s_warning_temp_time", Name: "wctemp"}, 178 }, 179 } 180 var deviceCritCompositeTemperatureTimeChartTmpl = module.Chart{ 181 ID: "device_%s_critical_composite_temperature_time", 182 Title: "Critical composite temperature time", 183 Units: "seconds", 184 Fam: "crit temp time", 185 Ctx: "nvme.device_critical_composite_temperature_time", 186 Priority: prioDeviceCriticalCompositeTemperatureTime, 187 Dims: module.Dims{ 188 {ID: "device_%s_critical_comp_time", Name: "cctemp"}, 189 }, 190 } 191 var ( 192 deviceThmTemp1TransitionsRateChartTmpl = module.Chart{ 193 ID: "device_%s_thm_temp1_transitions_rate", 194 Title: "Thermal management temp1 transitions", 195 Units: "transitions/s", 196 Fam: "thermal mgmt transitions", 197 Ctx: "nvme.device_thermal_mgmt_temp1_transitions_rate", 198 Priority: prioDeviceThmTemp1TransitionsCount, 199 Dims: module.Dims{ 200 {ID: "device_%s_thm_temp1_trans_count", Name: "temp1", Algo: module.Incremental}, 201 }, 202 } 203 deviceThmTemp2TransitionsRateChartTmpl = module.Chart{ 204 ID: "device_%s_thm_temp2_transitions_rate", 205 Title: "Thermal management temp2 transitions", 206 Units: "transitions/s", 207 Fam: "thermal mgmt transitions", 208 Ctx: "nvme.device_thermal_mgmt_temp2_transitions_rate", 209 Priority: prioDeviceThmTemp2TransitionsRate, 210 Dims: module.Dims{ 211 {ID: "device_%s_thm_temp2_trans_count", Name: "temp2", Algo: module.Incremental}, 212 }, 213 } 214 ) 215 var ( 216 deviceThmTemp1TimeChartTmpl = module.Chart{ 217 ID: "device_%s_thm_temp1_time", 218 Title: "Thermal management temp1 time", 219 Units: "seconds", 220 Fam: "thermal mgmt time", 221 Ctx: "nvme.device_thermal_mgmt_temp1_time", 222 Priority: prioDeviceThmTemp1Time, 223 Dims: module.Dims{ 224 {ID: "device_%s_thm_temp1_total_time", Name: "temp1"}, 225 }, 226 } 227 deviceThmTemp2TimeChartTmpl = module.Chart{ 228 ID: "device_%s_thm_temp2_time", 229 Title: "Thermal management temp1 time", 230 Units: "seconds", 231 Fam: "thermal mgmt time", 232 Ctx: "nvme.device_thermal_mgmt_temp2_time", 233 Priority: prioDeviceThmTemp2Time, 234 Dims: module.Dims{ 235 {ID: "device_%s_thm_temp2_total_time", Name: "temp2"}, 236 }, 237 } 238 ) 239 240 func (n *NVMe) addDeviceCharts(device string) { 241 charts := deviceChartsTmpl.Copy() 242 243 for _, chart := range *charts { 244 chart.ID = fmt.Sprintf(chart.ID, device) 245 chart.Labels = []module.Label{ 246 {Key: "device", Value: device}, 247 } 248 for _, dim := range chart.Dims { 249 dim.ID = fmt.Sprintf(dim.ID, device) 250 } 251 } 252 253 if err := n.Charts().Add(*charts...); err != nil { 254 n.Warning(err) 255 } 256 } 257 258 func (n *NVMe) removeDeviceCharts(device string) { 259 px := fmt.Sprintf("device_%s", device) 260 261 for _, chart := range *n.Charts() { 262 if strings.HasPrefix(chart.ID, px) { 263 chart.MarkRemove() 264 chart.MarkNotCreated() 265 } 266 } 267 }