github.com/netdata/go.d.plugin@v0.58.1/modules/nvme/collect.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package nvme
     4  
     5  import (
     6  	"errors"
     7  	"fmt"
     8  	"path/filepath"
     9  	"strconv"
    10  	"time"
    11  )
    12  
    13  func (n *NVMe) collect() (map[string]int64, error) {
    14  	if n.exec == nil {
    15  		return nil, errors.New("nvme-cli is not initialized (nil)")
    16  	}
    17  
    18  	now := time.Now()
    19  	if n.forceListDevices || now.Sub(n.listDevicesTime) > n.listDevicesEvery {
    20  		n.forceListDevices = false
    21  		n.listDevicesTime = now
    22  		if err := n.listNVMeDevices(); err != nil {
    23  			return nil, err
    24  		}
    25  	}
    26  
    27  	mx := make(map[string]int64)
    28  
    29  	for path := range n.devicePaths {
    30  		if err := n.collectNVMeDevice(mx, path); err != nil {
    31  			n.Error(err)
    32  			n.forceListDevices = true
    33  			continue
    34  		}
    35  	}
    36  
    37  	return mx, nil
    38  }
    39  
    40  func (n *NVMe) collectNVMeDevice(mx map[string]int64, devicePath string) error {
    41  	stats, err := n.exec.smartLog(devicePath)
    42  	if err != nil {
    43  		return fmt.Errorf("exec nvme smart-log for '%s': %v", devicePath, err)
    44  	}
    45  
    46  	device := extractDeviceFromPath(devicePath)
    47  
    48  	mx["device_"+device+"_temperature"] = int64(float64(parseValue(stats.Temperature)) - 273.15) // Kelvin => Celsius
    49  	mx["device_"+device+"_percentage_used"] = parseValue(stats.PercentUsed)
    50  	mx["device_"+device+"_available_spare"] = parseValue(stats.AvailSpare)
    51  	mx["device_"+device+"_data_units_read"] = parseValue(stats.DataUnitsRead) * 1000 * 512       // units => bytes
    52  	mx["device_"+device+"_data_units_written"] = parseValue(stats.DataUnitsWritten) * 1000 * 512 // units => bytes
    53  	mx["device_"+device+"_host_read_commands"] = parseValue(stats.HostReadCommands)
    54  	mx["device_"+device+"_host_write_commands"] = parseValue(stats.HostWriteCommands)
    55  	mx["device_"+device+"_power_cycles"] = parseValue(stats.PowerCycles)
    56  	mx["device_"+device+"_power_on_time"] = parseValue(stats.PowerOnHours) * 3600 // hours => seconds
    57  	mx["device_"+device+"_unsafe_shutdowns"] = parseValue(stats.UnsafeShutdowns)
    58  	mx["device_"+device+"_media_errors"] = parseValue(stats.MediaErrors)
    59  	mx["device_"+device+"_num_err_log_entries"] = parseValue(stats.NumErrLogEntries)
    60  	mx["device_"+device+"_controller_busy_time"] = parseValue(stats.ControllerBusyTime) * 60 // minutes => seconds
    61  	mx["device_"+device+"_warning_temp_time"] = parseValue(stats.WarningTempTime) * 60       // minutes => seconds
    62  	mx["device_"+device+"_critical_comp_time"] = parseValue(stats.CriticalCompTime) * 60     // minutes => seconds
    63  	mx["device_"+device+"_thm_temp1_trans_count"] = parseValue(stats.ThmTemp1TransCount)
    64  	mx["device_"+device+"_thm_temp2_trans_count"] = parseValue(stats.ThmTemp2TransCount)
    65  	mx["device_"+device+"_thm_temp1_total_time"] = parseValue(stats.ThmTemp1TotalTime) // seconds
    66  	mx["device_"+device+"_thm_temp2_total_time"] = parseValue(stats.ThmTemp2TotalTime) // seconds
    67  
    68  	mx["device_"+device+"_critical_warning_available_spare"] = boolToInt(parseValue(stats.CriticalWarning)&1 != 0)
    69  	mx["device_"+device+"_critical_warning_temp_threshold"] = boolToInt(parseValue(stats.CriticalWarning)&(1<<1) != 0)
    70  	mx["device_"+device+"_critical_warning_nvm_subsystem_reliability"] = boolToInt(parseValue(stats.CriticalWarning)&(1<<2) != 0)
    71  	mx["device_"+device+"_critical_warning_read_only"] = boolToInt(parseValue(stats.CriticalWarning)&(1<<3) != 0)
    72  	mx["device_"+device+"_critical_warning_volatile_mem_backup_failed"] = boolToInt(parseValue(stats.CriticalWarning)&(1<<4) != 0)
    73  	mx["device_"+device+"_critical_warning_persistent_memory_read_only"] = boolToInt(parseValue(stats.CriticalWarning)&(1<<5) != 0)
    74  
    75  	return nil
    76  }
    77  
    78  func (n *NVMe) listNVMeDevices() error {
    79  	devices, err := n.exec.list()
    80  	if err != nil {
    81  		return fmt.Errorf("exec nvme list: %v", err)
    82  	}
    83  
    84  	seen := make(map[string]bool)
    85  	for _, v := range devices.Devices {
    86  		device := extractDeviceFromPath(v.DevicePath)
    87  		seen[device] = true
    88  
    89  		if !n.devicePaths[v.DevicePath] {
    90  			n.devicePaths[v.DevicePath] = true
    91  			n.addDeviceCharts(device)
    92  		}
    93  	}
    94  	for path := range n.devicePaths {
    95  		device := extractDeviceFromPath(path)
    96  		if !seen[device] {
    97  			delete(n.devicePaths, device)
    98  			n.removeDeviceCharts(device)
    99  		}
   100  	}
   101  
   102  	return nil
   103  }
   104  
   105  func extractDeviceFromPath(devicePath string) string {
   106  	_, name := filepath.Split(devicePath)
   107  	return name
   108  }
   109  
   110  func boolToInt(v bool) int64 {
   111  	if v {
   112  		return 1
   113  	}
   114  	return 0
   115  }
   116  
   117  func parseValue(s nvmeNumber) int64 {
   118  	v, _ := strconv.ParseFloat(string(s), 64)
   119  	return int64(v)
   120  }