github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/devices/gpu/nvidia/nvml/driver_linux.go (about)

     1  package nvml
     2  
     3  import (
     4  	"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
     5  )
     6  
     7  // Initialize nvml library by locating nvml shared object file and calling ldopen
     8  func (n *nvmlDriver) Initialize() error {
     9  	return nvml.Init()
    10  }
    11  
    12  // Shutdown stops any further interaction with nvml
    13  func (n *nvmlDriver) Shutdown() error {
    14  	return nvml.Shutdown()
    15  }
    16  
    17  // SystemDriverVersion returns installed driver version
    18  func (n *nvmlDriver) SystemDriverVersion() (string, error) {
    19  	return nvml.GetDriverVersion()
    20  }
    21  
    22  // DeviceCount reports number of available GPU devices
    23  func (n *nvmlDriver) DeviceCount() (uint, error) {
    24  	return nvml.GetDeviceCount()
    25  }
    26  
    27  // DeviceInfoByIndex returns DeviceInfo for index GPU in system device list
    28  func (n *nvmlDriver) DeviceInfoByIndex(index uint) (*DeviceInfo, error) {
    29  	device, err := nvml.NewDevice(index)
    30  	if err != nil {
    31  		return nil, err
    32  	}
    33  	deviceMode, err := device.GetDeviceMode()
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  	return &DeviceInfo{
    38  		UUID:               device.UUID,
    39  		Name:               device.Model,
    40  		MemoryMiB:          device.Memory,
    41  		PowerW:             device.Power,
    42  		BAR1MiB:            device.PCI.BAR1,
    43  		PCIBandwidthMBPerS: device.PCI.Bandwidth,
    44  		PCIBusID:           device.PCI.BusID,
    45  		CoresClockMHz:      device.Clocks.Cores,
    46  		MemoryClockMHz:     device.Clocks.Memory,
    47  		DisplayState:       deviceMode.DisplayInfo.Mode.String(),
    48  		PersistenceMode:    deviceMode.Persistence.String(),
    49  	}, nil
    50  }
    51  
    52  // DeviceInfoByIndex returns DeviceInfo and DeviceStatus for index GPU in system device list
    53  func (n *nvmlDriver) DeviceInfoAndStatusByIndex(index uint) (*DeviceInfo, *DeviceStatus, error) {
    54  	device, err := nvml.NewDevice(index)
    55  	if err != nil {
    56  		return nil, nil, err
    57  	}
    58  	status, err := device.Status()
    59  	if err != nil {
    60  		return nil, nil, err
    61  	}
    62  	return &DeviceInfo{
    63  			UUID:               device.UUID,
    64  			Name:               device.Model,
    65  			MemoryMiB:          device.Memory,
    66  			PowerW:             device.Power,
    67  			BAR1MiB:            device.PCI.BAR1,
    68  			PCIBandwidthMBPerS: device.PCI.Bandwidth,
    69  			PCIBusID:           device.PCI.BusID,
    70  			CoresClockMHz:      device.Clocks.Cores,
    71  			MemoryClockMHz:     device.Clocks.Memory,
    72  		}, &DeviceStatus{
    73  			TemperatureC:       status.Temperature,
    74  			GPUUtilization:     status.Utilization.GPU,
    75  			MemoryUtilization:  status.Utilization.Memory,
    76  			EncoderUtilization: status.Utilization.Encoder,
    77  			DecoderUtilization: status.Utilization.Decoder,
    78  			UsedMemoryMiB:      status.Memory.Global.Used,
    79  			ECCErrorsL1Cache:   status.Memory.ECCErrors.L1Cache,
    80  			ECCErrorsL2Cache:   status.Memory.ECCErrors.L2Cache,
    81  			ECCErrorsDevice:    status.Memory.ECCErrors.Device,
    82  			PowerUsageW:        status.Power,
    83  			BAR1UsedMiB:        status.PCI.BAR1Used,
    84  		}, nil
    85  }