github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/devices/gpu/nvidia/nvml/driver_linux.go (about) 1 package nvml 2 3 import ( 4 "github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml" 5 ) 6 7 // Initialize nvml library by locating nvml shared object file and calling ldopen 8 func (n *nvmlDriver) Initialize() error { 9 return nvml.Init() 10 } 11 12 // Shutdown stops any further interaction with nvml 13 func (n *nvmlDriver) Shutdown() error { 14 return nvml.Shutdown() 15 } 16 17 // SystemDriverVersion returns installed driver version 18 func (n *nvmlDriver) SystemDriverVersion() (string, error) { 19 return nvml.GetDriverVersion() 20 } 21 22 // DeviceCount reports number of available GPU devices 23 func (n *nvmlDriver) DeviceCount() (uint, error) { 24 return nvml.GetDeviceCount() 25 } 26 27 // DeviceInfoByIndex returns DeviceInfo for index GPU in system device list 28 func (n *nvmlDriver) DeviceInfoByIndex(index uint) (*DeviceInfo, error) { 29 device, err := nvml.NewDevice(index) 30 if err != nil { 31 return nil, err 32 } 33 deviceMode, err := device.GetDeviceMode() 34 if err != nil { 35 return nil, err 36 } 37 return &DeviceInfo{ 38 UUID: device.UUID, 39 Name: device.Model, 40 MemoryMiB: device.Memory, 41 PowerW: device.Power, 42 BAR1MiB: device.PCI.BAR1, 43 PCIBandwidthMBPerS: device.PCI.Bandwidth, 44 PCIBusID: device.PCI.BusID, 45 CoresClockMHz: device.Clocks.Cores, 46 MemoryClockMHz: device.Clocks.Memory, 47 DisplayState: deviceMode.DisplayInfo.Mode.String(), 48 PersistenceMode: deviceMode.Persistence.String(), 49 }, nil 50 } 51 52 // DeviceInfoByIndex returns DeviceInfo and DeviceStatus for index GPU in system device list 53 func (n *nvmlDriver) DeviceInfoAndStatusByIndex(index uint) (*DeviceInfo, *DeviceStatus, error) { 54 device, err := nvml.NewDevice(index) 55 if err != nil { 56 return nil, nil, err 57 } 58 status, err := device.Status() 59 if err != nil { 60 return nil, nil, err 61 } 62 return &DeviceInfo{ 63 UUID: device.UUID, 64 Name: device.Model, 65 MemoryMiB: device.Memory, 66 PowerW: device.Power, 67 BAR1MiB: device.PCI.BAR1, 68 PCIBandwidthMBPerS: device.PCI.Bandwidth, 69 PCIBusID: device.PCI.BusID, 70 CoresClockMHz: device.Clocks.Cores, 71 MemoryClockMHz: device.Clocks.Memory, 72 }, &DeviceStatus{ 73 TemperatureC: status.Temperature, 74 GPUUtilization: status.Utilization.GPU, 75 MemoryUtilization: status.Utilization.Memory, 76 EncoderUtilization: status.Utilization.Encoder, 77 DecoderUtilization: status.Utilization.Decoder, 78 UsedMemoryMiB: status.Memory.Global.Used, 79 ECCErrorsL1Cache: status.Memory.ECCErrors.L1Cache, 80 ECCErrorsL2Cache: status.Memory.ECCErrors.L2Cache, 81 ECCErrorsDevice: status.Memory.ECCErrors.Device, 82 PowerUsageW: status.Power, 83 BAR1UsedMiB: status.PCI.BAR1Used, 84 }, nil 85 }