github.com/netdata/go.d.plugin@v0.58.1/modules/nvidia_smi/collect_csv.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package nvidia_smi 4 5 import ( 6 "bufio" 7 "bytes" 8 "encoding/csv" 9 "errors" 10 "fmt" 11 "io" 12 "regexp" 13 "strconv" 14 "strings" 15 ) 16 17 // use of property aliases is not implemented ('"<property>" or "<alias>"' in help-query-gpu) 18 var knownProperties = map[string]bool{ 19 "uuid": true, 20 "name": true, 21 "fan.speed": true, 22 "pstate": true, 23 "utilization.gpu": true, 24 "utilization.memory": true, 25 "memory.used": true, 26 "memory.free": true, 27 "memory.reserved": true, 28 "temperature.gpu": true, 29 "clocks.current.graphics": true, 30 "clocks.current.video": true, 31 "clocks.current.sm": true, 32 "clocks.current.memory": true, 33 "power.draw": true, 34 } 35 36 var reHelpProperty = regexp.MustCompile(`"([a-zA-Z_.]+)"`) 37 38 func (nv *NvidiaSMI) collectGPUInfoCSV(mx map[string]int64) error { 39 if len(nv.gpuQueryProperties) == 0 { 40 bs, err := nv.exec.queryHelpQueryGPU() 41 if err != nil { 42 return err 43 } 44 45 sc := bufio.NewScanner(bytes.NewBuffer(bs)) 46 47 for sc.Scan() { 48 if !strings.HasPrefix(sc.Text(), "\"") { 49 continue 50 } 51 matches := reHelpProperty.FindAllString(sc.Text(), -1) 52 if len(matches) == 0 { 53 continue 54 } 55 for _, v := range matches { 56 if v = strings.Trim(v, "\""); knownProperties[v] { 57 nv.gpuQueryProperties = append(nv.gpuQueryProperties, v) 58 } 59 } 60 } 61 nv.Debugf("found query GPU properties: %v", nv.gpuQueryProperties) 62 } 63 64 bs, err := nv.exec.queryGPUInfoCSV(nv.gpuQueryProperties) 65 if err != nil { 66 return err 67 } 68 69 nv.Debugf("GPU info:\n%s", bs) 70 71 r := csv.NewReader(bytes.NewBuffer(bs)) 72 r.Comma = ',' 73 r.ReuseRecord = true 74 r.TrimLeadingSpace = true 75 76 // skip headers 77 if _, err := r.Read(); err != nil && err != io.EOF { 78 return err 79 } 80 81 var gpusInfo []csvGPUInfo 82 for { 83 record, err := r.Read() 84 if err != nil { 85 if errors.Is(err, io.EOF) { 86 break 87 } 88 return err 89 } 90 91 if len(record) != len(nv.gpuQueryProperties) { 92 return fmt.Errorf("record values (%d) != queried properties (%d)", len(record), len(nv.gpuQueryProperties)) 93 } 94 95 var gpu csvGPUInfo 96 for i, v := range record { 97 switch nv.gpuQueryProperties[i] { 98 case "uuid": 99 gpu.uuid = v 100 case "name": 101 gpu.name = v 102 case "fan.speed": 103 gpu.fanSpeed = v 104 case "pstate": 105 gpu.pstate = v 106 case "utilization.gpu": 107 gpu.utilizationGPU = v 108 case "utilization.memory": 109 gpu.utilizationMemory = v 110 case "memory.used": 111 gpu.memoryUsed = v 112 case "memory.free": 113 gpu.memoryFree = v 114 case "memory.reserved": 115 gpu.memoryReserved = v 116 case "temperature.gpu": 117 gpu.temperatureGPU = v 118 case "clocks.current.graphics": 119 gpu.clocksCurrentGraphics = v 120 case "clocks.current.video": 121 gpu.clocksCurrentVideo = v 122 case "clocks.current.sm": 123 gpu.clocksCurrentSM = v 124 case "clocks.current.memory": 125 gpu.clocksCurrentMemory = v 126 case "power.draw": 127 gpu.powerDraw = v 128 } 129 } 130 gpusInfo = append(gpusInfo, gpu) 131 } 132 133 seen := make(map[string]bool) 134 135 for _, gpu := range gpusInfo { 136 if !isValidValue(gpu.uuid) || !isValidValue(gpu.name) { 137 continue 138 } 139 140 px := "gpu_" + gpu.uuid + "_" 141 142 seen[px] = true 143 144 if !nv.gpus[px] { 145 nv.gpus[px] = true 146 nv.addGPUCSVCharts(gpu) 147 } 148 149 addMetric(mx, px+"fan_speed_perc", gpu.fanSpeed, 0) 150 addMetric(mx, px+"gpu_utilization", gpu.utilizationGPU, 0) 151 addMetric(mx, px+"mem_utilization", gpu.utilizationMemory, 0) 152 addMetric(mx, px+"frame_buffer_memory_usage_free", gpu.memoryFree, 1024*1024) // MiB => bytes 153 addMetric(mx, px+"frame_buffer_memory_usage_used", gpu.memoryUsed, 1024*1024) // MiB => bytes 154 addMetric(mx, px+"frame_buffer_memory_usage_reserved", gpu.memoryReserved, 1024*1024) // MiB => bytes 155 addMetric(mx, px+"temperature", gpu.temperatureGPU, 0) 156 addMetric(mx, px+"graphics_clock", gpu.clocksCurrentGraphics, 0) 157 addMetric(mx, px+"video_clock", gpu.clocksCurrentVideo, 0) 158 addMetric(mx, px+"sm_clock", gpu.clocksCurrentSM, 0) 159 addMetric(mx, px+"mem_clock", gpu.clocksCurrentMemory, 0) 160 addMetric(mx, px+"power_draw", gpu.powerDraw, 0) 161 for i := 0; i < 16; i++ { 162 if s := "P" + strconv.Itoa(i); gpu.pstate == s { 163 mx[px+"performance_state_"+s] = 1 164 } else { 165 mx[px+"performance_state_"+s] = 0 166 } 167 } 168 } 169 170 for px := range nv.gpus { 171 if !seen[px] { 172 delete(nv.gpus, px) 173 nv.removeCharts(px) 174 } 175 } 176 177 return nil 178 } 179 180 type ( 181 csvGPUInfo struct { 182 uuid string 183 name string 184 fanSpeed string 185 pstate string 186 utilizationGPU string 187 utilizationMemory string 188 memoryUsed string 189 memoryFree string 190 memoryReserved string 191 temperatureGPU string 192 clocksCurrentGraphics string 193 clocksCurrentVideo string 194 clocksCurrentSM string 195 clocksCurrentMemory string 196 powerDraw string 197 } 198 )