github.com/MetalBlockchain/metalgo@v1.11.9/utils/resource/usage.go (about) 1 // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package resource 5 6 import ( 7 "math" 8 "strconv" 9 "sync" 10 "time" 11 12 "github.com/prometheus/client_golang/prometheus" 13 "github.com/shirou/gopsutil/cpu" 14 "github.com/shirou/gopsutil/process" 15 "go.uber.org/zap" 16 17 "github.com/MetalBlockchain/metalgo/utils/logging" 18 "github.com/MetalBlockchain/metalgo/utils/storage" 19 ) 20 21 var ( 22 lnHalf = math.Log(.5) 23 24 _ Manager = (*manager)(nil) 25 ) 26 27 type CPUUser interface { 28 // CPUUsage returns the number of CPU cores of usage this user has attributed 29 // to it. 30 // 31 // For example, if this user is reporting a process's CPU utilization and 32 // that process is currently using 150% CPU (i.e. one and a half cores of 33 // compute) then the return value will be 1.5. 34 CPUUsage() float64 35 } 36 37 type DiskUser interface { 38 // DiskUsage returns the number of bytes per second read from/written to 39 // disk recently. 40 DiskUsage() (read float64, write float64) 41 42 // returns number of bytes available in the db volume 43 AvailableDiskBytes() uint64 44 } 45 46 type User interface { 47 CPUUser 48 DiskUser 49 } 50 51 type ProcessTracker interface { 52 // TrackProcess adds [pid] to the list of processes that this tracker is 53 // currently managing. Duplicate requests are dropped. 54 TrackProcess(pid int) 55 56 // UntrackProcess removes [pid] from the list of processes that this tracker 57 // is currently managing. Untracking a currently untracked [pid] is a noop. 58 UntrackProcess(pid int) 59 } 60 61 type Manager interface { 62 User 63 ProcessTracker 64 65 // Shutdown allocated resources and stop tracking all processes. 66 Shutdown() 67 } 68 69 type manager struct { 70 log logging.Logger 71 processMetrics *metrics 72 73 processesLock sync.Mutex 74 processes map[int]*proc 75 76 usageLock sync.RWMutex 77 cpuUsage float64 78 // [readUsage] is the number of bytes/second read from disk recently. 79 readUsage float64 80 // [writeUsage] is the number of bytes/second written to disk recently. 81 writeUsage float64 82 83 availableDiskBytes uint64 84 85 closeOnce sync.Once 86 onClose chan struct{} 87 } 88 89 func NewManager( 90 log logging.Logger, 91 diskPath string, 92 frequency, 93 cpuHalflife, 94 diskHalflife time.Duration, 95 metricsRegisterer prometheus.Registerer, 96 ) (Manager, error) { 97 processMetrics, err := newMetrics(metricsRegisterer) 98 if err != nil { 99 return nil, err 100 } 101 102 m := &manager{ 103 log: log, 104 processMetrics: processMetrics, 105 processes: make(map[int]*proc), 106 onClose: make(chan struct{}), 107 availableDiskBytes: math.MaxUint64, 108 } 109 110 go m.update(diskPath, frequency, cpuHalflife, diskHalflife) 111 return m, nil 112 } 113 114 func (m *manager) CPUUsage() float64 { 115 m.usageLock.RLock() 116 defer m.usageLock.RUnlock() 117 118 return m.cpuUsage 119 } 120 121 func (m *manager) DiskUsage() (float64, float64) { 122 m.usageLock.RLock() 123 defer m.usageLock.RUnlock() 124 125 return m.readUsage, m.writeUsage 126 } 127 128 func (m *manager) AvailableDiskBytes() uint64 { 129 m.usageLock.RLock() 130 defer m.usageLock.RUnlock() 131 132 return m.availableDiskBytes 133 } 134 135 func (m *manager) TrackProcess(pid int) { 136 p, err := process.NewProcess(int32(pid)) 137 if err != nil { 138 return 139 } 140 141 process := &proc{ 142 p: p, 143 log: m.log, 144 } 145 146 m.processesLock.Lock() 147 m.processes[pid] = process 148 m.processesLock.Unlock() 149 } 150 151 func (m *manager) UntrackProcess(pid int) { 152 m.processesLock.Lock() 153 delete(m.processes, pid) 154 m.processesLock.Unlock() 155 } 156 157 func (m *manager) Shutdown() { 158 m.closeOnce.Do(func() { 159 close(m.onClose) 160 }) 161 } 162 163 func (m *manager) update(diskPath string, frequency, cpuHalflife, diskHalflife time.Duration) { 164 ticker := time.NewTicker(frequency) 165 defer ticker.Stop() 166 167 newCPUWeight, oldCPUWeight := getSampleWeights(frequency, cpuHalflife) 168 newDiskWeight, oldDiskWeight := getSampleWeights(frequency, diskHalflife) 169 170 frequencyInSeconds := frequency.Seconds() 171 for { 172 currentCPUUsage, currentReadUsage, currentWriteUsage := m.getActiveUsage(frequencyInSeconds) 173 currentScaledCPUUsage := newCPUWeight * currentCPUUsage 174 currentScaledReadUsage := newDiskWeight * currentReadUsage 175 currentScaledWriteUsage := newDiskWeight * currentWriteUsage 176 177 availableBytes, getBytesErr := storage.AvailableBytes(diskPath) 178 if getBytesErr != nil { 179 m.log.Verbo("failed to lookup resource", 180 zap.String("resource", "system disk"), 181 zap.String("path", diskPath), 182 zap.Error(getBytesErr), 183 ) 184 } 185 186 m.usageLock.Lock() 187 m.cpuUsage = oldCPUWeight*m.cpuUsage + currentScaledCPUUsage 188 m.readUsage = oldDiskWeight*m.readUsage + currentScaledReadUsage 189 m.writeUsage = oldDiskWeight*m.writeUsage + currentScaledWriteUsage 190 191 if getBytesErr == nil { 192 m.availableDiskBytes = availableBytes 193 } 194 195 m.usageLock.Unlock() 196 197 select { 198 case <-ticker.C: 199 case <-m.onClose: 200 return 201 } 202 } 203 } 204 205 // Returns: 206 // 1. Current CPU usage by all processes. 207 // 2. Current bytes/sec read from disk by all processes. 208 // 3. Current bytes/sec written to disk by all processes. 209 func (m *manager) getActiveUsage(secondsSinceLastUpdate float64) (float64, float64, float64) { 210 m.processesLock.Lock() 211 defer m.processesLock.Unlock() 212 213 var ( 214 totalCPU float64 215 totalRead float64 216 totalWrite float64 217 ) 218 for _, p := range m.processes { 219 cpu, read, write := p.getActiveUsage(secondsSinceLastUpdate) 220 totalCPU += cpu 221 totalRead += read 222 totalWrite += write 223 224 processIDStr := strconv.Itoa(int(p.p.Pid)) 225 m.processMetrics.numCPUCycles.WithLabelValues(processIDStr).Set(p.lastTotalCPU) 226 m.processMetrics.numDiskReads.WithLabelValues(processIDStr).Set(float64(p.numReads)) 227 m.processMetrics.numDiskReadBytes.WithLabelValues(processIDStr).Set(float64(p.lastReadBytes)) 228 m.processMetrics.numDiskWrites.WithLabelValues(processIDStr).Set(float64(p.numWrites)) 229 m.processMetrics.numDiskWritesBytes.WithLabelValues(processIDStr).Set(float64(p.lastWriteBytes)) 230 } 231 232 return totalCPU, totalRead, totalWrite 233 } 234 235 type proc struct { 236 p *process.Process 237 log logging.Logger 238 239 initialized bool 240 241 // [lastTotalCPU] is the most recent measurement of total CPU usage. 242 lastTotalCPU float64 243 244 // [numReads] is the total number of disk reads performed. 245 numReads uint64 246 // [lastReadBytes] is the most recent measurement of total disk bytes read. 247 lastReadBytes uint64 248 249 // [numWrites] is the total number of disk writes performed. 250 numWrites uint64 251 // [lastWriteBytes] is the most recent measurement of total disk bytes 252 // written. 253 lastWriteBytes uint64 254 } 255 256 func (p *proc) getActiveUsage(secondsSinceLastUpdate float64) (float64, float64, float64) { 257 // If there is an error tracking the CPU/disk utilization of a process, 258 // assume that the utilization is 0. 259 times, err := p.p.Times() 260 if err != nil { 261 p.log.Verbo("failed to lookup resource", 262 zap.String("resource", "process CPU"), 263 zap.Int32("pid", p.p.Pid), 264 zap.Error(err), 265 ) 266 times = &cpu.TimesStat{} 267 } 268 269 // Note: IOCounters is not implemented on macos and therefore always returns 270 // an error on macos. 271 io, err := p.p.IOCounters() 272 if err != nil { 273 p.log.Verbo("failed to lookup resource", 274 zap.String("resource", "process IO"), 275 zap.Int32("pid", p.p.Pid), 276 zap.Error(err), 277 ) 278 io = &process.IOCountersStat{} 279 } 280 281 var ( 282 cpu float64 283 read float64 284 write float64 285 ) 286 totalCPU := times.Total() 287 if p.initialized { 288 if totalCPU > p.lastTotalCPU { 289 newCPU := totalCPU - p.lastTotalCPU 290 cpu = newCPU / secondsSinceLastUpdate 291 } 292 if io.ReadBytes > p.lastReadBytes { 293 newRead := io.ReadBytes - p.lastReadBytes 294 read = float64(newRead) / secondsSinceLastUpdate 295 } 296 if io.WriteBytes > p.lastWriteBytes { 297 newWrite := io.WriteBytes - p.lastWriteBytes 298 write = float64(newWrite) / secondsSinceLastUpdate 299 } 300 } 301 302 p.initialized = true 303 p.lastTotalCPU = totalCPU 304 p.numReads = io.ReadCount 305 p.lastReadBytes = io.ReadBytes 306 p.numWrites = io.WriteCount 307 p.lastWriteBytes = io.WriteBytes 308 309 return cpu, read, write 310 } 311 312 // getSampleWeights converts the frequency of CPU sampling and the halflife of 313 // the CPU sample's usefulness into weights to scale the newly sampled point and 314 // previously samples. 315 func getSampleWeights(frequency, halflife time.Duration) (float64, float64) { 316 halflifeInSamples := float64(halflife) / float64(frequency) 317 oldWeight := math.Exp(lnHalf / halflifeInSamples) 318 newWeight := 1 - oldWeight 319 return newWeight, oldWeight 320 }