github.com/livekit/protocol@v1.39.3/utils/hwstats/cpu.go (about) 1 // Copyright 2023 LiveKit, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hwstats 16 17 import ( 18 "time" 19 20 "github.com/frostbyte73/core" 21 "github.com/prometheus/procfs" 22 "go.uber.org/atomic" 23 24 "github.com/livekit/protocol/logger" 25 ) 26 27 // This object returns cgroup quota aware cpu stats. On other systems than Linux, 28 // it falls back to full system stats 29 30 type platformCPUMonitor interface { 31 getCPUIdle() (float64, error) 32 numCPU() float64 33 } 34 35 type CPUStats struct { 36 idleCPUs atomic.Float64 37 platform platformCPUMonitor 38 39 idleCallback func(idle float64) 40 procCallback func(*ProcStats) 41 warningThrottle core.Throttle 42 closeChan chan struct{} 43 } 44 45 type ProcStats struct { 46 CpuIdle float64 47 Cpu map[int]float64 48 MemoryTotal int 49 Memory map[int]int 50 } 51 52 func NewCPUStats(idleUpdateCallback func(idle float64)) (*CPUStats, error) { 53 p, err := newPlatformCPUMonitor() 54 if err != nil { 55 return nil, err 56 } 57 58 c := &CPUStats{ 59 platform: p, 60 warningThrottle: core.NewThrottle(time.Minute), 61 idleCallback: idleUpdateCallback, 62 closeChan: make(chan struct{}), 63 } 64 65 go c.monitorCPULoad() 66 67 return c, nil 68 } 69 70 func NewProcMonitor(onUpdate func(*ProcStats)) (*CPUStats, error) { 71 p, err := newPlatformCPUMonitor() 72 if err != nil { 73 return nil, err 74 } 75 76 c := &CPUStats{ 77 platform: p, 78 warningThrottle: core.NewThrottle(time.Minute), 79 procCallback: onUpdate, 80 closeChan: make(chan struct{}), 81 } 82 83 go c.monitorProcesses() 84 85 return c, nil 86 } 87 88 func (c *CPUStats) GetCPUIdle() float64 { 89 return c.idleCPUs.Load() 90 } 91 92 func (c *CPUStats) NumCPU() float64 { 93 return c.platform.numCPU() 94 } 95 96 func (c *CPUStats) GetCPULoad() float64 { 97 var cpuLoad float64 98 cpuIdle := c.GetCPUIdle() 99 nCPU := c.NumCPU() 100 if nCPU > 0 && cpuIdle > 0 { 101 cpuLoad = 1 - (cpuIdle / c.NumCPU()) 102 } 103 return cpuLoad 104 } 105 106 func (c *CPUStats) Stop() { 107 close(c.closeChan) 108 } 109 110 func (c *CPUStats) monitorCPULoad() { 111 ticker := time.NewTicker(time.Second) 112 defer ticker.Stop() 113 114 for { 115 select { 116 case <-c.closeChan: 117 return 118 case <-ticker.C: 119 idle, err := c.platform.getCPUIdle() 120 if err != nil { 121 logger.Errorw("failed retrieving CPU idle", err) 122 continue 123 } 124 125 c.idleCPUs.Store(idle) 126 idleRatio := idle / c.platform.numCPU() 127 128 if idleRatio < 0.1 { 129 c.warningThrottle(func() { logger.Infow("high cpu load", "load", 1-idleRatio) }) 130 } 131 132 if c.idleCallback != nil { 133 c.idleCallback(idle) 134 } 135 } 136 } 137 } 138 139 func (c *CPUStats) monitorProcesses() { 140 numCPU := c.platform.numCPU() 141 pageSize := getPageSize() 142 143 fs, err := procfs.NewFS(procfs.DefaultMountPoint) 144 if err != nil { 145 logger.Errorw("failed to read proc fs", err) 146 return 147 } 148 hostCPU, err := getHostCPUCount(fs) 149 if err != nil { 150 logger.Errorw("failed to read pod cpu count", err) 151 return 152 } 153 154 self, err := fs.Self() 155 if err != nil { 156 logger.Errorw("failed to read self", err) 157 return 158 } 159 160 ticker := time.NewTicker(time.Second) 161 defer ticker.Stop() 162 163 var prevTotalTime float64 164 var prevStats map[int]procfs.ProcStat 165 for { 166 select { 167 case <-c.closeChan: 168 return 169 case <-ticker.C: 170 procStats := make(map[int]procfs.ProcStat) 171 procs, err := procfs.AllProcs() 172 if err != nil { 173 logger.Errorw("failed to read processes", err) 174 continue 175 } 176 177 total, err := fs.Stat() 178 if err != nil { 179 logger.Errorw("failed to read stats", err) 180 continue 181 } 182 183 ppids := make(map[int]int) 184 for _, proc := range procs { 185 stat, err := proc.Stat() 186 if err != nil { 187 continue 188 } 189 190 procStats[proc.PID] = stat 191 if proc.PID != self.PID { 192 ppids[proc.PID] = stat.PPID 193 } 194 } 195 196 totalHostTime := total.CPUTotal.Idle + total.CPUTotal.Iowait + 197 total.CPUTotal.User + total.CPUTotal.Nice + total.CPUTotal.System + 198 total.CPUTotal.IRQ + total.CPUTotal.SoftIRQ + total.CPUTotal.Steal 199 200 stats := &ProcStats{ 201 CpuIdle: numCPU, 202 Cpu: make(map[int]float64), 203 MemoryTotal: 0, 204 Memory: make(map[int]int), 205 } 206 207 for pid, stat := range procStats { 208 // process usage as percent of total host cpu 209 procPercentUsage := float64(stat.UTime + stat.STime - prevStats[pid].UTime - prevStats[pid].STime) 210 if procPercentUsage == 0 { 211 continue 212 } 213 214 for ppids[pid] != self.PID && ppids[pid] != 0 { 215 // bundle usage up to first child of main go process 216 pid = ppids[pid] 217 } 218 219 cpu := hostCPU * procPercentUsage / 100 / (totalHostTime - prevTotalTime) 220 stats.Cpu[pid] += cpu 221 stats.CpuIdle -= cpu 222 223 memory := stat.RSS * pageSize 224 stats.Memory[pid] += memory 225 stats.MemoryTotal += memory 226 } 227 228 c.idleCPUs.Store(stats.CpuIdle) 229 230 if c.procCallback != nil { 231 c.procCallback(stats) 232 } 233 234 prevTotalTime = totalHostTime 235 prevStats = procStats 236 } 237 } 238 }