github.com/livekit/protocol@v1.16.1-0.20240517185851-47e4c6bba773/utils/hwstats/cpu.go (about) 1 // Copyright 2023 LiveKit, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hwstats 16 17 import ( 18 "time" 19 20 "github.com/frostbyte73/core" 21 "github.com/prometheus/procfs" 22 "go.uber.org/atomic" 23 24 "github.com/livekit/protocol/logger" 25 ) 26 27 // This object returns cgroup quota aware cpu stats. On other systems than Linux, 28 // it falls back to full system stats 29 30 type platformCPUMonitor interface { 31 getCPUIdle() (float64, error) 32 numCPU() float64 33 } 34 35 type CPUStats struct { 36 idleCPUs atomic.Float64 37 platform platformCPUMonitor 38 39 idleCallback func(idle float64) 40 procCallback func(idle float64, usage map[int]float64) 41 warningThrottle core.Throttle 42 closeChan chan struct{} 43 } 44 45 func NewCPUStats(idleUpdateCallback func(idle float64)) (*CPUStats, error) { 46 p, err := newPlatformCPUMonitor() 47 if err != nil { 48 return nil, err 49 } 50 51 c := &CPUStats{ 52 platform: p, 53 warningThrottle: core.NewThrottle(time.Minute), 54 idleCallback: idleUpdateCallback, 55 closeChan: make(chan struct{}), 56 } 57 58 go c.monitorCPULoad() 59 60 return c, nil 61 } 62 63 func NewProcCPUStats(procUpdateCallback func(idle float64, usage map[int]float64)) (*CPUStats, error) { 64 p, err := newPlatformCPUMonitor() 65 if err != nil { 66 return nil, err 67 } 68 69 c := &CPUStats{ 70 platform: p, 71 warningThrottle: core.NewThrottle(time.Minute), 72 procCallback: procUpdateCallback, 73 closeChan: make(chan struct{}), 74 } 75 76 go c.monitorProcCPULoad() 77 78 return c, nil 79 } 80 81 func (c *CPUStats) GetCPUIdle() float64 { 82 return c.idleCPUs.Load() 83 } 84 85 func (c *CPUStats) NumCPU() float64 { 86 return c.platform.numCPU() 87 } 88 89 func (c *CPUStats) Stop() { 90 close(c.closeChan) 91 } 92 93 func (c *CPUStats) monitorCPULoad() { 94 ticker := time.NewTicker(time.Second) 95 defer ticker.Stop() 96 97 for { 98 select { 99 case <-c.closeChan: 100 return 101 case <-ticker.C: 102 idle, err := c.platform.getCPUIdle() 103 if err != nil { 104 logger.Errorw("failed retrieving CPU idle", err) 105 continue 106 } 107 108 c.idleCPUs.Store(idle) 109 idleRatio := idle / c.platform.numCPU() 110 111 if idleRatio < 0.1 { 112 c.warningThrottle(func() { logger.Infow("high cpu load", "load", 1-idleRatio) }) 113 } 114 115 if c.idleCallback != nil { 116 c.idleCallback(idle) 117 } 118 } 119 } 120 } 121 122 func (c *CPUStats) monitorProcCPULoad() { 123 numCPU := c.platform.numCPU() 124 125 fs, err := procfs.NewFS(procfs.DefaultMountPoint) 126 if err != nil { 127 logger.Errorw("failed to read proc fs", err) 128 return 129 } 130 hostCPU, err := getHostCPUCount(fs) 131 if err != nil { 132 logger.Errorw("failed to read pod cpu count", err) 133 return 134 } 135 136 self, err := fs.Self() 137 if err != nil { 138 logger.Errorw("failed to read self", err) 139 return 140 } 141 142 ticker := time.NewTicker(time.Second) 143 defer ticker.Stop() 144 145 var prevTotalTime float64 146 var prevStats map[int]procfs.ProcStat 147 for { 148 select { 149 case <-c.closeChan: 150 return 151 case <-ticker.C: 152 procStats := make(map[int]procfs.ProcStat) 153 procs, err := procfs.AllProcs() 154 if err != nil { 155 logger.Errorw("failed to read processes", err) 156 continue 157 } 158 159 total, err := fs.Stat() 160 if err != nil { 161 logger.Errorw("failed to read stats", err) 162 continue 163 } 164 165 ppids := make(map[int]int) 166 for _, proc := range procs { 167 stat, err := proc.Stat() 168 if err != nil { 169 continue 170 } 171 172 procStats[proc.PID] = stat 173 if proc.PID != self.PID { 174 ppids[proc.PID] = stat.PPID 175 } 176 } 177 178 totalHostTime := total.CPUTotal.Idle + total.CPUTotal.Iowait + 179 total.CPUTotal.User + total.CPUTotal.Nice + total.CPUTotal.System + 180 total.CPUTotal.IRQ + total.CPUTotal.SoftIRQ + total.CPUTotal.Steal 181 182 usage := make(map[int]float64) 183 podUsage := 0.0 184 for pid, stat := range procStats { 185 // process usage as percent of total host cpu 186 procPercentUsage := float64(stat.UTime + stat.STime - prevStats[pid].UTime - prevStats[pid].STime) 187 if procPercentUsage == 0 { 188 continue 189 } 190 191 for ppids[pid] != self.PID && ppids[pid] != 0 { 192 // bundle usage up to first child of main go process 193 pid = ppids[pid] 194 } 195 196 procUsage := hostCPU * procPercentUsage / 100 / (totalHostTime - prevTotalTime) 197 usage[pid] += procUsage 198 podUsage += procUsage 199 } 200 201 idle := numCPU - podUsage 202 c.idleCPUs.Store(idle) 203 204 if c.procCallback != nil { 205 c.procCallback(idle, usage) 206 } 207 208 prevTotalTime = totalHostTime 209 prevStats = procStats 210 } 211 } 212 }