github.com/livekit/protocol@v1.39.3/utils/hwstats/cpu.go (about)

     1  // Copyright 2023 LiveKit, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hwstats
    16  
    17  import (
    18  	"time"
    19  
    20  	"github.com/frostbyte73/core"
    21  	"github.com/prometheus/procfs"
    22  	"go.uber.org/atomic"
    23  
    24  	"github.com/livekit/protocol/logger"
    25  )
    26  
// This object returns cgroup-quota-aware CPU stats. On systems other than Linux,
// it falls back to full system stats.
    29  
// platformCPUMonitor abstracts the platform-specific source of CPU measurements.
// The concrete implementation is obtained from newPlatformCPUMonitor.
type platformCPUMonitor interface {
	// getCPUIdle returns the current idle CPU amount. Callers in this file divide
	// it by numCPU() to obtain an idle ratio, so it is expressed in units of CPUs.
	getCPUIdle() (float64, error)
	// numCPU returns the number of CPUs as a float64 (fractional values are
	// representable — presumably to accommodate CPU quotas; confirm against the
	// platform implementations).
	numCPU() float64
}
    34  
// CPUStats owns a background sampling goroutine and exposes the most recent
// idle-CPU measurement. It is created by NewCPUStats or NewProcMonitor and must
// be released with Stop.
type CPUStats struct {
	// idleCPUs holds the latest idle value written by the monitoring goroutine
	// and read by GetCPUIdle.
	idleCPUs atomic.Float64
	// platform supplies the CPU count and, for monitorCPULoad, the idle reading.
	platform platformCPUMonitor

	// idleCallback, when non-nil, is called by monitorCPULoad with every
	// successfully sampled idle value. Only NewCPUStats sets it.
	idleCallback func(idle float64)
	// procCallback, when non-nil, is called by monitorProcesses with every
	// computed sample. Only NewProcMonitor sets it.
	procCallback func(*ProcStats)
	// warningThrottle wraps the "high cpu load" log call; it is built with
	// core.NewThrottle(time.Minute), presumably limiting it to once per minute.
	warningThrottle core.Throttle
	// closeChan is closed by Stop to terminate the monitoring goroutine.
	closeChan chan struct{}
}
    44  
// ProcStats is one sample produced by monitorProcesses and handed to the
// callback registered through NewProcMonitor.
type ProcStats struct {
	// CpuIdle starts at the platform CPU count and is reduced by every value
	// added to Cpu, i.e. it is the CPU amount not attributed to any process.
	CpuIdle float64
	// Cpu maps a pid to the CPU usage attributed to it. Usage of descendant
	// processes is accumulated on the ancestor that is a direct child of the
	// current process (or on the top-most ancestor when there is none).
	Cpu map[int]float64
	// MemoryTotal is the sum of all values in Memory.
	MemoryTotal int
	// Memory maps a pid (same keys as Cpu) to RSS multiplied by the page size —
	// presumably bytes; confirm against getPageSize.
	Memory map[int]int
}
    51  
    52  func NewCPUStats(idleUpdateCallback func(idle float64)) (*CPUStats, error) {
    53  	p, err := newPlatformCPUMonitor()
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  
    58  	c := &CPUStats{
    59  		platform:        p,
    60  		warningThrottle: core.NewThrottle(time.Minute),
    61  		idleCallback:    idleUpdateCallback,
    62  		closeChan:       make(chan struct{}),
    63  	}
    64  
    65  	go c.monitorCPULoad()
    66  
    67  	return c, nil
    68  }
    69  
    70  func NewProcMonitor(onUpdate func(*ProcStats)) (*CPUStats, error) {
    71  	p, err := newPlatformCPUMonitor()
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  
    76  	c := &CPUStats{
    77  		platform:        p,
    78  		warningThrottle: core.NewThrottle(time.Minute),
    79  		procCallback:    onUpdate,
    80  		closeChan:       make(chan struct{}),
    81  	}
    82  
    83  	go c.monitorProcesses()
    84  
    85  	return c, nil
    86  }
    87  
// GetCPUIdle returns the idle CPU value most recently stored by the monitoring
// goroutine. Until the first sample has been stored it returns the zero value
// of the underlying atomic.
func (c *CPUStats) GetCPUIdle() float64 {
	return c.idleCPUs.Load()
}
    91  
// NumCPU returns the CPU count reported by the platform monitor.
func (c *CPUStats) NumCPU() float64 {
	return c.platform.numCPU()
}
    95  
    96  func (c *CPUStats) GetCPULoad() float64 {
    97  	var cpuLoad float64
    98  	cpuIdle := c.GetCPUIdle()
    99  	nCPU := c.NumCPU()
   100  	if nCPU > 0 && cpuIdle > 0 {
   101  		cpuLoad = 1 - (cpuIdle / c.NumCPU())
   102  	}
   103  	return cpuLoad
   104  }
   105  
// Stop terminates the monitoring goroutine by closing closeChan.
// It must be called at most once: closing an already closed channel panics.
func (c *CPUStats) Stop() {
	close(c.closeChan)
}
   109  
   110  func (c *CPUStats) monitorCPULoad() {
   111  	ticker := time.NewTicker(time.Second)
   112  	defer ticker.Stop()
   113  
   114  	for {
   115  		select {
   116  		case <-c.closeChan:
   117  			return
   118  		case <-ticker.C:
   119  			idle, err := c.platform.getCPUIdle()
   120  			if err != nil {
   121  				logger.Errorw("failed retrieving CPU idle", err)
   122  				continue
   123  			}
   124  
   125  			c.idleCPUs.Store(idle)
   126  			idleRatio := idle / c.platform.numCPU()
   127  
   128  			if idleRatio < 0.1 {
   129  				c.warningThrottle(func() { logger.Infow("high cpu load", "load", 1-idleRatio) })
   130  			}
   131  
   132  			if c.idleCallback != nil {
   133  				c.idleCallback(idle)
   134  			}
   135  		}
   136  	}
   137  }
   138  
   139  func (c *CPUStats) monitorProcesses() {
   140  	numCPU := c.platform.numCPU()
   141  	pageSize := getPageSize()
   142  
   143  	fs, err := procfs.NewFS(procfs.DefaultMountPoint)
   144  	if err != nil {
   145  		logger.Errorw("failed to read proc fs", err)
   146  		return
   147  	}
   148  	hostCPU, err := getHostCPUCount(fs)
   149  	if err != nil {
   150  		logger.Errorw("failed to read pod cpu count", err)
   151  		return
   152  	}
   153  
   154  	self, err := fs.Self()
   155  	if err != nil {
   156  		logger.Errorw("failed to read self", err)
   157  		return
   158  	}
   159  
   160  	ticker := time.NewTicker(time.Second)
   161  	defer ticker.Stop()
   162  
   163  	var prevTotalTime float64
   164  	var prevStats map[int]procfs.ProcStat
   165  	for {
   166  		select {
   167  		case <-c.closeChan:
   168  			return
   169  		case <-ticker.C:
   170  			procStats := make(map[int]procfs.ProcStat)
   171  			procs, err := procfs.AllProcs()
   172  			if err != nil {
   173  				logger.Errorw("failed to read processes", err)
   174  				continue
   175  			}
   176  
   177  			total, err := fs.Stat()
   178  			if err != nil {
   179  				logger.Errorw("failed to read stats", err)
   180  				continue
   181  			}
   182  
   183  			ppids := make(map[int]int)
   184  			for _, proc := range procs {
   185  				stat, err := proc.Stat()
   186  				if err != nil {
   187  					continue
   188  				}
   189  
   190  				procStats[proc.PID] = stat
   191  				if proc.PID != self.PID {
   192  					ppids[proc.PID] = stat.PPID
   193  				}
   194  			}
   195  
   196  			totalHostTime := total.CPUTotal.Idle + total.CPUTotal.Iowait +
   197  				total.CPUTotal.User + total.CPUTotal.Nice + total.CPUTotal.System +
   198  				total.CPUTotal.IRQ + total.CPUTotal.SoftIRQ + total.CPUTotal.Steal
   199  
   200  			stats := &ProcStats{
   201  				CpuIdle:     numCPU,
   202  				Cpu:         make(map[int]float64),
   203  				MemoryTotal: 0,
   204  				Memory:      make(map[int]int),
   205  			}
   206  
   207  			for pid, stat := range procStats {
   208  				// process usage as percent of total host cpu
   209  				procPercentUsage := float64(stat.UTime + stat.STime - prevStats[pid].UTime - prevStats[pid].STime)
   210  				if procPercentUsage == 0 {
   211  					continue
   212  				}
   213  
   214  				for ppids[pid] != self.PID && ppids[pid] != 0 {
   215  					// bundle usage up to first child of main go process
   216  					pid = ppids[pid]
   217  				}
   218  
   219  				cpu := hostCPU * procPercentUsage / 100 / (totalHostTime - prevTotalTime)
   220  				stats.Cpu[pid] += cpu
   221  				stats.CpuIdle -= cpu
   222  
   223  				memory := stat.RSS * pageSize
   224  				stats.Memory[pid] += memory
   225  				stats.MemoryTotal += memory
   226  			}
   227  
   228  			c.idleCPUs.Store(stats.CpuIdle)
   229  
   230  			if c.procCallback != nil {
   231  				c.procCallback(stats)
   232  			}
   233  
   234  			prevTotalTime = totalHostTime
   235  			prevStats = procStats
   236  		}
   237  	}
   238  }