github.com/livekit/protocol@v1.39.3/utils/hwstats/cpu_linux.go (about)

     1  // Copyright 2023 LiveKit, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build linux
    16  
    17  package hwstats
    18  
    19  import (
    20  	"errors"
    21  	"os"
    22  	"regexp"
    23  	"runtime"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  
    28  	"github.com/prometheus/procfs"
    29  	"golang.org/x/sys/unix"
    30  
    31  	"github.com/livekit/protocol/logger"
    32  )
    33  
var (
	// usageRegex captures the numeric value that follows "usage_usec " in the
	// contents of the cgroup v2 cpu.stat file (see cpuInfoGetterV2.getTotalCPUTime).
	usageRegex = regexp.MustCompile("usage_usec ([0-9]+)")
)
    37  
const (
	// Files from which the cumulative CPU time of the cgroup is read. The v1
	// file is parsed as a single integer, the v2 file is searched for its
	// "usage_usec" entry. newPlatformCPUMonitor also uses the existence of
	// these files to decide which cgroup version is in use.
	cpuStatsPathV1 = "/sys/fs/cgroup/cpu,cpuacct/cpuacct.usage"
	cpuStatsPathV2 = "/sys/fs/cgroup/cpu.stat"

	// Files from which the CPU limit of the cgroup is derived. For v1 the limit
	// is quota / period read from two separate files; for v2 both values are
	// read from a single "<quota> <period>" line, where the quota may be "max".
	numCPUPathV1Period = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
	numCPUPathV1Quota  = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
	numCPUPathV2       = "/sys/fs/cgroup/cpu.max"
)
    46  
// cpuInfoGetter hides the cgroup-version-specific way of reading CPU
// accounting data. It is implemented by cpuInfoGetterV1 and cpuInfoGetterV2.
type cpuInfoGetter interface {
	// getTotalCPUTime returns the cumulative CPU time consumed so far. The v2
	// implementation converts its reading to nanoseconds because that is what
	// the caller expects.
	getTotalCPUTime() (int64, error)
	// numCPU returns the number of CPUs available. The value is fractional
	// when it is derived from a quota / period pair.
	numCPU() (float64, error)
}
    51  
// cgroupCPUMonitor computes CPU idle figures from cgroup accounting data by
// comparing successive samples of the cumulative CPU time.
type cgroupCPUMonitor struct {
	// lastSampleTime is the time of the previous sample, as returned by
	// time.Now().UnixNano().
	lastSampleTime int64
	// lastTotalCPUTime is the cumulative CPU time read at the previous sample.
	lastTotalCPUTime int64
	// nCPU is the CPU count read from cg when the monitor was created.
	nCPU float64

	// cg is the cgroup-version-specific data source.
	cg cpuInfoGetter
}
    59  
    60  func newPlatformCPUMonitor() (platformCPUMonitor, error) {
    61  	// probe for the cgroup version
    62  	var cg cpuInfoGetter
    63  	for k, v := range map[string]func() cpuInfoGetter{
    64  		cpuStatsPathV1: newCpuInfoGetterV1,
    65  		cpuStatsPathV2: newCpuInfoGetterV2,
    66  	} {
    67  		e, err := fileExists(k)
    68  		if err != nil {
    69  			return nil, err
    70  		}
    71  		if e {
    72  			cg = v()
    73  			break
    74  		}
    75  	}
    76  	if cg == nil {
    77  		logger.Infow("failed reading cgroup specific cpu stats, falling back to system wide implementation")
    78  		return newOSStatCPUMonitor()
    79  	}
    80  
    81  	cpu, err := cg.getTotalCPUTime()
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	nCPU, err := cg.numCPU()
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  
    91  	return &cgroupCPUMonitor{
    92  		lastSampleTime:   time.Now().UnixNano(),
    93  		lastTotalCPUTime: cpu,
    94  		nCPU:             nCPU,
    95  		cg:               cg,
    96  	}, nil
    97  }
    98  
    99  func (p *cgroupCPUMonitor) getCPUIdle() (float64, error) {
   100  	next, err := p.cg.getTotalCPUTime()
   101  	if err != nil {
   102  		return 0, err
   103  	}
   104  	t := time.Now().UnixNano()
   105  
   106  	duration := t - p.lastSampleTime
   107  	cpuTime := next - p.lastTotalCPUTime
   108  
   109  	busyRatio := float64(cpuTime) / float64(duration)
   110  	idleRatio := p.nCPU - busyRatio
   111  
   112  	// Clamp the value as we do not get all the timestamps at the same time
   113  	if idleRatio > p.nCPU {
   114  		idleRatio = p.nCPU
   115  	} else if idleRatio < 0 {
   116  		idleRatio = 0
   117  	}
   118  
   119  	p.lastSampleTime = t
   120  	p.lastTotalCPUTime = next
   121  
   122  	return idleRatio, nil
   123  }
   124  
// numCPU returns the CPU count stored in the monitor; newPlatformCPUMonitor
// sets it from the cgroup getter when the monitor is created.
func (p *cgroupCPUMonitor) numCPU() float64 {
	return p.nCPU
}
   128  
// cpuInfoGetterV1 is the cpuInfoGetter implementation that reads the cgroup v1
// files (cpuStatsPathV1, numCPUPathV1Quota and numCPUPathV1Period).
type cpuInfoGetterV1 struct{}

// newCpuInfoGetterV1 returns a new cpuInfoGetterV1 as a cpuInfoGetter.
func newCpuInfoGetterV1() cpuInfoGetter {
	return &cpuInfoGetterV1{}
}
   134  
   135  func (cg *cpuInfoGetterV1) getTotalCPUTime() (int64, error) {
   136  	b, err := os.ReadFile(cpuStatsPathV1)
   137  	if err != nil {
   138  		return 0, err
   139  	}
   140  
   141  	// Skip the trailing EOL
   142  	i, err := strconv.ParseInt(string(b[:len(b)-1]), 10, 64)
   143  	if err != nil {
   144  		return 0, err
   145  	}
   146  
   147  	return i, nil
   148  }
   149  
   150  func (cg *cpuInfoGetterV1) numCPU() (float64, error) {
   151  	quota, err := readIntFromFile(numCPUPathV1Quota)
   152  	if err != nil {
   153  		if errors.Is(err, os.ErrNotExist) {
   154  			// File may not exist in case of no quota
   155  			return float64(runtime.NumCPU()), nil
   156  		}
   157  
   158  		return 0, err
   159  	}
   160  
   161  	if quota < 0 {
   162  		// default
   163  		return float64(runtime.NumCPU()), nil
   164  	}
   165  
   166  	period, err := readIntFromFile(numCPUPathV1Period)
   167  	if err != nil {
   168  		return 0, err
   169  	}
   170  
   171  	if period <= 0 {
   172  		// default
   173  		return float64(runtime.NumCPU()), nil
   174  	}
   175  
   176  	return float64(quota) / float64(period), nil
   177  }
   178  
// cpuInfoGetterV2 is the cpuInfoGetter implementation that reads the cgroup v2
// files (cpuStatsPathV2 and numCPUPathV2).
type cpuInfoGetterV2 struct{}

// newCpuInfoGetterV2 returns a new cpuInfoGetterV2 as a cpuInfoGetter.
func newCpuInfoGetterV2() cpuInfoGetter {
	return &cpuInfoGetterV2{}
}
   184  
   185  func (cg *cpuInfoGetterV2) getTotalCPUTime() (int64, error) {
   186  	b, err := os.ReadFile(cpuStatsPathV2)
   187  	if err != nil {
   188  		return 0, err
   189  	}
   190  
   191  	m := usageRegex.FindSubmatch(b)
   192  	if len(m) <= 1 {
   193  		return 0, errors.New("could not parse cpu stats")
   194  	}
   195  
   196  	i, err := strconv.ParseInt(string(m[1]), 10, 64)
   197  	if err != nil {
   198  		return 0, err
   199  	}
   200  
   201  	// Caller expects time in ns
   202  	return i * 1000, nil
   203  }
   204  
   205  func (cg *cpuInfoGetterV2) numCPU() (float64, error) {
   206  	b, err := os.ReadFile(numCPUPathV2)
   207  	if err != nil {
   208  		if errors.Is(err, os.ErrNotExist) {
   209  			// File may not exist in case of no quota
   210  			return float64(runtime.NumCPU()), nil
   211  		}
   212  		return 0, err
   213  	}
   214  
   215  	s := strings.TrimSuffix(string(b), "\n")
   216  
   217  	m := strings.Split(s, " ")
   218  	if len(m) <= 1 {
   219  		return 0, errors.New("could not parse cpu stats")
   220  	}
   221  
   222  	if m[0] == "max" {
   223  		// No quota
   224  		return float64(runtime.NumCPU()), nil
   225  	}
   226  
   227  	quota, err := strconv.ParseInt(string(m[0]), 10, 64)
   228  	if err != nil {
   229  		return 0, err
   230  	}
   231  
   232  	period, err := strconv.ParseInt(string(m[1]), 10, 64)
   233  	if err != nil {
   234  		return 0, err
   235  	}
   236  
   237  	return float64(quota) / float64(period), nil
   238  }
   239  
   240  func fileExists(path string) (bool, error) {
   241  	_, err := os.Lstat(path)
   242  	switch {
   243  	case err == nil:
   244  		return true, nil
   245  	case errors.Is(err, os.ErrNotExist):
   246  		return false, nil
   247  	default:
   248  		return false, err
   249  	}
   250  }
   251  
   252  func readIntFromFile(filename string) (int, error) {
   253  	b, err := os.ReadFile(filename)
   254  	if err != nil {
   255  		return 0, err
   256  	}
   257  
   258  	// Remove trailing new line if any
   259  	s := strings.TrimSuffix(string(b), "\n")
   260  
   261  	// Remove trailing space if any
   262  	s = strings.TrimSuffix(s, " ")
   263  
   264  	return strconv.Atoi(s)
   265  }
   266  
   267  func getHostCPUCount(fs procfs.FS) (float64, error) {
   268  	cpuInfo, err := fs.CPUInfo()
   269  	if err != nil {
   270  		return 0, err
   271  	}
   272  	return float64(len(cpuInfo)), nil
   273  }
   274  
// getPageSize returns the page size reported by unix.Getpagesize.
func getPageSize() int {
	return unix.Getpagesize()
}