github.com/livekit/protocol@v1.16.1-0.20240517185851-47e4c6bba773/utils/hwstats/cpu_linux.go (about)

     1  // Copyright 2023 LiveKit, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build linux
    16  
    17  package hwstats
    18  
    19  import (
    20  	"errors"
    21  	"os"
    22  	"regexp"
    23  	"runtime"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  
    28  	"github.com/prometheus/procfs"
    29  
    30  	"github.com/livekit/protocol/logger"
    31  )
    32  
    33  var (
    34  	usageRegex = regexp.MustCompile("usage_usec ([0-9]+)")
    35  )
    36  
    37  const (
    38  	cpuStatsPathV1 = "/sys/fs/cgroup/cpu,cpuacct/cpuacct.usage"
    39  	cpuStatsPathV2 = "/sys/fs/cgroup/cpu.stat"
    40  
    41  	numCPUPathV1Period = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"
    42  	numCPUPathV1Quota  = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
    43  	numCPUPathV2       = "/sys/fs/cgroup/cpu.max"
    44  )
    45  
    46  type cpuInfoGetter interface {
    47  	getTotalCPUTime() (int64, error)
    48  	numCPU() (float64, error)
    49  }
    50  
    51  type cgroupCPUMonitor struct {
    52  	lastSampleTime   int64
    53  	lastTotalCPUTime int64
    54  	nCPU             float64
    55  
    56  	cg cpuInfoGetter
    57  }
    58  
    59  func newPlatformCPUMonitor() (platformCPUMonitor, error) {
    60  	// probe for the cgroup version
    61  	var cg cpuInfoGetter
    62  	for k, v := range map[string]func() cpuInfoGetter{
    63  		cpuStatsPathV1: newCpuInfoGetterV1,
    64  		cpuStatsPathV2: newCpuInfoGetterV2,
    65  	} {
    66  		e, err := fileExists(k)
    67  		if err != nil {
    68  			return nil, err
    69  		}
    70  		if e {
    71  			cg = v()
    72  			break
    73  		}
    74  	}
    75  	if cg == nil {
    76  		logger.Infow("failed reading cgroup specific cpu stats, falling back to system wide implementation")
    77  		return newOSStatCPUMonitor()
    78  	}
    79  
    80  	cpu, err := cg.getTotalCPUTime()
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  
    85  	nCPU, err := cg.numCPU()
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	return &cgroupCPUMonitor{
    91  		lastSampleTime:   time.Now().UnixNano(),
    92  		lastTotalCPUTime: cpu,
    93  		nCPU:             nCPU,
    94  		cg:               cg,
    95  	}, nil
    96  }
    97  
    98  func (p *cgroupCPUMonitor) getCPUIdle() (float64, error) {
    99  	next, err := p.cg.getTotalCPUTime()
   100  	if err != nil {
   101  		return 0, err
   102  	}
   103  	t := time.Now().UnixNano()
   104  
   105  	duration := t - p.lastSampleTime
   106  	cpuTime := next - p.lastTotalCPUTime
   107  
   108  	busyRatio := float64(cpuTime) / float64(duration)
   109  	idleRatio := p.nCPU - busyRatio
   110  
   111  	// Clamp the value as we do not get all the timestamps at the same time
   112  	if idleRatio > p.nCPU {
   113  		idleRatio = p.nCPU
   114  	} else if idleRatio < 0 {
   115  		idleRatio = 0
   116  	}
   117  
   118  	p.lastSampleTime = t
   119  	p.lastTotalCPUTime = next
   120  
   121  	return idleRatio, nil
   122  }
   123  
   124  func (p *cgroupCPUMonitor) numCPU() float64 {
   125  	return p.nCPU
   126  }
   127  
   128  type cpuInfoGetterV1 struct{}
   129  
   130  func newCpuInfoGetterV1() cpuInfoGetter {
   131  	return &cpuInfoGetterV1{}
   132  }
   133  
   134  func (cg *cpuInfoGetterV1) getTotalCPUTime() (int64, error) {
   135  	b, err := os.ReadFile(cpuStatsPathV1)
   136  	if err != nil {
   137  		return 0, err
   138  	}
   139  
   140  	// Skip the trailing EOL
   141  	i, err := strconv.ParseInt(string(b[:len(b)-1]), 10, 64)
   142  	if err != nil {
   143  		return 0, err
   144  	}
   145  
   146  	return i, nil
   147  }
   148  
   149  func (cg *cpuInfoGetterV1) numCPU() (float64, error) {
   150  	quota, err := readIntFromFile(numCPUPathV1Quota)
   151  	if err != nil {
   152  		if errors.Is(err, os.ErrNotExist) {
   153  			// File may not exist in case of no quota
   154  			return float64(runtime.NumCPU()), nil
   155  		}
   156  
   157  		return 0, err
   158  	}
   159  
   160  	if quota < 0 {
   161  		// default
   162  		return float64(runtime.NumCPU()), nil
   163  	}
   164  
   165  	period, err := readIntFromFile(numCPUPathV1Period)
   166  	if err != nil {
   167  		return 0, err
   168  	}
   169  
   170  	if period <= 0 {
   171  		// default
   172  		return float64(runtime.NumCPU()), nil
   173  	}
   174  
   175  	return float64(quota) / float64(period), nil
   176  }
   177  
   178  type cpuInfoGetterV2 struct{}
   179  
   180  func newCpuInfoGetterV2() cpuInfoGetter {
   181  	return &cpuInfoGetterV2{}
   182  }
   183  
   184  func (cg *cpuInfoGetterV2) getTotalCPUTime() (int64, error) {
   185  	b, err := os.ReadFile(cpuStatsPathV2)
   186  	if err != nil {
   187  		return 0, err
   188  	}
   189  
   190  	m := usageRegex.FindSubmatch(b)
   191  	if len(m) <= 1 {
   192  		return 0, errors.New("could not parse cpu stats")
   193  	}
   194  
   195  	i, err := strconv.ParseInt(string(m[1]), 10, 64)
   196  	if err != nil {
   197  		return 0, err
   198  	}
   199  
   200  	// Caller expects time in ns
   201  	return i * 1000, nil
   202  }
   203  
   204  func (cg *cpuInfoGetterV2) numCPU() (float64, error) {
   205  	b, err := os.ReadFile(numCPUPathV2)
   206  	if err != nil {
   207  		if errors.Is(err, os.ErrNotExist) {
   208  			// File may not exist in case of no quota
   209  			return float64(runtime.NumCPU()), nil
   210  		}
   211  		return 0, err
   212  	}
   213  
   214  	s := strings.TrimSuffix(string(b), "\n")
   215  
   216  	m := strings.Split(s, " ")
   217  	if len(m) <= 1 {
   218  		return 0, errors.New("could not parse cpu stats")
   219  	}
   220  
   221  	if m[0] == "max" {
   222  		// No quota
   223  		return float64(runtime.NumCPU()), nil
   224  	}
   225  
   226  	quota, err := strconv.ParseInt(string(m[0]), 10, 64)
   227  	if err != nil {
   228  		return 0, err
   229  	}
   230  
   231  	period, err := strconv.ParseInt(string(m[1]), 10, 64)
   232  	if err != nil {
   233  		return 0, err
   234  	}
   235  
   236  	return float64(quota) / float64(period), nil
   237  }
   238  
   239  func fileExists(path string) (bool, error) {
   240  	_, err := os.Lstat(path)
   241  	switch {
   242  	case err == nil:
   243  		return true, nil
   244  	case errors.Is(err, os.ErrNotExist):
   245  		return false, nil
   246  	default:
   247  		return false, err
   248  	}
   249  }
   250  
   251  func readIntFromFile(filename string) (int, error) {
   252  	b, err := os.ReadFile(filename)
   253  	if err != nil {
   254  		return 0, err
   255  	}
   256  
   257  	// Remove trailing new line if any
   258  	s := strings.TrimSuffix(string(b), "\n")
   259  
   260  	// Remove trailing space if any
   261  	s = strings.TrimSuffix(s, " ")
   262  
   263  	return strconv.Atoi(s)
   264  }
   265  
   266  func getHostCPUCount(fs procfs.FS) (float64, error) {
   267  	cpuInfo, err := fs.CPUInfo()
   268  	if err != nil {
   269  		return 0, err
   270  	}
   271  	return float64(len(cpuInfo)), nil
   272  }