github.com/MetalBlockchain/metalgo@v1.11.9/utils/resource/usage.go (about)

     1  // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package resource
     5  
     6  import (
     7  	"math"
     8  	"strconv"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/prometheus/client_golang/prometheus"
    13  	"github.com/shirou/gopsutil/cpu"
    14  	"github.com/shirou/gopsutil/process"
    15  	"go.uber.org/zap"
    16  
    17  	"github.com/MetalBlockchain/metalgo/utils/logging"
    18  	"github.com/MetalBlockchain/metalgo/utils/storage"
    19  )
    20  
var (
	// lnHalf is the natural logarithm of 0.5. getSampleWeights divides it by
	// the halflife (measured in samples) to derive the exponential decay
	// factor applied to previous samples.
	lnHalf = math.Log(.5)

	// Compile-time assertion that *manager implements Manager.
	_ Manager = (*manager)(nil)
)
    26  
// CPUUser reports the CPU usage attributed to a user of CPU resources.
type CPUUser interface {
	// CPUUsage returns the number of CPU cores of usage this user has attributed
	// to it.
	//
	// For example, if this user is reporting a process's CPU utilization and
	// that process is currently using 150% CPU (i.e. one and a half cores of
	// compute) then the return value will be 1.5.
	CPUUsage() float64
}
    36  
// DiskUser reports recent disk throughput and the remaining disk capacity.
type DiskUser interface {
	// DiskUsage returns the number of bytes per second read from/written to
	// disk recently.
	DiskUsage() (read float64, write float64)

	// AvailableDiskBytes returns the number of bytes available in the db
	// volume.
	AvailableDiskBytes() uint64
}
    45  
// User reports both CPU and disk usage; it is the union of CPUUser and
// DiskUser.
type User interface {
	CPUUser
	DiskUser
}
    50  
// ProcessTracker maintains the set of process IDs whose resource usage is
// being measured.
type ProcessTracker interface {
	// TrackProcess adds [pid] to the list of processes that this tracker is
	// currently managing. Duplicate requests are dropped.
	TrackProcess(pid int)

	// UntrackProcess removes [pid] from the list of processes that this tracker
	// is currently managing. Untracking a currently untracked [pid] is a noop.
	UntrackProcess(pid int)
}
    60  
// Manager reports the aggregate resource usage of a set of tracked processes.
// It combines usage reporting (User) with control over which processes are
// measured (ProcessTracker).
type Manager interface {
	User
	ProcessTracker

	// Shutdown releases allocated resources and stops tracking all processes.
	Shutdown()
}
    68  
// manager is the Manager implementation. A background goroutine started by
// NewManager periodically samples every tracked process and folds the samples
// into the usage values below.
type manager struct {
	log            logging.Logger
	processMetrics *metrics

	// [processesLock] guards [processes].
	processesLock sync.Mutex
	// [processes] maps a process ID to its tracking state.
	processes     map[int]*proc

	// [usageLock] guards [cpuUsage], [readUsage], [writeUsage] and
	// [availableDiskBytes].
	usageLock sync.RWMutex
	// [cpuUsage] is the weighted moving average of the CPU usage summed over
	// all tracked processes.
	cpuUsage  float64
	// [readUsage] is the number of bytes/second read from disk recently.
	readUsage float64
	// [writeUsage] is the number of bytes/second written to disk recently.
	writeUsage float64

	// [availableDiskBytes] is the result of the most recent successful
	// available-bytes lookup. It starts at math.MaxUint64.
	availableDiskBytes uint64

	// [closeOnce] ensures [onClose] is closed at most once.
	closeOnce sync.Once
	// [onClose] is closed by Shutdown to stop the background update loop.
	onClose   chan struct{}
}
    88  
    89  func NewManager(
    90  	log logging.Logger,
    91  	diskPath string,
    92  	frequency,
    93  	cpuHalflife,
    94  	diskHalflife time.Duration,
    95  	metricsRegisterer prometheus.Registerer,
    96  ) (Manager, error) {
    97  	processMetrics, err := newMetrics(metricsRegisterer)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	m := &manager{
   103  		log:                log,
   104  		processMetrics:     processMetrics,
   105  		processes:          make(map[int]*proc),
   106  		onClose:            make(chan struct{}),
   107  		availableDiskBytes: math.MaxUint64,
   108  	}
   109  
   110  	go m.update(diskPath, frequency, cpuHalflife, diskHalflife)
   111  	return m, nil
   112  }
   113  
   114  func (m *manager) CPUUsage() float64 {
   115  	m.usageLock.RLock()
   116  	defer m.usageLock.RUnlock()
   117  
   118  	return m.cpuUsage
   119  }
   120  
   121  func (m *manager) DiskUsage() (float64, float64) {
   122  	m.usageLock.RLock()
   123  	defer m.usageLock.RUnlock()
   124  
   125  	return m.readUsage, m.writeUsage
   126  }
   127  
   128  func (m *manager) AvailableDiskBytes() uint64 {
   129  	m.usageLock.RLock()
   130  	defer m.usageLock.RUnlock()
   131  
   132  	return m.availableDiskBytes
   133  }
   134  
   135  func (m *manager) TrackProcess(pid int) {
   136  	p, err := process.NewProcess(int32(pid))
   137  	if err != nil {
   138  		return
   139  	}
   140  
   141  	process := &proc{
   142  		p:   p,
   143  		log: m.log,
   144  	}
   145  
   146  	m.processesLock.Lock()
   147  	m.processes[pid] = process
   148  	m.processesLock.Unlock()
   149  }
   150  
   151  func (m *manager) UntrackProcess(pid int) {
   152  	m.processesLock.Lock()
   153  	delete(m.processes, pid)
   154  	m.processesLock.Unlock()
   155  }
   156  
   157  func (m *manager) Shutdown() {
   158  	m.closeOnce.Do(func() {
   159  		close(m.onClose)
   160  	})
   161  }
   162  
   163  func (m *manager) update(diskPath string, frequency, cpuHalflife, diskHalflife time.Duration) {
   164  	ticker := time.NewTicker(frequency)
   165  	defer ticker.Stop()
   166  
   167  	newCPUWeight, oldCPUWeight := getSampleWeights(frequency, cpuHalflife)
   168  	newDiskWeight, oldDiskWeight := getSampleWeights(frequency, diskHalflife)
   169  
   170  	frequencyInSeconds := frequency.Seconds()
   171  	for {
   172  		currentCPUUsage, currentReadUsage, currentWriteUsage := m.getActiveUsage(frequencyInSeconds)
   173  		currentScaledCPUUsage := newCPUWeight * currentCPUUsage
   174  		currentScaledReadUsage := newDiskWeight * currentReadUsage
   175  		currentScaledWriteUsage := newDiskWeight * currentWriteUsage
   176  
   177  		availableBytes, getBytesErr := storage.AvailableBytes(diskPath)
   178  		if getBytesErr != nil {
   179  			m.log.Verbo("failed to lookup resource",
   180  				zap.String("resource", "system disk"),
   181  				zap.String("path", diskPath),
   182  				zap.Error(getBytesErr),
   183  			)
   184  		}
   185  
   186  		m.usageLock.Lock()
   187  		m.cpuUsage = oldCPUWeight*m.cpuUsage + currentScaledCPUUsage
   188  		m.readUsage = oldDiskWeight*m.readUsage + currentScaledReadUsage
   189  		m.writeUsage = oldDiskWeight*m.writeUsage + currentScaledWriteUsage
   190  
   191  		if getBytesErr == nil {
   192  			m.availableDiskBytes = availableBytes
   193  		}
   194  
   195  		m.usageLock.Unlock()
   196  
   197  		select {
   198  		case <-ticker.C:
   199  		case <-m.onClose:
   200  			return
   201  		}
   202  	}
   203  }
   204  
   205  // Returns:
   206  // 1. Current CPU usage by all processes.
   207  // 2. Current bytes/sec read from disk by all processes.
   208  // 3. Current bytes/sec written to disk by all processes.
   209  func (m *manager) getActiveUsage(secondsSinceLastUpdate float64) (float64, float64, float64) {
   210  	m.processesLock.Lock()
   211  	defer m.processesLock.Unlock()
   212  
   213  	var (
   214  		totalCPU   float64
   215  		totalRead  float64
   216  		totalWrite float64
   217  	)
   218  	for _, p := range m.processes {
   219  		cpu, read, write := p.getActiveUsage(secondsSinceLastUpdate)
   220  		totalCPU += cpu
   221  		totalRead += read
   222  		totalWrite += write
   223  
   224  		processIDStr := strconv.Itoa(int(p.p.Pid))
   225  		m.processMetrics.numCPUCycles.WithLabelValues(processIDStr).Set(p.lastTotalCPU)
   226  		m.processMetrics.numDiskReads.WithLabelValues(processIDStr).Set(float64(p.numReads))
   227  		m.processMetrics.numDiskReadBytes.WithLabelValues(processIDStr).Set(float64(p.lastReadBytes))
   228  		m.processMetrics.numDiskWrites.WithLabelValues(processIDStr).Set(float64(p.numWrites))
   229  		m.processMetrics.numDiskWritesBytes.WithLabelValues(processIDStr).Set(float64(p.lastWriteBytes))
   230  	}
   231  
   232  	return totalCPU, totalRead, totalWrite
   233  }
   234  
   235  type proc struct {
   236  	p   *process.Process
   237  	log logging.Logger
   238  
   239  	initialized bool
   240  
   241  	// [lastTotalCPU] is the most recent measurement of total CPU usage.
   242  	lastTotalCPU float64
   243  
   244  	// [numReads] is the total number of disk reads performed.
   245  	numReads uint64
   246  	// [lastReadBytes] is the most recent measurement of total disk bytes read.
   247  	lastReadBytes uint64
   248  
   249  	// [numWrites] is the total number of disk writes performed.
   250  	numWrites uint64
   251  	// [lastWriteBytes] is the most recent measurement of total disk bytes
   252  	// written.
   253  	lastWriteBytes uint64
   254  }
   255  
   256  func (p *proc) getActiveUsage(secondsSinceLastUpdate float64) (float64, float64, float64) {
   257  	// If there is an error tracking the CPU/disk utilization of a process,
   258  	// assume that the utilization is 0.
   259  	times, err := p.p.Times()
   260  	if err != nil {
   261  		p.log.Verbo("failed to lookup resource",
   262  			zap.String("resource", "process CPU"),
   263  			zap.Int32("pid", p.p.Pid),
   264  			zap.Error(err),
   265  		)
   266  		times = &cpu.TimesStat{}
   267  	}
   268  
   269  	// Note: IOCounters is not implemented on macos and therefore always returns
   270  	// an error on macos.
   271  	io, err := p.p.IOCounters()
   272  	if err != nil {
   273  		p.log.Verbo("failed to lookup resource",
   274  			zap.String("resource", "process IO"),
   275  			zap.Int32("pid", p.p.Pid),
   276  			zap.Error(err),
   277  		)
   278  		io = &process.IOCountersStat{}
   279  	}
   280  
   281  	var (
   282  		cpu   float64
   283  		read  float64
   284  		write float64
   285  	)
   286  	totalCPU := times.Total()
   287  	if p.initialized {
   288  		if totalCPU > p.lastTotalCPU {
   289  			newCPU := totalCPU - p.lastTotalCPU
   290  			cpu = newCPU / secondsSinceLastUpdate
   291  		}
   292  		if io.ReadBytes > p.lastReadBytes {
   293  			newRead := io.ReadBytes - p.lastReadBytes
   294  			read = float64(newRead) / secondsSinceLastUpdate
   295  		}
   296  		if io.WriteBytes > p.lastWriteBytes {
   297  			newWrite := io.WriteBytes - p.lastWriteBytes
   298  			write = float64(newWrite) / secondsSinceLastUpdate
   299  		}
   300  	}
   301  
   302  	p.initialized = true
   303  	p.lastTotalCPU = totalCPU
   304  	p.numReads = io.ReadCount
   305  	p.lastReadBytes = io.ReadBytes
   306  	p.numWrites = io.WriteCount
   307  	p.lastWriteBytes = io.WriteBytes
   308  
   309  	return cpu, read, write
   310  }
   311  
   312  // getSampleWeights converts the frequency of CPU sampling and the halflife of
   313  // the CPU sample's usefulness into weights to scale the newly sampled point and
   314  // previously samples.
   315  func getSampleWeights(frequency, halflife time.Duration) (float64, float64) {
   316  	halflifeInSamples := float64(halflife) / float64(frequency)
   317  	oldWeight := math.Exp(lnHalf / halflifeInSamples)
   318  	newWeight := 1 - oldWeight
   319  	return newWeight, oldWeight
   320  }