github.com/hernad/nomad@v1.6.112/drivers/shared/executor/pid_collector.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package executor
     5  
     6  import (
     7  	"os"
     8  	"strconv"
     9  	"sync"
    10  	"time"
    11  
    12  	hclog "github.com/hashicorp/go-hclog"
    13  	"github.com/hernad/nomad/client/lib/resources"
    14  	"github.com/hernad/nomad/helper/stats"
    15  	"github.com/hernad/nomad/plugins/drivers"
    16  	ps "github.com/mitchellh/go-ps"
    17  	"github.com/shirou/gopsutil/v3/process"
    18  )
    19  
var (
	// pidScanInterval is the interval at which the executor scans the process
	// tree to find the pids that the executor and its child processes have
	// forked. collectPids re-arms its timer with this value after every scan.
	pidScanInterval = 5 * time.Second
)
    26  
// pidCollector is a utility that can be embedded in an executor to collect pid
// stats.
//
// pids holds the currently tracked processes keyed by pid; collectPids adds
// and removes entries and pidStats reads them. pidLock guards pids. logger is
// the logger used to report scan and sampling problems.
type pidCollector struct {
	pids    map[int]*resources.PID
	pidLock sync.RWMutex
	logger  hclog.Logger
}
    34  
// allPidGetter is the function type the pid collector calls on every scan to
// obtain the set of pids it should gather stats on. It returns that set, or
// an error if the set could not be determined.
type allPidGetter func() (resources.PIDs, error)
    38  
    39  func newPidCollector(logger hclog.Logger) *pidCollector {
    40  	return &pidCollector{
    41  		pids:   make(map[int]*resources.PID),
    42  		logger: logger.Named("pid_collector"),
    43  	}
    44  }
    45  
    46  // collectPids collects the pids of the child processes that the executor is
    47  // running every 5 seconds
    48  func (c *pidCollector) collectPids(stopCh chan interface{}, pidGetter allPidGetter) {
    49  	// Fire the timer right away when the executor starts from there on the pids
    50  	// are collected every scan interval
    51  	timer := time.NewTimer(0)
    52  	defer timer.Stop()
    53  	for {
    54  		select {
    55  		case <-timer.C:
    56  			pids, err := pidGetter()
    57  			if err != nil {
    58  				c.logger.Debug("error collecting pids", "error", err)
    59  			}
    60  			c.pidLock.Lock()
    61  
    62  			// Adding pids which are not being tracked
    63  			for pid, np := range pids {
    64  				if _, ok := c.pids[pid]; !ok {
    65  					c.pids[pid] = np
    66  				}
    67  			}
    68  			// Removing pids which are no longer present
    69  			for pid := range c.pids {
    70  				if _, ok := pids[pid]; !ok {
    71  					delete(c.pids, pid)
    72  				}
    73  			}
    74  			c.pidLock.Unlock()
    75  			timer.Reset(pidScanInterval)
    76  		case <-stopCh:
    77  			return
    78  		}
    79  	}
    80  }
    81  
    82  // scanPids scans all the pids on the machine running the current executor and
    83  // returns the child processes of the executor.
    84  func scanPids(parentPid int, allPids []ps.Process) (map[int]*resources.PID, error) {
    85  	processFamily := make(map[int]struct{})
    86  	processFamily[parentPid] = struct{}{}
    87  
    88  	// A mapping of pids to their parent pids. It is used to build the process
    89  	// tree of the executing task
    90  	pidsRemaining := make(map[int]int, len(allPids))
    91  	for _, pid := range allPids {
    92  		pidsRemaining[pid.Pid()] = pid.PPid()
    93  	}
    94  
    95  	for {
    96  		// flag to indicate if we have found a match
    97  		foundNewPid := false
    98  
    99  		for pid, ppid := range pidsRemaining {
   100  			_, childPid := processFamily[ppid]
   101  
   102  			// checking if the pid is a child of any of the parents
   103  			if childPid {
   104  				processFamily[pid] = struct{}{}
   105  				delete(pidsRemaining, pid)
   106  				foundNewPid = true
   107  			}
   108  		}
   109  
   110  		// not scanning anymore if we couldn't find a single match
   111  		if !foundNewPid {
   112  			break
   113  		}
   114  	}
   115  
   116  	res := make(map[int]*resources.PID)
   117  	for pid := range processFamily {
   118  		res[pid] = &resources.PID{
   119  			PID:           pid,
   120  			StatsTotalCPU: stats.NewCpuStats(),
   121  			StatsUserCPU:  stats.NewCpuStats(),
   122  			StatsSysCPU:   stats.NewCpuStats(),
   123  		}
   124  	}
   125  	return res, nil
   126  }
   127  
   128  // pidStats returns the resource usage stats per pid
   129  func (c *pidCollector) pidStats() (map[string]*drivers.ResourceUsage, error) {
   130  	stats := make(map[string]*drivers.ResourceUsage)
   131  	c.pidLock.RLock()
   132  	pids := make(map[int]*resources.PID, len(c.pids))
   133  	for k, v := range c.pids {
   134  		pids[k] = v
   135  	}
   136  	c.pidLock.RUnlock()
   137  	for pid, np := range pids {
   138  		p, err := process.NewProcess(int32(pid))
   139  		if err != nil {
   140  			c.logger.Trace("unable to create new process", "pid", pid, "error", err)
   141  			continue
   142  		}
   143  		ms := &drivers.MemoryStats{}
   144  		if memInfo, err := p.MemoryInfo(); err == nil {
   145  			ms.RSS = memInfo.RSS
   146  			ms.Swap = memInfo.Swap
   147  			ms.Measured = ExecutorBasicMeasuredMemStats
   148  		}
   149  
   150  		cs := &drivers.CpuStats{}
   151  		if cpuStats, err := p.Times(); err == nil {
   152  			cs.SystemMode = np.StatsSysCPU.Percent(cpuStats.System * float64(time.Second))
   153  			cs.UserMode = np.StatsUserCPU.Percent(cpuStats.User * float64(time.Second))
   154  			cs.Measured = ExecutorBasicMeasuredCpuStats
   155  
   156  			// calculate cpu usage percent
   157  			cs.Percent = np.StatsTotalCPU.Percent(cpuStats.Total() * float64(time.Second))
   158  		}
   159  		stats[strconv.Itoa(pid)] = &drivers.ResourceUsage{MemoryStats: ms, CpuStats: cs}
   160  	}
   161  
   162  	return stats, nil
   163  }
   164  
   165  // aggregatedResourceUsage aggregates the resource usage of all the pids and
   166  // returns a TaskResourceUsage data point
   167  func aggregatedResourceUsage(systemCpuStats *stats.CpuStats, pidStats map[string]*drivers.ResourceUsage) *drivers.TaskResourceUsage {
   168  	ts := time.Now().UTC().UnixNano()
   169  	var (
   170  		systemModeCPU, userModeCPU, percent float64
   171  		totalRSS, totalSwap                 uint64
   172  	)
   173  
   174  	for _, pidStat := range pidStats {
   175  		systemModeCPU += pidStat.CpuStats.SystemMode
   176  		userModeCPU += pidStat.CpuStats.UserMode
   177  		percent += pidStat.CpuStats.Percent
   178  
   179  		totalRSS += pidStat.MemoryStats.RSS
   180  		totalSwap += pidStat.MemoryStats.Swap
   181  	}
   182  
   183  	totalCPU := &drivers.CpuStats{
   184  		SystemMode: systemModeCPU,
   185  		UserMode:   userModeCPU,
   186  		Percent:    percent,
   187  		Measured:   ExecutorBasicMeasuredCpuStats,
   188  		TotalTicks: systemCpuStats.TicksConsumed(percent),
   189  	}
   190  
   191  	totalMemory := &drivers.MemoryStats{
   192  		RSS:      totalRSS,
   193  		Swap:     totalSwap,
   194  		Measured: ExecutorBasicMeasuredMemStats,
   195  	}
   196  
   197  	resourceUsage := drivers.ResourceUsage{
   198  		MemoryStats: totalMemory,
   199  		CpuStats:    totalCPU,
   200  	}
   201  	return &drivers.TaskResourceUsage{
   202  		ResourceUsage: &resourceUsage,
   203  		Timestamp:     ts,
   204  		Pids:          pidStats,
   205  	}
   206  }
   207  
   208  func getAllPidsByScanning() (resources.PIDs, error) {
   209  	allProcesses, err := ps.Processes()
   210  	if err != nil {
   211  		return nil, err
   212  	}
   213  	return scanPids(os.Getpid(), allProcesses)
   214  }