github.com/hernad/nomad@v1.6.112/drivers/nix/_executor/pid_collector.go (about)

     1  package executor
     2  
     3  import (
     4  	"os"
     5  	"strconv"
     6  	"sync"
     7  	"time"
     8  
     9  	hclog "github.com/hashicorp/go-hclog"
    10  	"github.com/hernad/nomad/client/lib/resources"
    11  	"github.com/hernad/nomad/client/stats"
    12  	"github.com/hernad/nomad/plugins/drivers"
    13  	ps "github.com/mitchellh/go-ps"
    14  	"github.com/shirou/gopsutil/v3/process"
    15  )
    16  
    17  var (
    18  	// pidScanInterval is the interval at which the executor scans the process
    19  	// tree for finding out the pids that the executor and it's child processes
    20  	// have forked
    21  	pidScanInterval = 5 * time.Second
    22  )
    23  
    24  // pidCollector is a utility that can be embedded in an executor to collect pid
    25  // stats
    26  type pidCollector struct {
    27  	pids    map[int]*resources.PID
    28  	pidLock sync.RWMutex
    29  	logger  hclog.Logger
    30  }
    31  
// allPidGetter is a func the pid collector calls on every scan to obtain the
// current set of pids it should gather stats on.
type allPidGetter func() (resources.PIDs, error)
    35  
    36  func newPidCollector(logger hclog.Logger) *pidCollector {
    37  	return &pidCollector{
    38  		pids:   make(map[int]*resources.PID),
    39  		logger: logger.Named("pid_collector"),
    40  	}
    41  }
    42  
    43  // collectPids collects the pids of the child processes that the executor is
    44  // running every 5 seconds
    45  func (c *pidCollector) collectPids(stopCh chan interface{}, pidGetter allPidGetter) {
    46  	// Fire the timer right away when the executor starts from there on the pids
    47  	// are collected every scan interval
    48  	timer := time.NewTimer(0)
    49  	defer timer.Stop()
    50  	for {
    51  		select {
    52  		case <-timer.C:
    53  			pids, err := pidGetter()
    54  			if err != nil {
    55  				c.logger.Debug("error collecting pids", "error", err)
    56  			}
    57  			c.pidLock.Lock()
    58  
    59  			// Adding pids which are not being tracked
    60  			for pid, np := range pids {
    61  				if _, ok := c.pids[pid]; !ok {
    62  					c.pids[pid] = np
    63  				}
    64  			}
    65  			// Removing pids which are no longer present
    66  			for pid := range c.pids {
    67  				if _, ok := pids[pid]; !ok {
    68  					delete(c.pids, pid)
    69  				}
    70  			}
    71  			c.pidLock.Unlock()
    72  			timer.Reset(pidScanInterval)
    73  		case <-stopCh:
    74  			return
    75  		}
    76  	}
    77  }
    78  
    79  // scanPids scans all the pids on the machine running the current executor and
    80  // returns the child processes of the executor.
    81  func scanPids(parentPid int, allPids []ps.Process) (map[int]*resources.PID, error) {
    82  	processFamily := make(map[int]struct{})
    83  	processFamily[parentPid] = struct{}{}
    84  
    85  	// A mapping of pids to their parent pids. It is used to build the process
    86  	// tree of the executing task
    87  	pidsRemaining := make(map[int]int, len(allPids))
    88  	for _, pid := range allPids {
    89  		pidsRemaining[pid.Pid()] = pid.PPid()
    90  	}
    91  
    92  	for {
    93  		// flag to indicate if we have found a match
    94  		foundNewPid := false
    95  
    96  		for pid, ppid := range pidsRemaining {
    97  			_, childPid := processFamily[ppid]
    98  
    99  			// checking if the pid is a child of any of the parents
   100  			if childPid {
   101  				processFamily[pid] = struct{}{}
   102  				delete(pidsRemaining, pid)
   103  				foundNewPid = true
   104  			}
   105  		}
   106  
   107  		// not scanning anymore if we couldn't find a single match
   108  		if !foundNewPid {
   109  			break
   110  		}
   111  	}
   112  
   113  	res := make(map[int]*resources.PID)
   114  	for pid := range processFamily {
   115  		res[pid] = &resources.PID{
   116  			PID:           pid,
   117  			StatsTotalCPU: stats.NewCpuStats(),
   118  			StatsUserCPU:  stats.NewCpuStats(),
   119  			StatsSysCPU:   stats.NewCpuStats(),
   120  		}
   121  	}
   122  	return res, nil
   123  }
   124  
   125  // pidStats returns the resource usage stats per pid
   126  func (c *pidCollector) pidStats() (map[string]*drivers.ResourceUsage, error) {
   127  	stats := make(map[string]*drivers.ResourceUsage)
   128  	c.pidLock.RLock()
   129  	pids := make(map[int]*resources.PID, len(c.pids))
   130  	for k, v := range c.pids {
   131  		pids[k] = v
   132  	}
   133  	c.pidLock.RUnlock()
   134  	for pid, np := range pids {
   135  		p, err := process.NewProcess(int32(pid))
   136  		if err != nil {
   137  			c.logger.Trace("unable to create new process", "pid", pid, "error", err)
   138  			continue
   139  		}
   140  		ms := &drivers.MemoryStats{}
   141  		if memInfo, err := p.MemoryInfo(); err == nil {
   142  			ms.RSS = memInfo.RSS
   143  			ms.Swap = memInfo.Swap
   144  			ms.Measured = ExecutorBasicMeasuredMemStats
   145  		}
   146  
   147  		cs := &drivers.CpuStats{}
   148  		if cpuStats, err := p.Times(); err == nil {
   149  			cs.SystemMode = np.StatsSysCPU.Percent(cpuStats.System * float64(time.Second))
   150  			cs.UserMode = np.StatsUserCPU.Percent(cpuStats.User * float64(time.Second))
   151  			cs.Measured = ExecutorBasicMeasuredCpuStats
   152  
   153  			// calculate cpu usage percent
   154  			cs.Percent = np.StatsTotalCPU.Percent(cpuStats.Total() * float64(time.Second))
   155  		}
   156  		stats[strconv.Itoa(pid)] = &drivers.ResourceUsage{MemoryStats: ms, CpuStats: cs}
   157  	}
   158  
   159  	return stats, nil
   160  }
   161  
   162  // aggregatedResourceUsage aggregates the resource usage of all the pids and
   163  // returns a TaskResourceUsage data point
   164  func aggregatedResourceUsage(systemCpuStats *stats.CpuStats, pidStats map[string]*drivers.ResourceUsage) *drivers.TaskResourceUsage {
   165  	ts := time.Now().UTC().UnixNano()
   166  	var (
   167  		systemModeCPU, userModeCPU, percent float64
   168  		totalRSS, totalSwap                 uint64
   169  	)
   170  
   171  	for _, pidStat := range pidStats {
   172  		systemModeCPU += pidStat.CpuStats.SystemMode
   173  		userModeCPU += pidStat.CpuStats.UserMode
   174  		percent += pidStat.CpuStats.Percent
   175  
   176  		totalRSS += pidStat.MemoryStats.RSS
   177  		totalSwap += pidStat.MemoryStats.Swap
   178  	}
   179  
   180  	totalCPU := &drivers.CpuStats{
   181  		SystemMode: systemModeCPU,
   182  		UserMode:   userModeCPU,
   183  		Percent:    percent,
   184  		Measured:   ExecutorBasicMeasuredCpuStats,
   185  		TotalTicks: systemCpuStats.TicksConsumed(percent),
   186  	}
   187  
   188  	totalMemory := &drivers.MemoryStats{
   189  		RSS:      totalRSS,
   190  		Swap:     totalSwap,
   191  		Measured: ExecutorBasicMeasuredMemStats,
   192  	}
   193  
   194  	resourceUsage := drivers.ResourceUsage{
   195  		MemoryStats: totalMemory,
   196  		CpuStats:    totalCPU,
   197  	}
   198  	return &drivers.TaskResourceUsage{
   199  		ResourceUsage: &resourceUsage,
   200  		Timestamp:     ts,
   201  		Pids:          pidStats,
   202  	}
   203  }
   204  
   205  func getAllPidsByScanning() (resources.PIDs, error) {
   206  	allProcesses, err := ps.Processes()
   207  	if err != nil {
   208  		return nil, err
   209  	}
   210  	return scanPids(os.Getpid(), allProcesses)
   211  }