github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/drivers/shared/executor/pid_collector.go (about)

     1  package executor
     2  
     3  import (
     4  	"os"
     5  	"strconv"
     6  	"sync"
     7  	"time"
     8  
     9  	hclog "github.com/hashicorp/go-hclog"
    10  	"github.com/hashicorp/nomad/client/stats"
    11  	"github.com/hashicorp/nomad/plugins/drivers"
    12  	ps "github.com/mitchellh/go-ps"
    13  	"github.com/shirou/gopsutil/process"
    14  )
    15  
    16  var (
    17  	// pidScanInterval is the interval at which the executor scans the process
    18  	// tree for finding out the pids that the executor and it's child processes
    19  	// have forked
    20  	pidScanInterval = 5 * time.Second
    21  )
    22  
    23  // pidCollector is a utility that can be embedded in an executor to collect pid
    24  // stats
    25  type pidCollector struct {
    26  	pids    map[int]*nomadPid
    27  	pidLock sync.RWMutex
    28  	logger  hclog.Logger
    29  }
    30  
    31  // nomadPid holds a pid and it's cpu percentage calculator
    32  type nomadPid struct {
    33  	pid           int
    34  	cpuStatsTotal *stats.CpuStats
    35  	cpuStatsUser  *stats.CpuStats
    36  	cpuStatsSys   *stats.CpuStats
    37  }
    38  
    39  // allPidGetter is a func which is used by the pid collector to gather
    40  // stats on
    41  type allPidGetter func() (map[int]*nomadPid, error)
    42  
    43  func newPidCollector(logger hclog.Logger) *pidCollector {
    44  	return &pidCollector{
    45  		pids:   make(map[int]*nomadPid),
    46  		logger: logger.Named("pid_collector"),
    47  	}
    48  }
    49  
    50  // collectPids collects the pids of the child processes that the executor is
    51  // running every 5 seconds
    52  func (c *pidCollector) collectPids(stopCh chan interface{}, pidGetter allPidGetter) {
    53  	// Fire the timer right away when the executor starts from there on the pids
    54  	// are collected every scan interval
    55  	timer := time.NewTimer(0)
    56  	defer timer.Stop()
    57  	for {
    58  		select {
    59  		case <-timer.C:
    60  			pids, err := pidGetter()
    61  			if err != nil {
    62  				c.logger.Debug("error collecting pids", "error", err)
    63  			}
    64  			c.pidLock.Lock()
    65  
    66  			// Adding pids which are not being tracked
    67  			for pid, np := range pids {
    68  				if _, ok := c.pids[pid]; !ok {
    69  					c.pids[pid] = np
    70  				}
    71  			}
    72  			// Removing pids which are no longer present
    73  			for pid := range c.pids {
    74  				if _, ok := pids[pid]; !ok {
    75  					delete(c.pids, pid)
    76  				}
    77  			}
    78  			c.pidLock.Unlock()
    79  			timer.Reset(pidScanInterval)
    80  		case <-stopCh:
    81  			return
    82  		}
    83  	}
    84  }
    85  
    86  // scanPids scans all the pids on the machine running the current executor and
    87  // returns the child processes of the executor.
    88  func scanPids(parentPid int, allPids []ps.Process) (map[int]*nomadPid, error) {
    89  	processFamily := make(map[int]struct{})
    90  	processFamily[parentPid] = struct{}{}
    91  
    92  	// A mapping of pids to their parent pids. It is used to build the process
    93  	// tree of the executing task
    94  	pidsRemaining := make(map[int]int, len(allPids))
    95  	for _, pid := range allPids {
    96  		pidsRemaining[pid.Pid()] = pid.PPid()
    97  	}
    98  
    99  	for {
   100  		// flag to indicate if we have found a match
   101  		foundNewPid := false
   102  
   103  		for pid, ppid := range pidsRemaining {
   104  			_, childPid := processFamily[ppid]
   105  
   106  			// checking if the pid is a child of any of the parents
   107  			if childPid {
   108  				processFamily[pid] = struct{}{}
   109  				delete(pidsRemaining, pid)
   110  				foundNewPid = true
   111  			}
   112  		}
   113  
   114  		// not scanning anymore if we couldn't find a single match
   115  		if !foundNewPid {
   116  			break
   117  		}
   118  	}
   119  
   120  	res := make(map[int]*nomadPid)
   121  	for pid := range processFamily {
   122  		np := nomadPid{
   123  			pid:           pid,
   124  			cpuStatsTotal: stats.NewCpuStats(),
   125  			cpuStatsUser:  stats.NewCpuStats(),
   126  			cpuStatsSys:   stats.NewCpuStats(),
   127  		}
   128  		res[pid] = &np
   129  	}
   130  	return res, nil
   131  }
   132  
   133  // pidStats returns the resource usage stats per pid
   134  func (c *pidCollector) pidStats() (map[string]*drivers.ResourceUsage, error) {
   135  	stats := make(map[string]*drivers.ResourceUsage)
   136  	c.pidLock.RLock()
   137  	pids := make(map[int]*nomadPid, len(c.pids))
   138  	for k, v := range c.pids {
   139  		pids[k] = v
   140  	}
   141  	c.pidLock.RUnlock()
   142  	for pid, np := range pids {
   143  		p, err := process.NewProcess(int32(pid))
   144  		if err != nil {
   145  			c.logger.Trace("unable to create new process", "pid", pid, "error", err)
   146  			continue
   147  		}
   148  		ms := &drivers.MemoryStats{}
   149  		if memInfo, err := p.MemoryInfo(); err == nil {
   150  			ms.RSS = memInfo.RSS
   151  			ms.Swap = memInfo.Swap
   152  			ms.Measured = ExecutorBasicMeasuredMemStats
   153  		}
   154  
   155  		cs := &drivers.CpuStats{}
   156  		if cpuStats, err := p.Times(); err == nil {
   157  			cs.SystemMode = np.cpuStatsSys.Percent(cpuStats.System * float64(time.Second))
   158  			cs.UserMode = np.cpuStatsUser.Percent(cpuStats.User * float64(time.Second))
   159  			cs.Measured = ExecutorBasicMeasuredCpuStats
   160  
   161  			// calculate cpu usage percent
   162  			cs.Percent = np.cpuStatsTotal.Percent(cpuStats.Total() * float64(time.Second))
   163  		}
   164  		stats[strconv.Itoa(pid)] = &drivers.ResourceUsage{MemoryStats: ms, CpuStats: cs}
   165  	}
   166  
   167  	return stats, nil
   168  }
   169  
   170  // aggregatedResourceUsage aggregates the resource usage of all the pids and
   171  // returns a TaskResourceUsage data point
   172  func aggregatedResourceUsage(systemCpuStats *stats.CpuStats, pidStats map[string]*drivers.ResourceUsage) *drivers.TaskResourceUsage {
   173  	ts := time.Now().UTC().UnixNano()
   174  	var (
   175  		systemModeCPU, userModeCPU, percent float64
   176  		totalRSS, totalSwap                 uint64
   177  	)
   178  
   179  	for _, pidStat := range pidStats {
   180  		systemModeCPU += pidStat.CpuStats.SystemMode
   181  		userModeCPU += pidStat.CpuStats.UserMode
   182  		percent += pidStat.CpuStats.Percent
   183  
   184  		totalRSS += pidStat.MemoryStats.RSS
   185  		totalSwap += pidStat.MemoryStats.Swap
   186  	}
   187  
   188  	totalCPU := &drivers.CpuStats{
   189  		SystemMode: systemModeCPU,
   190  		UserMode:   userModeCPU,
   191  		Percent:    percent,
   192  		Measured:   ExecutorBasicMeasuredCpuStats,
   193  		TotalTicks: systemCpuStats.TicksConsumed(percent),
   194  	}
   195  
   196  	totalMemory := &drivers.MemoryStats{
   197  		RSS:      totalRSS,
   198  		Swap:     totalSwap,
   199  		Measured: ExecutorBasicMeasuredMemStats,
   200  	}
   201  
   202  	resourceUsage := drivers.ResourceUsage{
   203  		MemoryStats: totalMemory,
   204  		CpuStats:    totalCPU,
   205  	}
   206  	return &drivers.TaskResourceUsage{
   207  		ResourceUsage: &resourceUsage,
   208  		Timestamp:     ts,
   209  		Pids:          pidStats,
   210  	}
   211  }
   212  
   213  func getAllPidsByScanning() (map[int]*nomadPid, error) {
   214  	allProcesses, err := ps.Processes()
   215  	if err != nil {
   216  		return nil, err
   217  	}
   218  	return scanPids(os.Getpid(), allProcesses)
   219  }