github.com/hernad/nomad@v1.6.112/drivers/nix/_executor/pid_collector.go (about) 1 package executor 2 3 import ( 4 "os" 5 "strconv" 6 "sync" 7 "time" 8 9 hclog "github.com/hashicorp/go-hclog" 10 "github.com/hernad/nomad/client/lib/resources" 11 "github.com/hernad/nomad/client/stats" 12 "github.com/hernad/nomad/plugins/drivers" 13 ps "github.com/mitchellh/go-ps" 14 "github.com/shirou/gopsutil/v3/process" 15 ) 16 17 var ( 18 // pidScanInterval is the interval at which the executor scans the process 19 // tree for finding out the pids that the executor and it's child processes 20 // have forked 21 pidScanInterval = 5 * time.Second 22 ) 23 24 // pidCollector is a utility that can be embedded in an executor to collect pid 25 // stats 26 type pidCollector struct { 27 pids map[int]*resources.PID 28 pidLock sync.RWMutex 29 logger hclog.Logger 30 } 31 32 // allPidGetter is a func which is used by the pid collector to gather 33 // stats on 34 type allPidGetter func() (resources.PIDs, error) 35 36 func newPidCollector(logger hclog.Logger) *pidCollector { 37 return &pidCollector{ 38 pids: make(map[int]*resources.PID), 39 logger: logger.Named("pid_collector"), 40 } 41 } 42 43 // collectPids collects the pids of the child processes that the executor is 44 // running every 5 seconds 45 func (c *pidCollector) collectPids(stopCh chan interface{}, pidGetter allPidGetter) { 46 // Fire the timer right away when the executor starts from there on the pids 47 // are collected every scan interval 48 timer := time.NewTimer(0) 49 defer timer.Stop() 50 for { 51 select { 52 case <-timer.C: 53 pids, err := pidGetter() 54 if err != nil { 55 c.logger.Debug("error collecting pids", "error", err) 56 } 57 c.pidLock.Lock() 58 59 // Adding pids which are not being tracked 60 for pid, np := range pids { 61 if _, ok := c.pids[pid]; !ok { 62 c.pids[pid] = np 63 } 64 } 65 // Removing pids which are no longer present 66 for pid := range c.pids { 67 if _, ok := pids[pid]; !ok { 68 delete(c.pids, pid) 69 } 70 } 71 c.pidLock.Unlock() 72 timer.Reset(pidScanInterval) 73 case <-stopCh: 74 return 75 } 76 } 77 } 78 79 // scanPids scans all the pids on the machine running the current executor and 80 // returns the child processes of the executor. 81 func scanPids(parentPid int, allPids []ps.Process) (map[int]*resources.PID, error) { 82 processFamily := make(map[int]struct{}) 83 processFamily[parentPid] = struct{}{} 84 85 // A mapping of pids to their parent pids. It is used to build the process 86 // tree of the executing task 87 pidsRemaining := make(map[int]int, len(allPids)) 88 for _, pid := range allPids { 89 pidsRemaining[pid.Pid()] = pid.PPid() 90 } 91 92 for { 93 // flag to indicate if we have found a match 94 foundNewPid := false 95 96 for pid, ppid := range pidsRemaining { 97 _, childPid := processFamily[ppid] 98 99 // checking if the pid is a child of any of the parents 100 if childPid { 101 processFamily[pid] = struct{}{} 102 delete(pidsRemaining, pid) 103 foundNewPid = true 104 } 105 } 106 107 // not scanning anymore if we couldn't find a single match 108 if !foundNewPid { 109 break 110 } 111 } 112 113 res := make(map[int]*resources.PID) 114 for pid := range processFamily { 115 res[pid] = &resources.PID{ 116 PID: pid, 117 StatsTotalCPU: stats.NewCpuStats(), 118 StatsUserCPU: stats.NewCpuStats(), 119 StatsSysCPU: stats.NewCpuStats(), 120 } 121 } 122 return res, nil 123 } 124 125 // pidStats returns the resource usage stats per pid 126 func (c *pidCollector) pidStats() (map[string]*drivers.ResourceUsage, error) { 127 stats := make(map[string]*drivers.ResourceUsage) 128 c.pidLock.RLock() 129 pids := make(map[int]*resources.PID, len(c.pids)) 130 for k, v := range c.pids { 131 pids[k] = v 132 } 133 c.pidLock.RUnlock() 134 for pid, np := range pids { 135 p, err := process.NewProcess(int32(pid)) 136 if err != nil { 137 c.logger.Trace("unable to create new process", "pid", pid, "error", err) 138 continue 139 } 140 ms := &drivers.MemoryStats{} 141 if memInfo, err := p.MemoryInfo(); err == nil { 142 ms.RSS = memInfo.RSS 143 ms.Swap = memInfo.Swap 144 ms.Measured = ExecutorBasicMeasuredMemStats 145 } 146 147 cs := &drivers.CpuStats{} 148 if cpuStats, err := p.Times(); err == nil { 149 cs.SystemMode = np.StatsSysCPU.Percent(cpuStats.System * float64(time.Second)) 150 cs.UserMode = np.StatsUserCPU.Percent(cpuStats.User * float64(time.Second)) 151 cs.Measured = ExecutorBasicMeasuredCpuStats 152 153 // calculate cpu usage percent 154 cs.Percent = np.StatsTotalCPU.Percent(cpuStats.Total() * float64(time.Second)) 155 } 156 stats[strconv.Itoa(pid)] = &drivers.ResourceUsage{MemoryStats: ms, CpuStats: cs} 157 } 158 159 return stats, nil 160 } 161 162 // aggregatedResourceUsage aggregates the resource usage of all the pids and 163 // returns a TaskResourceUsage data point 164 func aggregatedResourceUsage(systemCpuStats *stats.CpuStats, pidStats map[string]*drivers.ResourceUsage) *drivers.TaskResourceUsage { 165 ts := time.Now().UTC().UnixNano() 166 var ( 167 systemModeCPU, userModeCPU, percent float64 168 totalRSS, totalSwap uint64 169 ) 170 171 for _, pidStat := range pidStats { 172 systemModeCPU += pidStat.CpuStats.SystemMode 173 userModeCPU += pidStat.CpuStats.UserMode 174 percent += pidStat.CpuStats.Percent 175 176 totalRSS += pidStat.MemoryStats.RSS 177 totalSwap += pidStat.MemoryStats.Swap 178 } 179 180 totalCPU := &drivers.CpuStats{ 181 SystemMode: systemModeCPU, 182 UserMode: userModeCPU, 183 Percent: percent, 184 Measured: ExecutorBasicMeasuredCpuStats, 185 TotalTicks: systemCpuStats.TicksConsumed(percent), 186 } 187 188 totalMemory := &drivers.MemoryStats{ 189 RSS: totalRSS, 190 Swap: totalSwap, 191 Measured: ExecutorBasicMeasuredMemStats, 192 } 193 194 resourceUsage := drivers.ResourceUsage{ 195 MemoryStats: totalMemory, 196 CpuStats: totalCPU, 197 } 198 return &drivers.TaskResourceUsage{ 199 ResourceUsage: &resourceUsage, 200 Timestamp: ts, 201 Pids: pidStats, 202 } 203 } 204 205 func getAllPidsByScanning() (resources.PIDs, error) { 206 allProcesses, err := ps.Processes() 207 if err != nil { 208 return nil, err 209 } 210 return scanPids(os.Getpid(), allProcesses) 211 }