github.com/hernad/nomad@v1.6.112/drivers/shared/executor/pid_collector.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package executor 5 6 import ( 7 "os" 8 "strconv" 9 "sync" 10 "time" 11 12 hclog "github.com/hashicorp/go-hclog" 13 "github.com/hernad/nomad/client/lib/resources" 14 "github.com/hernad/nomad/helper/stats" 15 "github.com/hernad/nomad/plugins/drivers" 16 ps "github.com/mitchellh/go-ps" 17 "github.com/shirou/gopsutil/v3/process" 18 ) 19 20 var ( 21 // pidScanInterval is the interval at which the executor scans the process 22 // tree for finding out the pids that the executor and it's child processes 23 // have forked 24 pidScanInterval = 5 * time.Second 25 ) 26 27 // pidCollector is a utility that can be embedded in an executor to collect pid 28 // stats 29 type pidCollector struct { 30 pids map[int]*resources.PID 31 pidLock sync.RWMutex 32 logger hclog.Logger 33 } 34 35 // allPidGetter is a func which is used by the pid collector to gather 36 // stats on 37 type allPidGetter func() (resources.PIDs, error) 38 39 func newPidCollector(logger hclog.Logger) *pidCollector { 40 return &pidCollector{ 41 pids: make(map[int]*resources.PID), 42 logger: logger.Named("pid_collector"), 43 } 44 } 45 46 // collectPids collects the pids of the child processes that the executor is 47 // running every 5 seconds 48 func (c *pidCollector) collectPids(stopCh chan interface{}, pidGetter allPidGetter) { 49 // Fire the timer right away when the executor starts from there on the pids 50 // are collected every scan interval 51 timer := time.NewTimer(0) 52 defer timer.Stop() 53 for { 54 select { 55 case <-timer.C: 56 pids, err := pidGetter() 57 if err != nil { 58 c.logger.Debug("error collecting pids", "error", err) 59 } 60 c.pidLock.Lock() 61 62 // Adding pids which are not being tracked 63 for pid, np := range pids { 64 if _, ok := c.pids[pid]; !ok { 65 c.pids[pid] = np 66 } 67 } 68 // Removing pids which are no longer present 69 for pid := range c.pids { 70 if _, ok := pids[pid]; !ok { 71 delete(c.pids, pid) 72 } 73 } 74 c.pidLock.Unlock() 75 timer.Reset(pidScanInterval) 76 case <-stopCh: 77 return 78 } 79 } 80 } 81 82 // scanPids scans all the pids on the machine running the current executor and 83 // returns the child processes of the executor. 84 func scanPids(parentPid int, allPids []ps.Process) (map[int]*resources.PID, error) { 85 processFamily := make(map[int]struct{}) 86 processFamily[parentPid] = struct{}{} 87 88 // A mapping of pids to their parent pids. It is used to build the process 89 // tree of the executing task 90 pidsRemaining := make(map[int]int, len(allPids)) 91 for _, pid := range allPids { 92 pidsRemaining[pid.Pid()] = pid.PPid() 93 } 94 95 for { 96 // flag to indicate if we have found a match 97 foundNewPid := false 98 99 for pid, ppid := range pidsRemaining { 100 _, childPid := processFamily[ppid] 101 102 // checking if the pid is a child of any of the parents 103 if childPid { 104 processFamily[pid] = struct{}{} 105 delete(pidsRemaining, pid) 106 foundNewPid = true 107 } 108 } 109 110 // not scanning anymore if we couldn't find a single match 111 if !foundNewPid { 112 break 113 } 114 } 115 116 res := make(map[int]*resources.PID) 117 for pid := range processFamily { 118 res[pid] = &resources.PID{ 119 PID: pid, 120 StatsTotalCPU: stats.NewCpuStats(), 121 StatsUserCPU: stats.NewCpuStats(), 122 StatsSysCPU: stats.NewCpuStats(), 123 } 124 } 125 return res, nil 126 } 127 128 // pidStats returns the resource usage stats per pid 129 func (c *pidCollector) pidStats() (map[string]*drivers.ResourceUsage, error) { 130 stats := make(map[string]*drivers.ResourceUsage) 131 c.pidLock.RLock() 132 pids := make(map[int]*resources.PID, len(c.pids)) 133 for k, v := range c.pids { 134 pids[k] = v 135 } 136 c.pidLock.RUnlock() 137 for pid, np := range pids { 138 p, err := process.NewProcess(int32(pid)) 139 if err != nil { 140 c.logger.Trace("unable to create new process", "pid", pid, "error", err) 141 continue 142 } 143 ms := &drivers.MemoryStats{} 144 if memInfo, err := p.MemoryInfo(); err == nil { 145 ms.RSS = memInfo.RSS 146 ms.Swap = memInfo.Swap 147 ms.Measured = ExecutorBasicMeasuredMemStats 148 } 149 150 cs := &drivers.CpuStats{} 151 if cpuStats, err := p.Times(); err == nil { 152 cs.SystemMode = np.StatsSysCPU.Percent(cpuStats.System * float64(time.Second)) 153 cs.UserMode = np.StatsUserCPU.Percent(cpuStats.User * float64(time.Second)) 154 cs.Measured = ExecutorBasicMeasuredCpuStats 155 156 // calculate cpu usage percent 157 cs.Percent = np.StatsTotalCPU.Percent(cpuStats.Total() * float64(time.Second)) 158 } 159 stats[strconv.Itoa(pid)] = &drivers.ResourceUsage{MemoryStats: ms, CpuStats: cs} 160 } 161 162 return stats, nil 163 } 164 165 // aggregatedResourceUsage aggregates the resource usage of all the pids and 166 // returns a TaskResourceUsage data point 167 func aggregatedResourceUsage(systemCpuStats *stats.CpuStats, pidStats map[string]*drivers.ResourceUsage) *drivers.TaskResourceUsage { 168 ts := time.Now().UTC().UnixNano() 169 var ( 170 systemModeCPU, userModeCPU, percent float64 171 totalRSS, totalSwap uint64 172 ) 173 174 for _, pidStat := range pidStats { 175 systemModeCPU += pidStat.CpuStats.SystemMode 176 userModeCPU += pidStat.CpuStats.UserMode 177 percent += pidStat.CpuStats.Percent 178 179 totalRSS += pidStat.MemoryStats.RSS 180 totalSwap += pidStat.MemoryStats.Swap 181 } 182 183 totalCPU := &drivers.CpuStats{ 184 SystemMode: systemModeCPU, 185 UserMode: userModeCPU, 186 Percent: percent, 187 Measured: ExecutorBasicMeasuredCpuStats, 188 TotalTicks: systemCpuStats.TicksConsumed(percent), 189 } 190 191 totalMemory := &drivers.MemoryStats{ 192 RSS: totalRSS, 193 Swap: totalSwap, 194 Measured: ExecutorBasicMeasuredMemStats, 195 } 196 197 resourceUsage := drivers.ResourceUsage{ 198 MemoryStats: totalMemory, 199 CpuStats: totalCPU, 200 } 201 return &drivers.TaskResourceUsage{ 202 ResourceUsage: &resourceUsage, 203 Timestamp: ts, 204 Pids: pidStats, 205 } 206 } 207 208 func getAllPidsByScanning() (resources.PIDs, error) { 209 allProcesses, err := ps.Processes() 210 if err != nil { 211 return nil, err 212 } 213 return scanPids(os.Getpid(), allProcesses) 214 }