github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/stats/host.go (about) 1 package stats 2 3 import ( 4 "math" 5 "runtime" 6 "sync" 7 "time" 8 9 hclog "github.com/hashicorp/go-hclog" 10 "github.com/hashicorp/nomad/plugins/device" 11 "github.com/shirou/gopsutil/cpu" 12 "github.com/shirou/gopsutil/disk" 13 "github.com/shirou/gopsutil/host" 14 "github.com/shirou/gopsutil/mem" 15 ) 16 17 // HostStats represents resource usage stats of the host running a Nomad client 18 type HostStats struct { 19 Memory *MemoryStats 20 CPU []*CPUStats 21 DiskStats []*DiskStats 22 AllocDirStats *DiskStats 23 DeviceStats []*DeviceGroupStats 24 Uptime uint64 25 Timestamp int64 26 CPUTicksConsumed float64 27 } 28 29 // MemoryStats represents stats related to virtual memory usage 30 type MemoryStats struct { 31 Total uint64 32 Available uint64 33 Used uint64 34 Free uint64 35 } 36 37 // CPUStats represents stats related to cpu usage 38 type CPUStats struct { 39 CPU string 40 User float64 41 System float64 42 Idle float64 43 Total float64 44 } 45 46 // DiskStats represents stats related to disk usage 47 type DiskStats struct { 48 Device string 49 Mountpoint string 50 Size uint64 51 Used uint64 52 Available uint64 53 UsedPercent float64 54 InodesUsedPercent float64 55 } 56 57 // DeviceGroupStats represents stats related to device group 58 type DeviceGroupStats = device.DeviceGroupStats 59 60 // DeviceStatsCollector is used to retrieve all the latest statistics for all devices. 61 type DeviceStatsCollector func() []*DeviceGroupStats 62 63 // NodeStatsCollector is an interface which is used for the purposes of mocking 64 // the HostStatsCollector in the tests 65 type NodeStatsCollector interface { 66 Collect() error 67 Stats() *HostStats 68 } 69 70 // HostStatsCollector collects host resource usage stats 71 type HostStatsCollector struct { 72 numCores int 73 statsCalculator map[string]*HostCpuStatsCalculator 74 hostStats *HostStats 75 hostStatsLock sync.RWMutex 76 allocDir string 77 deviceStatsCollector DeviceStatsCollector 78 79 // badParts is a set of partitions whose usage cannot be read; used to 80 // squelch logspam. 81 badParts map[string]struct{} 82 83 logger hclog.Logger 84 } 85 86 // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in 87 // so that we can present the disk related statistics for the mountpoint where 88 // the allocation directory lives 89 func NewHostStatsCollector(logger hclog.Logger, allocDir string, deviceStatsCollector DeviceStatsCollector) *HostStatsCollector { 90 logger = logger.Named("host_stats") 91 numCores := runtime.NumCPU() 92 statsCalculator := make(map[string]*HostCpuStatsCalculator) 93 collector := &HostStatsCollector{ 94 statsCalculator: statsCalculator, 95 numCores: numCores, 96 logger: logger, 97 allocDir: allocDir, 98 badParts: make(map[string]struct{}), 99 deviceStatsCollector: deviceStatsCollector, 100 } 101 return collector 102 } 103 104 // Collect collects stats related to resource usage of a host 105 func (h *HostStatsCollector) Collect() error { 106 h.hostStatsLock.Lock() 107 defer h.hostStatsLock.Unlock() 108 return h.collectLocked() 109 } 110 111 // collectLocked collects stats related to resource usage of the host but should 112 // be called with the lock held. 113 func (h *HostStatsCollector) collectLocked() error { 114 hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()} 115 116 // Determine up-time 117 uptime, err := host.Uptime() 118 if err != nil { 119 h.logger.Error("failed to collect upstime stats", "error", err) 120 uptime = 0 121 } 122 hs.Uptime = uptime 123 124 // Collect memory stats 125 mstats, err := h.collectMemoryStats() 126 if err != nil { 127 h.logger.Error("failed to collect memory stats", "error", err) 128 mstats = &MemoryStats{} 129 } 130 hs.Memory = mstats 131 132 // Collect cpu stats 133 cpus, ticks, err := h.collectCPUStats() 134 if err != nil { 135 h.logger.Error("failed to collect cpu stats", "error", err) 136 cpus = []*CPUStats{} 137 ticks = 0 138 } 139 hs.CPU = cpus 140 hs.CPUTicksConsumed = ticks 141 142 // Collect disk stats 143 diskStats, err := h.collectDiskStats() 144 if err != nil { 145 h.logger.Error("failed to collect disk stats", "error", err) 146 hs.DiskStats = []*DiskStats{} 147 } 148 hs.DiskStats = diskStats 149 150 // Getting the disk stats for the allocation directory 151 usage, err := disk.Usage(h.allocDir) 152 if err != nil { 153 h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err) 154 hs.AllocDirStats = &DiskStats{} 155 } else { 156 hs.AllocDirStats = h.toDiskStats(usage, nil) 157 } 158 // Collect devices stats 159 deviceStats := h.collectDeviceGroupStats() 160 hs.DeviceStats = deviceStats 161 162 // Update the collected status object. 163 h.hostStats = hs 164 165 return nil 166 } 167 168 func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) { 169 memStats, err := mem.VirtualMemory() 170 if err != nil { 171 return nil, err 172 } 173 mem := &MemoryStats{ 174 Total: memStats.Total, 175 Available: memStats.Available, 176 Used: memStats.Used, 177 Free: memStats.Free, 178 } 179 180 return mem, nil 181 } 182 183 func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) { 184 partitions, err := disk.Partitions(false) 185 if err != nil { 186 return nil, err 187 } 188 189 var diskStats []*DiskStats 190 for _, partition := range partitions { 191 usage, err := disk.Usage(partition.Mountpoint) 192 if err != nil { 193 if _, ok := h.badParts[partition.Mountpoint]; ok { 194 // already known bad, don't log again 195 continue 196 } 197 198 h.badParts[partition.Mountpoint] = struct{}{} 199 h.logger.Warn("error fetching host disk usage stats", "error", err, "partition", partition.Mountpoint) 200 continue 201 } 202 delete(h.badParts, partition.Mountpoint) 203 204 ds := h.toDiskStats(usage, &partition) 205 diskStats = append(diskStats, ds) 206 } 207 208 return diskStats, nil 209 } 210 211 func (h *HostStatsCollector) collectDeviceGroupStats() []*DeviceGroupStats { 212 if h.deviceStatsCollector == nil { 213 return []*DeviceGroupStats{} 214 } 215 216 return h.deviceStatsCollector() 217 } 218 219 // Stats returns the host stats that has been collected 220 func (h *HostStatsCollector) Stats() *HostStats { 221 h.hostStatsLock.RLock() 222 defer h.hostStatsLock.RUnlock() 223 224 if h.hostStats == nil { 225 if err := h.collectLocked(); err != nil { 226 h.logger.Warn("error fetching host resource usage stats", "error", err) 227 } 228 } 229 230 return h.hostStats 231 } 232 233 // toDiskStats merges UsageStat and PartitionStat to create a DiskStat 234 func (h *HostStatsCollector) toDiskStats(usage *disk.UsageStat, partitionStat *disk.PartitionStat) *DiskStats { 235 ds := DiskStats{ 236 Size: usage.Total, 237 Used: usage.Used, 238 Available: usage.Free, 239 UsedPercent: usage.UsedPercent, 240 InodesUsedPercent: usage.InodesUsedPercent, 241 } 242 if math.IsNaN(ds.UsedPercent) { 243 ds.UsedPercent = 0.0 244 } 245 if math.IsNaN(ds.InodesUsedPercent) { 246 ds.InodesUsedPercent = 0.0 247 } 248 249 if partitionStat != nil { 250 ds.Device = partitionStat.Device 251 ds.Mountpoint = partitionStat.Mountpoint 252 } 253 254 return &ds 255 } 256 257 // HostCpuStatsCalculator calculates cpu usage percentages 258 type HostCpuStatsCalculator struct { 259 prevIdle float64 260 prevUser float64 261 prevSystem float64 262 prevBusy float64 263 prevTotal float64 264 } 265 266 // NewHostCpuStatsCalculator returns a HostCpuStatsCalculator 267 func NewHostCpuStatsCalculator() *HostCpuStatsCalculator { 268 return &HostCpuStatsCalculator{} 269 } 270 271 // Calculate calculates the current cpu usage percentages 272 func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) { 273 currentIdle := times.Idle 274 currentUser := times.User 275 currentSystem := times.System 276 currentTotal := times.Total() 277 currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq + 278 times.Softirq + times.Steal + times.Guest + times.GuestNice 279 280 deltaTotal := currentTotal - h.prevTotal 281 idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100 282 user = ((currentUser - h.prevUser) / deltaTotal) * 100 283 system = ((currentSystem - h.prevSystem) / deltaTotal) * 100 284 total = ((currentBusy - h.prevBusy) / deltaTotal) * 100 285 286 // Protect against any invalid values 287 if math.IsNaN(idle) || math.IsInf(idle, 0) { 288 idle = 100.0 289 } 290 if math.IsNaN(user) || math.IsInf(user, 0) { 291 user = 0.0 292 } 293 if math.IsNaN(system) || math.IsInf(system, 0) { 294 system = 0.0 295 } 296 if math.IsNaN(total) || math.IsInf(total, 0) { 297 total = 0.0 298 } 299 300 h.prevIdle = currentIdle 301 h.prevUser = currentUser 302 h.prevSystem = currentSystem 303 h.prevTotal = currentTotal 304 h.prevBusy = currentBusy 305 return 306 }