github.com/manicqin/nomad@v0.9.5/client/stats/host.go (about) 1 package stats 2 3 import ( 4 "math" 5 "runtime" 6 "sync" 7 "time" 8 9 hclog "github.com/hashicorp/go-hclog" 10 "github.com/hashicorp/nomad/plugins/device" 11 "github.com/shirou/gopsutil/cpu" 12 "github.com/shirou/gopsutil/disk" 13 "github.com/shirou/gopsutil/host" 14 "github.com/shirou/gopsutil/mem" 15 ) 16 17 // HostStats represents resource usage stats of the host running a Nomad client 18 type HostStats struct { 19 Memory *MemoryStats 20 CPU []*CPUStats 21 DiskStats []*DiskStats 22 AllocDirStats *DiskStats 23 DeviceStats []*DeviceGroupStats 24 Uptime uint64 25 Timestamp int64 26 CPUTicksConsumed float64 27 } 28 29 // MemoryStats represents stats related to virtual memory usage 30 type MemoryStats struct { 31 Total uint64 32 Available uint64 33 Used uint64 34 Free uint64 35 } 36 37 // CPUStats represents stats related to cpu usage 38 type CPUStats struct { 39 CPU string 40 User float64 41 System float64 42 Idle float64 43 Total float64 44 } 45 46 // DiskStats represents stats related to disk usage 47 type DiskStats struct { 48 Device string 49 Mountpoint string 50 Size uint64 51 Used uint64 52 Available uint64 53 UsedPercent float64 54 InodesUsedPercent float64 55 } 56 57 // DeviceGroupStats represents stats related to device group 58 type DeviceGroupStats = device.DeviceGroupStats 59 60 // DeviceStatsCollector is used to retrieve all the latest statistics for all devices. 61 type DeviceStatsCollector func() []*DeviceGroupStats 62 63 // NodeStatsCollector is an interface which is used for the purposes of mocking 64 // the HostStatsCollector in the tests 65 type NodeStatsCollector interface { 66 Collect() error 67 Stats() *HostStats 68 } 69 70 // HostStatsCollector collects host resource usage stats 71 type HostStatsCollector struct { 72 numCores int 73 statsCalculator map[string]*HostCpuStatsCalculator 74 hostStats *HostStats 75 hostStatsLock sync.RWMutex 76 allocDir string 77 deviceStatsCollector DeviceStatsCollector 78 79 // badParts is a set of partitions whose usage cannot be read; used to 80 // squelch logspam. 81 badParts map[string]struct{} 82 83 logger hclog.Logger 84 } 85 86 // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in 87 // so that we can present the disk related statistics for the mountpoint where 88 // the allocation directory lives 89 // POI 90 func NewHostStatsCollector(logger hclog.Logger, allocDir string, deviceStatsCollector DeviceStatsCollector) *HostStatsCollector { 91 logger = logger.Named("host_stats") 92 numCores := runtime.NumCPU() 93 statsCalculator := make(map[string]*HostCpuStatsCalculator) 94 collector := &HostStatsCollector{ 95 statsCalculator: statsCalculator, 96 numCores: numCores, 97 logger: logger, 98 allocDir: allocDir, 99 badParts: make(map[string]struct{}), 100 deviceStatsCollector: deviceStatsCollector, 101 } 102 return collector 103 } 104 105 // Collect collects stats related to resource usage of a host 106 func (h *HostStatsCollector) Collect() error { 107 h.hostStatsLock.Lock() 108 defer h.hostStatsLock.Unlock() 109 return h.collectLocked() 110 } 111 112 // collectLocked collects stats related to resource usage of the host but should 113 // be called with the lock held. 114 func (h *HostStatsCollector) collectLocked() error { 115 hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()} 116 117 // Determine up-time 118 uptime, err := host.Uptime() 119 if err != nil { 120 h.logger.Error("failed to collect upstime stats", "error", err) 121 uptime = 0 122 } 123 hs.Uptime = uptime 124 125 // Collect memory stats 126 mstats, err := h.collectMemoryStats() 127 if err != nil { 128 129 h.logger.Error("failed to collect memory stats", "error", err) 130 mstats = &MemoryStats{} 131 } 132 hs.Memory = mstats 133 134 // Collect cpu stats 135 cpus, ticks, err := h.collectCPUStats() 136 if err != nil { 137 138 h.logger.Error("failed to collect cpu stats", "error", err) 139 cpus = []*CPUStats{} 140 ticks = 0 141 } 142 hs.CPU = cpus 143 hs.CPUTicksConsumed = ticks 144 145 // Collect disk stats 146 diskStats, err := h.collectDiskStats() 147 if err != nil { 148 149 h.logger.Error("failed to collect disk stats", "error", err) 150 hs.DiskStats = []*DiskStats{} 151 } 152 hs.DiskStats = diskStats 153 154 // Getting the disk stats for the allocation directory 155 usage, err := disk.Usage(h.allocDir) 156 if err != nil { 157 h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err) 158 hs.AllocDirStats = &DiskStats{} 159 } else { 160 hs.AllocDirStats = h.toDiskStats(usage, nil) 161 } 162 // Collect devices stats 163 deviceStats := h.collectDeviceGroupStats() 164 hs.DeviceStats = deviceStats 165 166 // Update the collected status object. 167 h.hostStats = hs 168 169 return nil 170 } 171 172 func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) { 173 memStats, err := mem.VirtualMemory() 174 if err != nil { 175 return nil, err 176 } 177 mem := &MemoryStats{ 178 Total: memStats.Total, 179 Available: memStats.Available, 180 Used: memStats.Used, 181 Free: memStats.Free, 182 } 183 184 return mem, nil 185 } 186 187 func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) { 188 partitions, err := disk.Partitions(false) 189 if err != nil { 190 return nil, err 191 } 192 193 var diskStats []*DiskStats 194 for _, partition := range partitions { 195 usage, err := disk.Usage(partition.Mountpoint) 196 if err != nil { 197 if _, ok := h.badParts[partition.Mountpoint]; ok { 198 // already known bad, don't log again 199 continue 200 } 201 202 h.badParts[partition.Mountpoint] = struct{}{} 203 h.logger.Warn("error fetching host disk usage stats", "error", err, "partition", partition.Mountpoint) 204 continue 205 } 206 delete(h.badParts, partition.Mountpoint) 207 208 ds := h.toDiskStats(usage, &partition) 209 diskStats = append(diskStats, ds) 210 } 211 212 return diskStats, nil 213 } 214 215 func (h *HostStatsCollector) collectDeviceGroupStats() []*DeviceGroupStats { 216 if h.deviceStatsCollector == nil { 217 return []*DeviceGroupStats{} 218 } 219 220 return h.deviceStatsCollector() 221 } 222 223 // Stats returns the host stats that has been collected 224 func (h *HostStatsCollector) Stats() *HostStats { 225 h.hostStatsLock.RLock() 226 defer h.hostStatsLock.RUnlock() 227 228 if h.hostStats == nil { 229 if err := h.collectLocked(); err != nil { 230 h.logger.Warn("error fetching host resource usage stats", "error", err) 231 } 232 } 233 234 return h.hostStats 235 } 236 237 // toDiskStats merges UsageStat and PartitionStat to create a DiskStat 238 func (h *HostStatsCollector) toDiskStats(usage *disk.UsageStat, partitionStat *disk.PartitionStat) *DiskStats { 239 ds := DiskStats{ 240 Size: usage.Total, 241 Used: usage.Used, 242 Available: usage.Free, 243 UsedPercent: usage.UsedPercent, 244 InodesUsedPercent: usage.InodesUsedPercent, 245 } 246 if math.IsNaN(ds.UsedPercent) { 247 ds.UsedPercent = 0.0 248 } 249 if math.IsNaN(ds.InodesUsedPercent) { 250 ds.InodesUsedPercent = 0.0 251 } 252 253 if partitionStat != nil { 254 ds.Device = partitionStat.Device 255 ds.Mountpoint = partitionStat.Mountpoint 256 } 257 258 return &ds 259 } 260 261 // HostCpuStatsCalculator calculates cpu usage percentages 262 type HostCpuStatsCalculator struct { 263 prevIdle float64 264 prevUser float64 265 prevSystem float64 266 prevBusy float64 267 prevTotal float64 268 } 269 270 // NewHostCpuStatsCalculator returns a HostCpuStatsCalculator 271 func NewHostCpuStatsCalculator() *HostCpuStatsCalculator { 272 return &HostCpuStatsCalculator{} 273 } 274 275 // Calculate calculates the current cpu usage percentages 276 func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) { 277 currentIdle := times.Idle 278 currentUser := times.User 279 currentSystem := times.System 280 currentTotal := times.Total() 281 currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq + 282 times.Softirq + times.Steal + times.Guest + times.GuestNice 283 284 deltaTotal := currentTotal - h.prevTotal 285 idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100 286 user = ((currentUser - h.prevUser) / deltaTotal) * 100 287 system = ((currentSystem - h.prevSystem) / deltaTotal) * 100 288 total = ((currentBusy - h.prevBusy) / deltaTotal) * 100 289 290 // Protect against any invalid values 291 if math.IsNaN(idle) || math.IsInf(idle, 0) { 292 idle = 100.0 293 } 294 if math.IsNaN(user) || math.IsInf(user, 0) { 295 user = 0.0 296 } 297 if math.IsNaN(system) || math.IsInf(system, 0) { 298 system = 0.0 299 } 300 if math.IsNaN(total) || math.IsInf(total, 0) { 301 total = 0.0 302 } 303 304 h.prevIdle = currentIdle 305 h.prevUser = currentUser 306 h.prevSystem = currentSystem 307 h.prevTotal = currentTotal 308 h.prevBusy = currentBusy 309 return 310 }