github.com/bigcommerce/nomad@v0.9.3-bc/client/stats/host.go (about) 1 package stats 2 3 import ( 4 "fmt" 5 "math" 6 "runtime" 7 "sync" 8 "time" 9 10 hclog "github.com/hashicorp/go-hclog" 11 "github.com/hashicorp/nomad/plugins/device" 12 "github.com/shirou/gopsutil/cpu" 13 "github.com/shirou/gopsutil/disk" 14 "github.com/shirou/gopsutil/host" 15 "github.com/shirou/gopsutil/mem" 16 ) 17 18 // HostStats represents resource usage stats of the host running a Nomad client 19 type HostStats struct { 20 Memory *MemoryStats 21 CPU []*CPUStats 22 DiskStats []*DiskStats 23 AllocDirStats *DiskStats 24 DeviceStats []*DeviceGroupStats 25 Uptime uint64 26 Timestamp int64 27 CPUTicksConsumed float64 28 } 29 30 // MemoryStats represents stats related to virtual memory usage 31 type MemoryStats struct { 32 Total uint64 33 Available uint64 34 Used uint64 35 Free uint64 36 } 37 38 // CPUStats represents stats related to cpu usage 39 type CPUStats struct { 40 CPU string 41 User float64 42 System float64 43 Idle float64 44 Total float64 45 } 46 47 // DiskStats represents stats related to disk usage 48 type DiskStats struct { 49 Device string 50 Mountpoint string 51 Size uint64 52 Used uint64 53 Available uint64 54 UsedPercent float64 55 InodesUsedPercent float64 56 } 57 58 // DeviceGroupStats represents stats related to device group 59 type DeviceGroupStats = device.DeviceGroupStats 60 61 // DeviceStatsCollector is used to retrieve all the latest statistics for all devices. 62 type DeviceStatsCollector func() []*DeviceGroupStats 63 64 // NodeStatsCollector is an interface which is used for the purposes of mocking 65 // the HostStatsCollector in the tests 66 type NodeStatsCollector interface { 67 Collect() error 68 Stats() *HostStats 69 } 70 71 // HostStatsCollector collects host resource usage stats 72 type HostStatsCollector struct { 73 numCores int 74 statsCalculator map[string]*HostCpuStatsCalculator 75 hostStats *HostStats 76 hostStatsLock sync.RWMutex 77 allocDir string 78 deviceStatsCollector DeviceStatsCollector 79 80 // badParts is a set of partitions whose usage cannot be read; used to 81 // squelch logspam. 82 badParts map[string]struct{} 83 84 logger hclog.Logger 85 } 86 87 // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in 88 // so that we can present the disk related statistics for the mountpoint where 89 // the allocation directory lives 90 func NewHostStatsCollector(logger hclog.Logger, allocDir string, deviceStatsCollector DeviceStatsCollector) *HostStatsCollector { 91 logger = logger.Named("host_stats") 92 numCores := runtime.NumCPU() 93 statsCalculator := make(map[string]*HostCpuStatsCalculator) 94 collector := &HostStatsCollector{ 95 statsCalculator: statsCalculator, 96 numCores: numCores, 97 logger: logger, 98 allocDir: allocDir, 99 badParts: make(map[string]struct{}), 100 deviceStatsCollector: deviceStatsCollector, 101 } 102 return collector 103 } 104 105 // Collect collects stats related to resource usage of a host 106 func (h *HostStatsCollector) Collect() error { 107 h.hostStatsLock.Lock() 108 defer h.hostStatsLock.Unlock() 109 return h.collectLocked() 110 } 111 112 // collectLocked collects stats related to resource usage of the host but should 113 // be called with the lock held. 114 func (h *HostStatsCollector) collectLocked() error { 115 hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()} 116 117 // Determine up-time 118 uptime, err := host.Uptime() 119 if err != nil { 120 return err 121 } 122 hs.Uptime = uptime 123 124 // Collect memory stats 125 mstats, err := h.collectMemoryStats() 126 if err != nil { 127 return err 128 } 129 hs.Memory = mstats 130 131 // Collect cpu stats 132 cpus, ticks, err := h.collectCPUStats() 133 if err != nil { 134 return err 135 } 136 hs.CPU = cpus 137 hs.CPUTicksConsumed = ticks 138 139 // Collect disk stats 140 diskStats, err := h.collectDiskStats() 141 if err != nil { 142 return err 143 } 144 hs.DiskStats = diskStats 145 146 // Getting the disk stats for the allocation directory 147 usage, err := disk.Usage(h.allocDir) 148 if err != nil { 149 return fmt.Errorf("failed to find disk usage of alloc_dir %q: %v", h.allocDir, err) 150 } 151 hs.AllocDirStats = h.toDiskStats(usage, nil) 152 153 // Collect devices stats 154 deviceStats := h.collectDeviceGroupStats() 155 hs.DeviceStats = deviceStats 156 157 // Update the collected status object. 158 h.hostStats = hs 159 160 return nil 161 } 162 163 func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) { 164 memStats, err := mem.VirtualMemory() 165 if err != nil { 166 return nil, err 167 } 168 mem := &MemoryStats{ 169 Total: memStats.Total, 170 Available: memStats.Available, 171 Used: memStats.Used, 172 Free: memStats.Free, 173 } 174 175 return mem, nil 176 } 177 178 func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) { 179 partitions, err := disk.Partitions(false) 180 if err != nil { 181 return nil, err 182 } 183 184 var diskStats []*DiskStats 185 for _, partition := range partitions { 186 usage, err := disk.Usage(partition.Mountpoint) 187 if err != nil { 188 if _, ok := h.badParts[partition.Mountpoint]; ok { 189 // already known bad, don't log again 190 continue 191 } 192 193 h.badParts[partition.Mountpoint] = struct{}{} 194 h.logger.Warn("error fetching host disk usage stats", "error", err, "partition", partition.Mountpoint) 195 continue 196 } 197 delete(h.badParts, partition.Mountpoint) 198 199 ds := h.toDiskStats(usage, &partition) 200 diskStats = append(diskStats, ds) 201 } 202 203 return diskStats, nil 204 } 205 206 func (h *HostStatsCollector) collectDeviceGroupStats() []*DeviceGroupStats { 207 if h.deviceStatsCollector == nil { 208 return []*DeviceGroupStats{} 209 } 210 211 return h.deviceStatsCollector() 212 } 213 214 // Stats returns the host stats that has been collected 215 func (h *HostStatsCollector) Stats() *HostStats { 216 h.hostStatsLock.RLock() 217 defer h.hostStatsLock.RUnlock() 218 219 if h.hostStats == nil { 220 if err := h.collectLocked(); err != nil { 221 h.logger.Warn("error fetching host resource usage stats", "error", err) 222 } 223 } 224 225 return h.hostStats 226 } 227 228 // toDiskStats merges UsageStat and PartitionStat to create a DiskStat 229 func (h *HostStatsCollector) toDiskStats(usage *disk.UsageStat, partitionStat *disk.PartitionStat) *DiskStats { 230 ds := DiskStats{ 231 Size: usage.Total, 232 Used: usage.Used, 233 Available: usage.Free, 234 UsedPercent: usage.UsedPercent, 235 InodesUsedPercent: usage.InodesUsedPercent, 236 } 237 if math.IsNaN(ds.UsedPercent) { 238 ds.UsedPercent = 0.0 239 } 240 if math.IsNaN(ds.InodesUsedPercent) { 241 ds.InodesUsedPercent = 0.0 242 } 243 244 if partitionStat != nil { 245 ds.Device = partitionStat.Device 246 ds.Mountpoint = partitionStat.Mountpoint 247 } 248 249 return &ds 250 } 251 252 // HostCpuStatsCalculator calculates cpu usage percentages 253 type HostCpuStatsCalculator struct { 254 prevIdle float64 255 prevUser float64 256 prevSystem float64 257 prevBusy float64 258 prevTotal float64 259 } 260 261 // NewHostCpuStatsCalculator returns a HostCpuStatsCalculator 262 func NewHostCpuStatsCalculator() *HostCpuStatsCalculator { 263 return &HostCpuStatsCalculator{} 264 } 265 266 // Calculate calculates the current cpu usage percentages 267 func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) { 268 currentIdle := times.Idle 269 currentUser := times.User 270 currentSystem := times.System 271 currentTotal := times.Total() 272 currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq + 273 times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen 274 275 deltaTotal := currentTotal - h.prevTotal 276 idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100 277 user = ((currentUser - h.prevUser) / deltaTotal) * 100 278 system = ((currentSystem - h.prevSystem) / deltaTotal) * 100 279 total = ((currentBusy - h.prevBusy) / deltaTotal) * 100 280 281 // Protect against any invalid values 282 if math.IsNaN(idle) || math.IsInf(idle, 0) { 283 idle = 100.0 284 } 285 if math.IsNaN(user) || math.IsInf(user, 0) { 286 user = 0.0 287 } 288 if math.IsNaN(system) || math.IsInf(system, 0) { 289 system = 0.0 290 } 291 if math.IsNaN(total) || math.IsInf(total, 0) { 292 total = 0.0 293 } 294 295 h.prevIdle = currentIdle 296 h.prevUser = currentUser 297 h.prevSystem = currentSystem 298 h.prevTotal = currentTotal 299 h.prevBusy = currentBusy 300 return 301 }