github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/stats/host.go (about)

     1  package stats
     2  
     3  import (
     4  	"math"
     5  	"runtime"
     6  	"sync"
     7  	"time"
     8  
     9  	hclog "github.com/hashicorp/go-hclog"
    10  	"github.com/hashicorp/nomad/plugins/device"
    11  	"github.com/shirou/gopsutil/cpu"
    12  	"github.com/shirou/gopsutil/disk"
    13  	"github.com/shirou/gopsutil/host"
    14  	"github.com/shirou/gopsutil/mem"
    15  )
    16  
    17  // HostStats represents resource usage stats of the host running a Nomad client
    18  type HostStats struct {
    19  	Memory           *MemoryStats
    20  	CPU              []*CPUStats
    21  	DiskStats        []*DiskStats
    22  	AllocDirStats    *DiskStats
    23  	DeviceStats      []*DeviceGroupStats
    24  	Uptime           uint64
    25  	Timestamp        int64
    26  	CPUTicksConsumed float64
    27  }
    28  
    29  // MemoryStats represents stats related to virtual memory usage
    30  type MemoryStats struct {
    31  	Total     uint64
    32  	Available uint64
    33  	Used      uint64
    34  	Free      uint64
    35  }
    36  
    37  // CPUStats represents stats related to cpu usage
    38  type CPUStats struct {
    39  	CPU    string
    40  	User   float64
    41  	System float64
    42  	Idle   float64
    43  	Total  float64
    44  }
    45  
    46  // DiskStats represents stats related to disk usage
    47  type DiskStats struct {
    48  	Device            string
    49  	Mountpoint        string
    50  	Size              uint64
    51  	Used              uint64
    52  	Available         uint64
    53  	UsedPercent       float64
    54  	InodesUsedPercent float64
    55  }
    56  
    57  // DeviceGroupStats represents stats related to device group
    58  type DeviceGroupStats = device.DeviceGroupStats
    59  
    60  // DeviceStatsCollector is used to retrieve all the latest statistics for all devices.
    61  type DeviceStatsCollector func() []*DeviceGroupStats
    62  
    63  // NodeStatsCollector is an interface which is used for the purposes of mocking
    64  // the HostStatsCollector in the tests
    65  type NodeStatsCollector interface {
    66  	Collect() error
    67  	Stats() *HostStats
    68  }
    69  
    70  // HostStatsCollector collects host resource usage stats
    71  type HostStatsCollector struct {
    72  	numCores             int
    73  	statsCalculator      map[string]*HostCpuStatsCalculator
    74  	hostStats            *HostStats
    75  	hostStatsLock        sync.RWMutex
    76  	allocDir             string
    77  	deviceStatsCollector DeviceStatsCollector
    78  
    79  	// badParts is a set of partitions whose usage cannot be read; used to
    80  	// squelch logspam.
    81  	badParts map[string]struct{}
    82  
    83  	logger hclog.Logger
    84  }
    85  
    86  // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in
    87  // so that we can present the disk related statistics for the mountpoint where
    88  // the allocation directory lives
    89  func NewHostStatsCollector(logger hclog.Logger, allocDir string, deviceStatsCollector DeviceStatsCollector) *HostStatsCollector {
    90  	logger = logger.Named("host_stats")
    91  	numCores := runtime.NumCPU()
    92  	statsCalculator := make(map[string]*HostCpuStatsCalculator)
    93  	collector := &HostStatsCollector{
    94  		statsCalculator:      statsCalculator,
    95  		numCores:             numCores,
    96  		logger:               logger,
    97  		allocDir:             allocDir,
    98  		badParts:             make(map[string]struct{}),
    99  		deviceStatsCollector: deviceStatsCollector,
   100  	}
   101  	return collector
   102  }
   103  
   104  // Collect collects stats related to resource usage of a host
   105  func (h *HostStatsCollector) Collect() error {
   106  	h.hostStatsLock.Lock()
   107  	defer h.hostStatsLock.Unlock()
   108  	return h.collectLocked()
   109  }
   110  
   111  // collectLocked collects stats related to resource usage of the host but should
   112  // be called with the lock held.
   113  func (h *HostStatsCollector) collectLocked() error {
   114  	hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
   115  
   116  	// Determine up-time
   117  	uptime, err := host.Uptime()
   118  	if err != nil {
   119  		h.logger.Error("failed to collect upstime stats", "error", err)
   120  		uptime = 0
   121  	}
   122  	hs.Uptime = uptime
   123  
   124  	// Collect memory stats
   125  	mstats, err := h.collectMemoryStats()
   126  	if err != nil {
   127  		h.logger.Error("failed to collect memory stats", "error", err)
   128  		mstats = &MemoryStats{}
   129  	}
   130  	hs.Memory = mstats
   131  
   132  	// Collect cpu stats
   133  	cpus, ticks, err := h.collectCPUStats()
   134  	if err != nil {
   135  		h.logger.Error("failed to collect cpu stats", "error", err)
   136  		cpus = []*CPUStats{}
   137  		ticks = 0
   138  	}
   139  	hs.CPU = cpus
   140  	hs.CPUTicksConsumed = ticks
   141  
   142  	// Collect disk stats
   143  	diskStats, err := h.collectDiskStats()
   144  	if err != nil {
   145  		h.logger.Error("failed to collect disk stats", "error", err)
   146  		hs.DiskStats = []*DiskStats{}
   147  	}
   148  	hs.DiskStats = diskStats
   149  
   150  	// Getting the disk stats for the allocation directory
   151  	usage, err := disk.Usage(h.allocDir)
   152  	if err != nil {
   153  		h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err)
   154  		hs.AllocDirStats = &DiskStats{}
   155  	} else {
   156  		hs.AllocDirStats = h.toDiskStats(usage, nil)
   157  	}
   158  	// Collect devices stats
   159  	deviceStats := h.collectDeviceGroupStats()
   160  	hs.DeviceStats = deviceStats
   161  
   162  	// Update the collected status object.
   163  	h.hostStats = hs
   164  
   165  	return nil
   166  }
   167  
   168  func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) {
   169  	memStats, err := mem.VirtualMemory()
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  	mem := &MemoryStats{
   174  		Total:     memStats.Total,
   175  		Available: memStats.Available,
   176  		Used:      memStats.Used,
   177  		Free:      memStats.Free,
   178  	}
   179  
   180  	return mem, nil
   181  }
   182  
   183  func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) {
   184  	partitions, err := disk.Partitions(false)
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  
   189  	var diskStats []*DiskStats
   190  	for _, partition := range partitions {
   191  		usage, err := disk.Usage(partition.Mountpoint)
   192  		if err != nil {
   193  			if _, ok := h.badParts[partition.Mountpoint]; ok {
   194  				// already known bad, don't log again
   195  				continue
   196  			}
   197  
   198  			h.badParts[partition.Mountpoint] = struct{}{}
   199  			h.logger.Warn("error fetching host disk usage stats", "error", err, "partition", partition.Mountpoint)
   200  			continue
   201  		}
   202  		delete(h.badParts, partition.Mountpoint)
   203  
   204  		ds := h.toDiskStats(usage, &partition)
   205  		diskStats = append(diskStats, ds)
   206  	}
   207  
   208  	return diskStats, nil
   209  }
   210  
   211  func (h *HostStatsCollector) collectDeviceGroupStats() []*DeviceGroupStats {
   212  	if h.deviceStatsCollector == nil {
   213  		return []*DeviceGroupStats{}
   214  	}
   215  
   216  	return h.deviceStatsCollector()
   217  }
   218  
   219  // Stats returns the host stats that has been collected
   220  func (h *HostStatsCollector) Stats() *HostStats {
   221  	h.hostStatsLock.RLock()
   222  	defer h.hostStatsLock.RUnlock()
   223  
   224  	if h.hostStats == nil {
   225  		if err := h.collectLocked(); err != nil {
   226  			h.logger.Warn("error fetching host resource usage stats", "error", err)
   227  		}
   228  	}
   229  
   230  	return h.hostStats
   231  }
   232  
   233  // toDiskStats merges UsageStat and PartitionStat to create a DiskStat
   234  func (h *HostStatsCollector) toDiskStats(usage *disk.UsageStat, partitionStat *disk.PartitionStat) *DiskStats {
   235  	ds := DiskStats{
   236  		Size:              usage.Total,
   237  		Used:              usage.Used,
   238  		Available:         usage.Free,
   239  		UsedPercent:       usage.UsedPercent,
   240  		InodesUsedPercent: usage.InodesUsedPercent,
   241  	}
   242  	if math.IsNaN(ds.UsedPercent) {
   243  		ds.UsedPercent = 0.0
   244  	}
   245  	if math.IsNaN(ds.InodesUsedPercent) {
   246  		ds.InodesUsedPercent = 0.0
   247  	}
   248  
   249  	if partitionStat != nil {
   250  		ds.Device = partitionStat.Device
   251  		ds.Mountpoint = partitionStat.Mountpoint
   252  	}
   253  
   254  	return &ds
   255  }
   256  
   257  // HostCpuStatsCalculator calculates cpu usage percentages
   258  type HostCpuStatsCalculator struct {
   259  	prevIdle   float64
   260  	prevUser   float64
   261  	prevSystem float64
   262  	prevBusy   float64
   263  	prevTotal  float64
   264  }
   265  
   266  // NewHostCpuStatsCalculator returns a HostCpuStatsCalculator
   267  func NewHostCpuStatsCalculator() *HostCpuStatsCalculator {
   268  	return &HostCpuStatsCalculator{}
   269  }
   270  
   271  // Calculate calculates the current cpu usage percentages
   272  func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) {
   273  	currentIdle := times.Idle
   274  	currentUser := times.User
   275  	currentSystem := times.System
   276  	currentTotal := times.Total()
   277  	currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
   278  		times.Softirq + times.Steal + times.Guest + times.GuestNice
   279  
   280  	deltaTotal := currentTotal - h.prevTotal
   281  	idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
   282  	user = ((currentUser - h.prevUser) / deltaTotal) * 100
   283  	system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
   284  	total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
   285  
   286  	// Protect against any invalid values
   287  	if math.IsNaN(idle) || math.IsInf(idle, 0) {
   288  		idle = 100.0
   289  	}
   290  	if math.IsNaN(user) || math.IsInf(user, 0) {
   291  		user = 0.0
   292  	}
   293  	if math.IsNaN(system) || math.IsInf(system, 0) {
   294  		system = 0.0
   295  	}
   296  	if math.IsNaN(total) || math.IsInf(total, 0) {
   297  		total = 0.0
   298  	}
   299  
   300  	h.prevIdle = currentIdle
   301  	h.prevUser = currentUser
   302  	h.prevSystem = currentSystem
   303  	h.prevTotal = currentTotal
   304  	h.prevBusy = currentBusy
   305  	return
   306  }