github.com/manicqin/nomad@v0.9.5/client/stats/host.go (about)

     1  package stats
     2  
     3  import (
     4  	"math"
     5  	"runtime"
     6  	"sync"
     7  	"time"
     8  
     9  	hclog "github.com/hashicorp/go-hclog"
    10  	"github.com/hashicorp/nomad/plugins/device"
    11  	"github.com/shirou/gopsutil/cpu"
    12  	"github.com/shirou/gopsutil/disk"
    13  	"github.com/shirou/gopsutil/host"
    14  	"github.com/shirou/gopsutil/mem"
    15  )
    16  
    17  // HostStats represents resource usage stats of the host running a Nomad client
    18  type HostStats struct {
    19  	Memory           *MemoryStats
    20  	CPU              []*CPUStats
    21  	DiskStats        []*DiskStats
    22  	AllocDirStats    *DiskStats
    23  	DeviceStats      []*DeviceGroupStats
    24  	Uptime           uint64
    25  	Timestamp        int64
    26  	CPUTicksConsumed float64
    27  }
    28  
    29  // MemoryStats represents stats related to virtual memory usage
    30  type MemoryStats struct {
    31  	Total     uint64
    32  	Available uint64
    33  	Used      uint64
    34  	Free      uint64
    35  }
    36  
    37  // CPUStats represents stats related to cpu usage
    38  type CPUStats struct {
    39  	CPU    string
    40  	User   float64
    41  	System float64
    42  	Idle   float64
    43  	Total  float64
    44  }
    45  
    46  // DiskStats represents stats related to disk usage
    47  type DiskStats struct {
    48  	Device            string
    49  	Mountpoint        string
    50  	Size              uint64
    51  	Used              uint64
    52  	Available         uint64
    53  	UsedPercent       float64
    54  	InodesUsedPercent float64
    55  }
    56  
    57  // DeviceGroupStats represents stats related to device group
    58  type DeviceGroupStats = device.DeviceGroupStats
    59  
    60  // DeviceStatsCollector is used to retrieve all the latest statistics for all devices.
    61  type DeviceStatsCollector func() []*DeviceGroupStats
    62  
    63  // NodeStatsCollector is an interface which is used for the purposes of mocking
    64  // the HostStatsCollector in the tests
    65  type NodeStatsCollector interface {
    66  	Collect() error
    67  	Stats() *HostStats
    68  }
    69  
    70  // HostStatsCollector collects host resource usage stats
    71  type HostStatsCollector struct {
    72  	numCores             int
    73  	statsCalculator      map[string]*HostCpuStatsCalculator
    74  	hostStats            *HostStats
    75  	hostStatsLock        sync.RWMutex
    76  	allocDir             string
    77  	deviceStatsCollector DeviceStatsCollector
    78  
    79  	// badParts is a set of partitions whose usage cannot be read; used to
    80  	// squelch logspam.
    81  	badParts map[string]struct{}
    82  
    83  	logger hclog.Logger
    84  }
    85  
    86  // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in
    87  // so that we can present the disk related statistics for the mountpoint where
    88  // the allocation directory lives
    89  // POI
    90  func NewHostStatsCollector(logger hclog.Logger, allocDir string, deviceStatsCollector DeviceStatsCollector) *HostStatsCollector {
    91  	logger = logger.Named("host_stats")
    92  	numCores := runtime.NumCPU()
    93  	statsCalculator := make(map[string]*HostCpuStatsCalculator)
    94  	collector := &HostStatsCollector{
    95  		statsCalculator:      statsCalculator,
    96  		numCores:             numCores,
    97  		logger:               logger,
    98  		allocDir:             allocDir,
    99  		badParts:             make(map[string]struct{}),
   100  		deviceStatsCollector: deviceStatsCollector,
   101  	}
   102  	return collector
   103  }
   104  
   105  // Collect collects stats related to resource usage of a host
   106  func (h *HostStatsCollector) Collect() error {
   107  	h.hostStatsLock.Lock()
   108  	defer h.hostStatsLock.Unlock()
   109  	return h.collectLocked()
   110  }
   111  
   112  // collectLocked collects stats related to resource usage of the host but should
   113  // be called with the lock held.
   114  func (h *HostStatsCollector) collectLocked() error {
   115  	hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
   116  
   117  	// Determine up-time
   118  	uptime, err := host.Uptime()
   119  	if err != nil {
   120  		h.logger.Error("failed to collect upstime stats", "error", err)
   121  		uptime = 0
   122  	}
   123  	hs.Uptime = uptime
   124  
   125  	// Collect memory stats
   126  	mstats, err := h.collectMemoryStats()
   127  	if err != nil {
   128  
   129  		h.logger.Error("failed to collect memory stats", "error", err)
   130  		mstats = &MemoryStats{}
   131  	}
   132  	hs.Memory = mstats
   133  
   134  	// Collect cpu stats
   135  	cpus, ticks, err := h.collectCPUStats()
   136  	if err != nil {
   137  
   138  		h.logger.Error("failed to collect cpu stats", "error", err)
   139  		cpus = []*CPUStats{}
   140  		ticks = 0
   141  	}
   142  	hs.CPU = cpus
   143  	hs.CPUTicksConsumed = ticks
   144  
   145  	// Collect disk stats
   146  	diskStats, err := h.collectDiskStats()
   147  	if err != nil {
   148  
   149  		h.logger.Error("failed to collect disk stats", "error", err)
   150  		hs.DiskStats = []*DiskStats{}
   151  	}
   152  	hs.DiskStats = diskStats
   153  
   154  	// Getting the disk stats for the allocation directory
   155  	usage, err := disk.Usage(h.allocDir)
   156  	if err != nil {
   157  		h.logger.Error("failed to find disk usage of alloc", "alloc_dir", h.allocDir, "error", err)
   158  		hs.AllocDirStats = &DiskStats{}
   159  	} else {
   160  		hs.AllocDirStats = h.toDiskStats(usage, nil)
   161  	}
   162  	// Collect devices stats
   163  	deviceStats := h.collectDeviceGroupStats()
   164  	hs.DeviceStats = deviceStats
   165  
   166  	// Update the collected status object.
   167  	h.hostStats = hs
   168  
   169  	return nil
   170  }
   171  
   172  func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) {
   173  	memStats, err := mem.VirtualMemory()
   174  	if err != nil {
   175  		return nil, err
   176  	}
   177  	mem := &MemoryStats{
   178  		Total:     memStats.Total,
   179  		Available: memStats.Available,
   180  		Used:      memStats.Used,
   181  		Free:      memStats.Free,
   182  	}
   183  
   184  	return mem, nil
   185  }
   186  
   187  func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) {
   188  	partitions, err := disk.Partitions(false)
   189  	if err != nil {
   190  		return nil, err
   191  	}
   192  
   193  	var diskStats []*DiskStats
   194  	for _, partition := range partitions {
   195  		usage, err := disk.Usage(partition.Mountpoint)
   196  		if err != nil {
   197  			if _, ok := h.badParts[partition.Mountpoint]; ok {
   198  				// already known bad, don't log again
   199  				continue
   200  			}
   201  
   202  			h.badParts[partition.Mountpoint] = struct{}{}
   203  			h.logger.Warn("error fetching host disk usage stats", "error", err, "partition", partition.Mountpoint)
   204  			continue
   205  		}
   206  		delete(h.badParts, partition.Mountpoint)
   207  
   208  		ds := h.toDiskStats(usage, &partition)
   209  		diskStats = append(diskStats, ds)
   210  	}
   211  
   212  	return diskStats, nil
   213  }
   214  
   215  func (h *HostStatsCollector) collectDeviceGroupStats() []*DeviceGroupStats {
   216  	if h.deviceStatsCollector == nil {
   217  		return []*DeviceGroupStats{}
   218  	}
   219  
   220  	return h.deviceStatsCollector()
   221  }
   222  
   223  // Stats returns the host stats that has been collected
   224  func (h *HostStatsCollector) Stats() *HostStats {
   225  	h.hostStatsLock.RLock()
   226  	defer h.hostStatsLock.RUnlock()
   227  
   228  	if h.hostStats == nil {
   229  		if err := h.collectLocked(); err != nil {
   230  			h.logger.Warn("error fetching host resource usage stats", "error", err)
   231  		}
   232  	}
   233  
   234  	return h.hostStats
   235  }
   236  
   237  // toDiskStats merges UsageStat and PartitionStat to create a DiskStat
   238  func (h *HostStatsCollector) toDiskStats(usage *disk.UsageStat, partitionStat *disk.PartitionStat) *DiskStats {
   239  	ds := DiskStats{
   240  		Size:              usage.Total,
   241  		Used:              usage.Used,
   242  		Available:         usage.Free,
   243  		UsedPercent:       usage.UsedPercent,
   244  		InodesUsedPercent: usage.InodesUsedPercent,
   245  	}
   246  	if math.IsNaN(ds.UsedPercent) {
   247  		ds.UsedPercent = 0.0
   248  	}
   249  	if math.IsNaN(ds.InodesUsedPercent) {
   250  		ds.InodesUsedPercent = 0.0
   251  	}
   252  
   253  	if partitionStat != nil {
   254  		ds.Device = partitionStat.Device
   255  		ds.Mountpoint = partitionStat.Mountpoint
   256  	}
   257  
   258  	return &ds
   259  }
   260  
   261  // HostCpuStatsCalculator calculates cpu usage percentages
   262  type HostCpuStatsCalculator struct {
   263  	prevIdle   float64
   264  	prevUser   float64
   265  	prevSystem float64
   266  	prevBusy   float64
   267  	prevTotal  float64
   268  }
   269  
   270  // NewHostCpuStatsCalculator returns a HostCpuStatsCalculator
   271  func NewHostCpuStatsCalculator() *HostCpuStatsCalculator {
   272  	return &HostCpuStatsCalculator{}
   273  }
   274  
   275  // Calculate calculates the current cpu usage percentages
   276  func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) {
   277  	currentIdle := times.Idle
   278  	currentUser := times.User
   279  	currentSystem := times.System
   280  	currentTotal := times.Total()
   281  	currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
   282  		times.Softirq + times.Steal + times.Guest + times.GuestNice
   283  
   284  	deltaTotal := currentTotal - h.prevTotal
   285  	idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
   286  	user = ((currentUser - h.prevUser) / deltaTotal) * 100
   287  	system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
   288  	total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
   289  
   290  	// Protect against any invalid values
   291  	if math.IsNaN(idle) || math.IsInf(idle, 0) {
   292  		idle = 100.0
   293  	}
   294  	if math.IsNaN(user) || math.IsInf(user, 0) {
   295  		user = 0.0
   296  	}
   297  	if math.IsNaN(system) || math.IsInf(system, 0) {
   298  		system = 0.0
   299  	}
   300  	if math.IsNaN(total) || math.IsInf(total, 0) {
   301  		total = 0.0
   302  	}
   303  
   304  	h.prevIdle = currentIdle
   305  	h.prevUser = currentUser
   306  	h.prevSystem = currentSystem
   307  	h.prevTotal = currentTotal
   308  	h.prevBusy = currentBusy
   309  	return
   310  }