github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/client/stats/host.go (about)

     1  package stats
     2  
     3  import (
     4  	"log"
     5  	"math"
     6  	"runtime"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/shirou/gopsutil/cpu"
    11  	"github.com/shirou/gopsutil/disk"
    12  	"github.com/shirou/gopsutil/host"
    13  	"github.com/shirou/gopsutil/mem"
    14  )
    15  
// HostStats represents resource usage stats of the host running a Nomad client.
// HostStatsCollector.Collect builds a fresh value on every successful run:
//
//   - Memory: virtual memory figures returned by collectMemoryStats.
//   - CPU, CPUTicksConsumed: the two values returned by collectCPUStats.
//   - DiskStats: one entry per partition whose usage could be read.
//   - AllocDirStats: disk usage of the collector's allocation directory;
//     Device and Mountpoint are left empty for this entry.
//   - Uptime: the value returned by gopsutil's host.Uptime.
//   - Timestamp: collection time, taken in UTC, as Unix nanoseconds.
type HostStats struct {
	Memory           *MemoryStats
	CPU              []*CPUStats
	DiskStats        []*DiskStats
	AllocDirStats    *DiskStats
	Uptime           uint64
	Timestamp        int64
	CPUTicksConsumed float64
}
    26  
    27  // MemoryStats represnts stats related to virtual memory usage
    28  type MemoryStats struct {
    29  	Total     uint64
    30  	Available uint64
    31  	Used      uint64
    32  	Free      uint64
    33  }
    34  
    35  // CPUStats represents stats related to cpu usage
    36  type CPUStats struct {
    37  	CPU    string
    38  	User   float64
    39  	System float64
    40  	Idle   float64
    41  	Total  float64
    42  }
    43  
    44  // DiskStats represents stats related to disk usage
    45  type DiskStats struct {
    46  	Device            string
    47  	Mountpoint        string
    48  	Size              uint64
    49  	Used              uint64
    50  	Available         uint64
    51  	UsedPercent       float64
    52  	InodesUsedPercent float64
    53  }
    54  
// NodeStatsCollector is an interface which is used for the purposes of mocking
// the HostStatsCollector in the tests
type NodeStatsCollector interface {
	// Collect gathers a new set of host stats and reports any failure.
	Collect() error
	// Stats returns the host stats that have been collected.
	Stats() *HostStats
}
    61  
// HostStatsCollector collects host resource usage stats.
//
// Field roles:
//   - numCores: runtime.NumCPU() captured when the collector is constructed.
//   - statsCalculator: CPU usage calculators indexed by a string key.
//     NOTE(review): presumably keyed by CPU name; the code that populates it
//     is not in this file — confirm.
//   - logger: receives warnings about partitions whose usage cannot be read.
//   - hostStats: the value stored by the last successful Collect; nil before
//     that.
//   - hostStatsLock: held for writing for the whole of Collect and for
//     reading in Stats.
//   - allocDir: path whose disk usage is reported as HostStats.AllocDirStats.
type HostStatsCollector struct {
	numCores        int
	statsCalculator map[string]*HostCpuStatsCalculator
	logger          *log.Logger
	hostStats       *HostStats
	hostStatsLock   sync.RWMutex
	allocDir        string

	// badParts is a set of partitions whose usage cannot be read; used to
	// squelch logspam.
	badParts map[string]struct{}
}
    75  
    76  // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in
    77  // so that we can present the disk related statistics for the mountpoint where
    78  // the allocation directory lives
    79  func NewHostStatsCollector(logger *log.Logger, allocDir string) *HostStatsCollector {
    80  	numCores := runtime.NumCPU()
    81  	statsCalculator := make(map[string]*HostCpuStatsCalculator)
    82  	collector := &HostStatsCollector{
    83  		statsCalculator: statsCalculator,
    84  		numCores:        numCores,
    85  		logger:          logger,
    86  		allocDir:        allocDir,
    87  		badParts:        make(map[string]struct{}),
    88  	}
    89  	return collector
    90  }
    91  
    92  // Collect collects stats related to resource usage of a host
    93  func (h *HostStatsCollector) Collect() error {
    94  	h.hostStatsLock.Lock()
    95  	defer h.hostStatsLock.Unlock()
    96  
    97  	hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
    98  
    99  	// Determine up-time
   100  	uptime, err := host.Uptime()
   101  	if err != nil {
   102  		return err
   103  	}
   104  	hs.Uptime = uptime
   105  
   106  	// Collect memory stats
   107  	mstats, err := h.collectMemoryStats()
   108  	if err != nil {
   109  		return err
   110  	}
   111  	hs.Memory = mstats
   112  
   113  	// Collect cpu stats
   114  	cpus, ticks, err := h.collectCPUStats()
   115  	if err != nil {
   116  		return err
   117  	}
   118  	hs.CPU = cpus
   119  	hs.CPUTicksConsumed = ticks
   120  
   121  	// Collect disk stats
   122  	diskStats, err := h.collectDiskStats()
   123  	if err != nil {
   124  		return err
   125  	}
   126  	hs.DiskStats = diskStats
   127  
   128  	// Getting the disk stats for the allocation directory
   129  	usage, err := disk.Usage(h.allocDir)
   130  	if err != nil {
   131  		return err
   132  	}
   133  	hs.AllocDirStats = h.toDiskStats(usage, nil)
   134  
   135  	// Update the collected status object.
   136  	h.hostStats = hs
   137  
   138  	return nil
   139  }
   140  
   141  func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) {
   142  	memStats, err := mem.VirtualMemory()
   143  	if err != nil {
   144  		return nil, err
   145  	}
   146  	mem := &MemoryStats{
   147  		Total:     memStats.Total,
   148  		Available: memStats.Available,
   149  		Used:      memStats.Used,
   150  		Free:      memStats.Free,
   151  	}
   152  
   153  	return mem, nil
   154  }
   155  
   156  func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) {
   157  	partitions, err := disk.Partitions(false)
   158  	if err != nil {
   159  		return nil, err
   160  	}
   161  
   162  	var diskStats []*DiskStats
   163  	for _, partition := range partitions {
   164  		usage, err := disk.Usage(partition.Mountpoint)
   165  		if err != nil {
   166  			if _, ok := h.badParts[partition.Mountpoint]; ok {
   167  				// already known bad, don't log again
   168  				continue
   169  			}
   170  
   171  			h.badParts[partition.Mountpoint] = struct{}{}
   172  			h.logger.Printf("[WARN] client: error fetching host disk usage stats for %v: %v", partition.Mountpoint, err)
   173  			continue
   174  		}
   175  		delete(h.badParts, partition.Mountpoint)
   176  
   177  		ds := h.toDiskStats(usage, &partition)
   178  		diskStats = append(diskStats, ds)
   179  	}
   180  
   181  	return diskStats, nil
   182  }
   183  
   184  // Stats returns the host stats that has been collected
   185  func (h *HostStatsCollector) Stats() *HostStats {
   186  	h.hostStatsLock.RLock()
   187  	defer h.hostStatsLock.RUnlock()
   188  	return h.hostStats
   189  }
   190  
   191  // toDiskStats merges UsageStat and PartitionStat to create a DiskStat
   192  func (h *HostStatsCollector) toDiskStats(usage *disk.UsageStat, partitionStat *disk.PartitionStat) *DiskStats {
   193  	ds := DiskStats{
   194  		Size:              usage.Total,
   195  		Used:              usage.Used,
   196  		Available:         usage.Free,
   197  		UsedPercent:       usage.UsedPercent,
   198  		InodesUsedPercent: usage.InodesUsedPercent,
   199  	}
   200  	if math.IsNaN(ds.UsedPercent) {
   201  		ds.UsedPercent = 0.0
   202  	}
   203  	if math.IsNaN(ds.InodesUsedPercent) {
   204  		ds.InodesUsedPercent = 0.0
   205  	}
   206  
   207  	if partitionStat != nil {
   208  		ds.Device = partitionStat.Device
   209  		ds.Mountpoint = partitionStat.Mountpoint
   210  	}
   211  
   212  	return &ds
   213  }
   214  
   215  // HostCpuStatsCalculator calculates cpu usage percentages
   216  type HostCpuStatsCalculator struct {
   217  	prevIdle   float64
   218  	prevUser   float64
   219  	prevSystem float64
   220  	prevBusy   float64
   221  	prevTotal  float64
   222  }
   223  
   224  // NewHostCpuStatsCalculator returns a HostCpuStatsCalculator
   225  func NewHostCpuStatsCalculator() *HostCpuStatsCalculator {
   226  	return &HostCpuStatsCalculator{}
   227  }
   228  
   229  // Calculate calculates the current cpu usage percentages
   230  func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) {
   231  	currentIdle := times.Idle
   232  	currentUser := times.User
   233  	currentSystem := times.System
   234  	currentTotal := times.Total()
   235  	currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
   236  		times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen
   237  
   238  	deltaTotal := currentTotal - h.prevTotal
   239  	idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
   240  	user = ((currentUser - h.prevUser) / deltaTotal) * 100
   241  	system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
   242  	total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
   243  
   244  	// Protect against any invalid values
   245  	if math.IsNaN(idle) || math.IsInf(idle, 0) {
   246  		idle = 100.0
   247  	}
   248  	if math.IsNaN(user) || math.IsInf(user, 0) {
   249  		user = 0.0
   250  	}
   251  	if math.IsNaN(system) || math.IsInf(system, 0) {
   252  		system = 0.0
   253  	}
   254  	if math.IsNaN(total) || math.IsInf(total, 0) {
   255  		total = 0.0
   256  	}
   257  
   258  	h.prevIdle = currentIdle
   259  	h.prevUser = currentUser
   260  	h.prevSystem = currentSystem
   261  	h.prevTotal = currentTotal
   262  	h.prevBusy = currentBusy
   263  	return
   264  }