github.com/anuvu/nomad@v0.8.7-atom1/client/stats/host.go (about)

     1  package stats
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"math"
     7  	"runtime"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/shirou/gopsutil/cpu"
    12  	"github.com/shirou/gopsutil/disk"
    13  	"github.com/shirou/gopsutil/host"
    14  	"github.com/shirou/gopsutil/mem"
    15  )
    16  
// HostStats represents resource usage stats of the host running a Nomad client.
// A new value is assembled by HostStatsCollector.collectLocked on each
// successful collection pass.
type HostStats struct {
	// Memory is the virtual memory snapshot produced by collectMemoryStats.
	Memory           *MemoryStats
	// CPU holds the CPU usage entries returned by collectCPUStats.
	CPU              []*CPUStats
	// DiskStats has one entry per partition whose usage could be read;
	// partitions that fail to report usage are skipped.
	DiskStats        []*DiskStats
	// AllocDirStats is the disk usage of the allocation directory. Its Device
	// and Mountpoint are left empty because no partition info is supplied.
	AllocDirStats    *DiskStats
	// Uptime is the value reported by gopsutil's host.Uptime.
	Uptime           uint64
	// Timestamp is the collection time in UTC, as Unix nanoseconds.
	Timestamp        int64
	// CPUTicksConsumed is the tick figure returned alongside the per-CPU
	// stats by collectCPUStats.
	CPUTicksConsumed float64
}
    27  
    28  // MemoryStats represents stats related to virtual memory usage
    29  type MemoryStats struct {
    30  	Total     uint64
    31  	Available uint64
    32  	Used      uint64
    33  	Free      uint64
    34  }
    35  
    36  // CPUStats represents stats related to cpu usage
    37  type CPUStats struct {
    38  	CPU    string
    39  	User   float64
    40  	System float64
    41  	Idle   float64
    42  	Total  float64
    43  }
    44  
    45  // DiskStats represents stats related to disk usage
    46  type DiskStats struct {
    47  	Device            string
    48  	Mountpoint        string
    49  	Size              uint64
    50  	Used              uint64
    51  	Available         uint64
    52  	UsedPercent       float64
    53  	InodesUsedPercent float64
    54  }
    55  
// NodeStatsCollector is an interface which is used for the purposes of mocking
// the HostStatsCollector in the tests
type NodeStatsCollector interface {
	// Collect gathers a fresh set of host stats and reports any error that
	// prevented the collection from completing.
	Collect() error
	// Stats returns the most recently collected host stats.
	Stats() *HostStats
}
    62  
// HostStatsCollector collects host resource usage stats
type HostStatsCollector struct {
	// numCores is set from runtime.NumCPU() when the collector is created.
	numCores        int
	// statsCalculator maps a string key to a CPU usage calculator.
	// NOTE(review): presumably keyed by CPU name and used by
	// collectCPUStats, which is not defined in this file — confirm.
	statsCalculator map[string]*HostCpuStatsCalculator
	// logger receives warnings about stats that could not be collected.
	logger          *log.Logger
	// hostStats is the latest successfully collected snapshot; nil until the
	// first successful collection.
	hostStats       *HostStats
	// hostStatsLock is held by Collect and Stats while they access hostStats.
	hostStatsLock   sync.RWMutex
	// allocDir is the allocation directory whose disk usage is reported in
	// HostStats.AllocDirStats.
	allocDir        string

	// badParts is a set of partitions whose usage cannot be read; used to
	// squelch logspam.
	badParts map[string]struct{}
}
    76  
    77  // NewHostStatsCollector returns a HostStatsCollector. The allocDir is passed in
    78  // so that we can present the disk related statistics for the mountpoint where
    79  // the allocation directory lives
    80  func NewHostStatsCollector(logger *log.Logger, allocDir string) *HostStatsCollector {
    81  	numCores := runtime.NumCPU()
    82  	statsCalculator := make(map[string]*HostCpuStatsCalculator)
    83  	collector := &HostStatsCollector{
    84  		statsCalculator: statsCalculator,
    85  		numCores:        numCores,
    86  		logger:          logger,
    87  		allocDir:        allocDir,
    88  		badParts:        make(map[string]struct{}),
    89  	}
    90  	return collector
    91  }
    92  
// Collect collects stats related to resource usage of a host. It holds the
// exclusive stats lock for the duration of the collection and returns the
// error, if any, reported by collectLocked.
func (h *HostStatsCollector) Collect() error {
	h.hostStatsLock.Lock()
	defer h.hostStatsLock.Unlock()
	return h.collectLocked()
}
    99  
   100  // collectLocked collects stats related to resource usage of the host but should
   101  // be called with the lock held.
   102  func (h *HostStatsCollector) collectLocked() error {
   103  	hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
   104  
   105  	// Determine up-time
   106  	uptime, err := host.Uptime()
   107  	if err != nil {
   108  		return err
   109  	}
   110  	hs.Uptime = uptime
   111  
   112  	// Collect memory stats
   113  	mstats, err := h.collectMemoryStats()
   114  	if err != nil {
   115  		return err
   116  	}
   117  	hs.Memory = mstats
   118  
   119  	// Collect cpu stats
   120  	cpus, ticks, err := h.collectCPUStats()
   121  	if err != nil {
   122  		return err
   123  	}
   124  	hs.CPU = cpus
   125  	hs.CPUTicksConsumed = ticks
   126  
   127  	// Collect disk stats
   128  	diskStats, err := h.collectDiskStats()
   129  	if err != nil {
   130  		return err
   131  	}
   132  	hs.DiskStats = diskStats
   133  
   134  	// Getting the disk stats for the allocation directory
   135  	usage, err := disk.Usage(h.allocDir)
   136  	if err != nil {
   137  		return fmt.Errorf("failed to find disk usage of alloc_dir %q: %v", h.allocDir, err)
   138  	}
   139  	hs.AllocDirStats = h.toDiskStats(usage, nil)
   140  
   141  	// Update the collected status object.
   142  	h.hostStats = hs
   143  
   144  	return nil
   145  }
   146  
   147  func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) {
   148  	memStats, err := mem.VirtualMemory()
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  	mem := &MemoryStats{
   153  		Total:     memStats.Total,
   154  		Available: memStats.Available,
   155  		Used:      memStats.Used,
   156  		Free:      memStats.Free,
   157  	}
   158  
   159  	return mem, nil
   160  }
   161  
   162  func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) {
   163  	partitions, err := disk.Partitions(false)
   164  	if err != nil {
   165  		return nil, err
   166  	}
   167  
   168  	var diskStats []*DiskStats
   169  	for _, partition := range partitions {
   170  		usage, err := disk.Usage(partition.Mountpoint)
   171  		if err != nil {
   172  			if _, ok := h.badParts[partition.Mountpoint]; ok {
   173  				// already known bad, don't log again
   174  				continue
   175  			}
   176  
   177  			h.badParts[partition.Mountpoint] = struct{}{}
   178  			h.logger.Printf("[WARN] client: error fetching host disk usage stats for %v: %v", partition.Mountpoint, err)
   179  			continue
   180  		}
   181  		delete(h.badParts, partition.Mountpoint)
   182  
   183  		ds := h.toDiskStats(usage, &partition)
   184  		diskStats = append(diskStats, ds)
   185  	}
   186  
   187  	return diskStats, nil
   188  }
   189  
   190  // Stats returns the host stats that has been collected
   191  func (h *HostStatsCollector) Stats() *HostStats {
   192  	h.hostStatsLock.RLock()
   193  	defer h.hostStatsLock.RUnlock()
   194  
   195  	if h.hostStats == nil {
   196  		if err := h.collectLocked(); err != nil {
   197  			h.logger.Printf("[WARN] client: error fetching host resource usage stats: %v", err)
   198  		}
   199  	}
   200  
   201  	return h.hostStats
   202  }
   203  
   204  // toDiskStats merges UsageStat and PartitionStat to create a DiskStat
   205  func (h *HostStatsCollector) toDiskStats(usage *disk.UsageStat, partitionStat *disk.PartitionStat) *DiskStats {
   206  	ds := DiskStats{
   207  		Size:              usage.Total,
   208  		Used:              usage.Used,
   209  		Available:         usage.Free,
   210  		UsedPercent:       usage.UsedPercent,
   211  		InodesUsedPercent: usage.InodesUsedPercent,
   212  	}
   213  	if math.IsNaN(ds.UsedPercent) {
   214  		ds.UsedPercent = 0.0
   215  	}
   216  	if math.IsNaN(ds.InodesUsedPercent) {
   217  		ds.InodesUsedPercent = 0.0
   218  	}
   219  
   220  	if partitionStat != nil {
   221  		ds.Device = partitionStat.Device
   222  		ds.Mountpoint = partitionStat.Mountpoint
   223  	}
   224  
   225  	return &ds
   226  }
   227  
   228  // HostCpuStatsCalculator calculates cpu usage percentages
   229  type HostCpuStatsCalculator struct {
   230  	prevIdle   float64
   231  	prevUser   float64
   232  	prevSystem float64
   233  	prevBusy   float64
   234  	prevTotal  float64
   235  }
   236  
   237  // NewHostCpuStatsCalculator returns a HostCpuStatsCalculator
   238  func NewHostCpuStatsCalculator() *HostCpuStatsCalculator {
   239  	return &HostCpuStatsCalculator{}
   240  }
   241  
   242  // Calculate calculates the current cpu usage percentages
   243  func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, user float64, system float64, total float64) {
   244  	currentIdle := times.Idle
   245  	currentUser := times.User
   246  	currentSystem := times.System
   247  	currentTotal := times.Total()
   248  	currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
   249  		times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen
   250  
   251  	deltaTotal := currentTotal - h.prevTotal
   252  	idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
   253  	user = ((currentUser - h.prevUser) / deltaTotal) * 100
   254  	system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
   255  	total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
   256  
   257  	// Protect against any invalid values
   258  	if math.IsNaN(idle) || math.IsInf(idle, 0) {
   259  		idle = 100.0
   260  	}
   261  	if math.IsNaN(user) || math.IsInf(user, 0) {
   262  		user = 0.0
   263  	}
   264  	if math.IsNaN(system) || math.IsInf(system, 0) {
   265  		system = 0.0
   266  	}
   267  	if math.IsNaN(total) || math.IsInf(total, 0) {
   268  		total = 0.0
   269  	}
   270  
   271  	h.prevIdle = currentIdle
   272  	h.prevUser = currentUser
   273  	h.prevSystem = currentSystem
   274  	h.prevTotal = currentTotal
   275  	h.prevBusy = currentBusy
   276  	return
   277  }