github.com/djenriquez/nomad-1@v0.8.1/command/check.go (about)

     1  package command
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"strings"
     7  	"time"
     8  
     9  	"github.com/posener/complete"
    10  )
    11  
    12  const (
    13  	HealthCritical = 2
    14  	HealthWarn     = 1
    15  	HealthPass     = 0
    16  	HealthUnknown  = 3
    17  )
    18  
    19  type AgentCheckCommand struct {
    20  	Meta
    21  }
    22  
    23  func (c *AgentCheckCommand) Help() string {
    24  	helpText := `
    25  Usage: nomad check [options]
    26  
    27    Display state of the Nomad agent. The exit code of the command is Nagios
    28    compatible and could be used with alerting systems.
    29  
    30  General Options:
    31  
    32    ` + generalOptionsUsage() + `
    33  
    34  Agent Check Options:
    35  
    36    -min-peers
    37       Minimum number of peers that a server is expected to know.
    38  
    39    -min-servers
    40       Minimum number of servers that a client is expected to know.
    41  `
    42  
    43  	return strings.TrimSpace(helpText)
    44  }
    45  
    46  func (c *AgentCheckCommand) Synopsis() string {
    47  	return "Displays health of the local Nomad agent"
    48  }
    49  
    50  func (c *AgentCheckCommand) Run(args []string) int {
    51  	var minPeers, minServers int
    52  
    53  	flags := c.Meta.FlagSet("check", FlagSetClient)
    54  	flags.Usage = func() { c.Ui.Output(c.Help()) }
    55  	flags.IntVar(&minPeers, "min-peers", 0, "")
    56  	flags.IntVar(&minServers, "min-servers", 1, "")
    57  
    58  	if err := flags.Parse(args); err != nil {
    59  		return 1
    60  	}
    61  
    62  	client, err := c.Meta.Client()
    63  	if err != nil {
    64  		c.Ui.Error(fmt.Sprintf("error initializing client: %s", err))
    65  		return HealthCritical
    66  	}
    67  
    68  	info, err := client.Agent().Self()
    69  	if err != nil {
    70  		c.Ui.Output(fmt.Sprintf("unable to query agent info: %v", err))
    71  		return HealthCritical
    72  	}
    73  	if _, ok := info.Stats["nomad"]; ok {
    74  		return c.checkServerHealth(info.Stats, minPeers)
    75  	}
    76  
    77  	if clientStats, ok := info.Stats["client"]; ok {
    78  		return c.checkClientHealth(clientStats, minServers)
    79  	}
    80  	return HealthWarn
    81  }
    82  
    83  // checkServerHealth returns the health of a server.
    84  // TODO Add more rules for determining server health
    85  func (c *AgentCheckCommand) checkServerHealth(info map[string]map[string]string, minPeers int) int {
    86  	raft := info["raft"]
    87  	knownPeers, err := strconv.Atoi(raft["num_peers"])
    88  	if err != nil {
    89  		c.Ui.Output(fmt.Sprintf("unable to get known peers: %v", err))
    90  		return HealthCritical
    91  	}
    92  
    93  	if knownPeers < minPeers {
    94  		c.Ui.Output(fmt.Sprintf("known peers: %v, is less than expected number of peers: %v", knownPeers, minPeers))
    95  		return HealthCritical
    96  	}
    97  	return HealthPass
    98  }
    99  
   100  // checkClientHealth returns the health of a client
   101  func (c *AgentCheckCommand) checkClientHealth(clientStats map[string]string, minServers int) int {
   102  	knownServers, err := strconv.Atoi(clientStats["known_servers"])
   103  	if err != nil {
   104  		c.Ui.Output(fmt.Sprintf("unable to get known servers: %v", err))
   105  		return HealthCritical
   106  	}
   107  
   108  	heartbeatTTL, err := time.ParseDuration(clientStats["heartbeat_ttl"])
   109  	if err != nil {
   110  		c.Ui.Output(fmt.Sprintf("unable to parse heartbeat TTL: %v", err))
   111  		return HealthCritical
   112  	}
   113  
   114  	lastHeartbeat, err := time.ParseDuration(clientStats["last_heartbeat"])
   115  	if err != nil {
   116  		c.Ui.Output(fmt.Sprintf("unable to parse last heartbeat: %v", err))
   117  		return HealthCritical
   118  	}
   119  
   120  	if lastHeartbeat > heartbeatTTL {
   121  		c.Ui.Output(fmt.Sprintf("last heartbeat was %q time ago, expected heartbeat ttl: %q", lastHeartbeat, heartbeatTTL))
   122  		return HealthCritical
   123  	}
   124  
   125  	if knownServers < minServers {
   126  		c.Ui.Output(fmt.Sprintf("known servers: %v, is less than expected number of servers: %v", knownServers, minServers))
   127  		return HealthCritical
   128  	}
   129  
   130  	return HealthPass
   131  }
   132  
   133  func (c *AgentCheckCommand) AutocompleteFlags() complete.Flags {
   134  	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
   135  		complete.Flags{
   136  			"-min-peers":   complete.PredictAnything,
   137  			"-min-servers": complete.PredictAnything,
   138  		})
   139  }
   140  
   141  func (c *AgentCheckCommand) AutocompleteArgs() complete.Predictor {
   142  	return complete.PredictNothing
   143  }