github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/command/check.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "strconv" 6 "strings" 7 "time" 8 9 "github.com/posener/complete" 10 ) 11 12 const ( 13 HealthCritical = 2 14 HealthWarn = 1 15 HealthPass = 0 16 HealthUnknown = 3 17 ) 18 19 type AgentCheckCommand struct { 20 Meta 21 } 22 23 func (c *AgentCheckCommand) Help() string { 24 helpText := ` 25 Usage: nomad check [options] 26 27 Display state of the Nomad agent. The exit code of the command is Nagios 28 compatible and could be used with alerting systems. 29 30 General Options: 31 32 ` + generalOptionsUsage() + ` 33 34 Agent Check Options: 35 36 -min-peers 37 Minimum number of peers that a server is expected to know. 38 39 -min-servers 40 Minimum number of servers that a client is expected to know. 41 ` 42 43 return strings.TrimSpace(helpText) 44 } 45 46 func (c *AgentCheckCommand) Synopsis() string { 47 return "Displays health of the local Nomad agent" 48 } 49 50 func (c *AgentCheckCommand) Run(args []string) int { 51 var minPeers, minServers int 52 53 flags := c.Meta.FlagSet("check", FlagSetClient) 54 flags.Usage = func() { c.Ui.Output(c.Help()) } 55 flags.IntVar(&minPeers, "min-peers", 0, "") 56 flags.IntVar(&minServers, "min-servers", 1, "") 57 58 if err := flags.Parse(args); err != nil { 59 return 1 60 } 61 62 client, err := c.Meta.Client() 63 if err != nil { 64 c.Ui.Error(fmt.Sprintf("error initializing client: %s", err)) 65 return HealthCritical 66 } 67 68 info, err := client.Agent().Self() 69 if err != nil { 70 c.Ui.Output(fmt.Sprintf("unable to query agent info: %v", err)) 71 return HealthCritical 72 } 73 if _, ok := info.Stats["nomad"]; ok { 74 return c.checkServerHealth(info.Stats, minPeers) 75 } 76 77 if clientStats, ok := info.Stats["client"]; ok { 78 return c.checkClientHealth(clientStats, minServers) 79 } 80 return HealthWarn 81 } 82 83 // checkServerHealth returns the health of a server. 84 // TODO Add more rules for determining server health 85 func (c *AgentCheckCommand) checkServerHealth(info map[string]map[string]string, minPeers int) int { 86 raft := info["raft"] 87 knownPeers, err := strconv.Atoi(raft["num_peers"]) 88 if err != nil { 89 c.Ui.Output(fmt.Sprintf("unable to get known peers: %v", err)) 90 return HealthCritical 91 } 92 93 if knownPeers < minPeers { 94 c.Ui.Output(fmt.Sprintf("known peers: %v, is less than expected number of peers: %v", knownPeers, minPeers)) 95 return HealthCritical 96 } 97 return HealthPass 98 } 99 100 // checkClientHealth returns the health of a client 101 func (c *AgentCheckCommand) checkClientHealth(clientStats map[string]string, minServers int) int { 102 knownServers, err := strconv.Atoi(clientStats["known_servers"]) 103 if err != nil { 104 c.Ui.Output(fmt.Sprintf("unable to get known servers: %v", err)) 105 return HealthCritical 106 } 107 108 heartbeatTTL, err := time.ParseDuration(clientStats["heartbeat_ttl"]) 109 if err != nil { 110 c.Ui.Output(fmt.Sprintf("unable to parse heartbeat TTL: %v", err)) 111 return HealthCritical 112 } 113 114 lastHeartbeat, err := time.ParseDuration(clientStats["last_heartbeat"]) 115 if err != nil { 116 c.Ui.Output(fmt.Sprintf("unable to parse last heartbeat: %v", err)) 117 return HealthCritical 118 } 119 120 if lastHeartbeat > heartbeatTTL { 121 c.Ui.Output(fmt.Sprintf("last heartbeat was %q time ago, expected heartbeat ttl: %q", lastHeartbeat, heartbeatTTL)) 122 return HealthCritical 123 } 124 125 if knownServers < minServers { 126 c.Ui.Output(fmt.Sprintf("known servers: %v, is less than expected number of servers: %v", knownServers, minServers)) 127 return HealthCritical 128 } 129 130 return HealthPass 131 } 132 133 func (c *AgentCheckCommand) AutocompleteFlags() complete.Flags { 134 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 135 complete.Flags{ 136 "-min-peers": complete.PredictAnything, 137 "-min-servers": complete.PredictAnything, 138 }) 139 } 140 141 func (c *AgentCheckCommand) AutocompleteArgs() complete.Predictor { 142 return complete.PredictNothing 143 }