github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/command/check.go (about)

     1  package command
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"strings"
     7  	"time"
     8  
     9  	"github.com/posener/complete"
    10  )
    11  
    12  const (
    13  	HealthCritical = 2
    14  	HealthWarn     = 1
    15  	HealthPass     = 0
    16  	HealthUnknown  = 3
    17  )
    18  
    19  type AgentCheckCommand struct {
    20  	Meta
    21  }
    22  
    23  func (c *AgentCheckCommand) Help() string {
    24  	helpText := `
    25  Usage: nomad check [options]
    26  
    27    Display state of the Nomad agent. The exit code of the command is Nagios
    28    compatible and could be used with alerting systems.
    29  
    30  General Options:
    31  
    32    ` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
    33  
    34  Agent Check Options:
    35  
    36    -min-peers
    37       Minimum number of peers that a server is expected to know.
    38  
    39    -min-servers
    40       Minimum number of servers that a client is expected to know.
    41  `
    42  
    43  	return strings.TrimSpace(helpText)
    44  }
    45  
    46  func (c *AgentCheckCommand) Synopsis() string {
    47  	return "Displays health of the local Nomad agent"
    48  }
    49  
    50  func (c *AgentCheckCommand) Name() string { return "check" }
    51  
    52  func (c *AgentCheckCommand) Run(args []string) int {
    53  	var minPeers, minServers int
    54  
    55  	flags := c.Meta.FlagSet("check", FlagSetClient)
    56  	flags.Usage = func() { c.Ui.Output(c.Help()) }
    57  	flags.IntVar(&minPeers, "min-peers", 0, "")
    58  	flags.IntVar(&minServers, "min-servers", 1, "")
    59  
    60  	if err := flags.Parse(args); err != nil {
    61  		return 1
    62  	}
    63  
    64  	args = flags.Args()
    65  	if len(args) > 0 {
    66  		c.Ui.Error("This command takes no arguments")
    67  		c.Ui.Error(commandErrorText(c))
    68  		return 1
    69  	}
    70  
    71  	client, err := c.Meta.Client()
    72  	if err != nil {
    73  		c.Ui.Error(fmt.Sprintf("error initializing client: %s", err))
    74  		return HealthCritical
    75  	}
    76  
    77  	info, err := client.Agent().Self()
    78  	if err != nil {
    79  		c.Ui.Output(fmt.Sprintf("unable to query agent info: %v", err))
    80  		return HealthCritical
    81  	}
    82  	if _, ok := info.Stats["nomad"]; ok {
    83  		return c.checkServerHealth(info.Stats, minPeers)
    84  	}
    85  
    86  	if clientStats, ok := info.Stats["client"]; ok {
    87  		return c.checkClientHealth(clientStats, minServers)
    88  	}
    89  	return HealthWarn
    90  }
    91  
    92  // checkServerHealth returns the health of a server.
    93  // TODO Add more rules for determining server health
    94  func (c *AgentCheckCommand) checkServerHealth(info map[string]map[string]string, minPeers int) int {
    95  	raft := info["raft"]
    96  	knownPeers, err := strconv.Atoi(raft["num_peers"])
    97  	if err != nil {
    98  		c.Ui.Output(fmt.Sprintf("unable to get known peers: %v", err))
    99  		return HealthCritical
   100  	}
   101  
   102  	if knownPeers < minPeers {
   103  		c.Ui.Output(fmt.Sprintf("known peers: %v, is less than expected number of peers: %v", knownPeers, minPeers))
   104  		return HealthCritical
   105  	}
   106  	return HealthPass
   107  }
   108  
   109  // checkClientHealth returns the health of a client
   110  func (c *AgentCheckCommand) checkClientHealth(clientStats map[string]string, minServers int) int {
   111  	knownServers, err := strconv.Atoi(clientStats["known_servers"])
   112  	if err != nil {
   113  		c.Ui.Output(fmt.Sprintf("unable to get known servers: %v", err))
   114  		return HealthCritical
   115  	}
   116  
   117  	heartbeatTTL, err := time.ParseDuration(clientStats["heartbeat_ttl"])
   118  	if err != nil {
   119  		c.Ui.Output(fmt.Sprintf("unable to parse heartbeat TTL: %v", err))
   120  		return HealthCritical
   121  	}
   122  
   123  	lastHeartbeat, err := time.ParseDuration(clientStats["last_heartbeat"])
   124  	if err != nil {
   125  		c.Ui.Output(fmt.Sprintf("unable to parse last heartbeat: %v", err))
   126  		return HealthCritical
   127  	}
   128  
   129  	if lastHeartbeat > heartbeatTTL {
   130  		c.Ui.Output(fmt.Sprintf("last heartbeat was %q time ago, expected heartbeat ttl: %q", lastHeartbeat, heartbeatTTL))
   131  		return HealthCritical
   132  	}
   133  
   134  	if knownServers < minServers {
   135  		c.Ui.Output(fmt.Sprintf("known servers: %v, is less than expected number of servers: %v", knownServers, minServers))
   136  		return HealthCritical
   137  	}
   138  
   139  	return HealthPass
   140  }
   141  
   142  func (c *AgentCheckCommand) AutocompleteFlags() complete.Flags {
   143  	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
   144  		complete.Flags{
   145  			"-min-peers":   complete.PredictAnything,
   146  			"-min-servers": complete.PredictAnything,
   147  		})
   148  }
   149  
   150  func (c *AgentCheckCommand) AutocompleteArgs() complete.Predictor {
   151  	return complete.PredictNothing
   152  }