github.com/djenriquez/nomad-1@v0.8.1/command/agent/command.go (about)

     1  package agent
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"io"
     7  	"log"
     8  	"os"
     9  	"os/signal"
    10  	"path/filepath"
    11  	"reflect"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"syscall"
    16  	"time"
    17  
    18  	metrics "github.com/armon/go-metrics"
    19  	"github.com/armon/go-metrics/circonus"
    20  	"github.com/armon/go-metrics/datadog"
    21  	"github.com/armon/go-metrics/prometheus"
    22  	"github.com/hashicorp/consul/lib"
    23  	checkpoint "github.com/hashicorp/go-checkpoint"
    24  	gsyslog "github.com/hashicorp/go-syslog"
    25  	"github.com/hashicorp/logutils"
    26  	flaghelper "github.com/hashicorp/nomad/helper/flag-helpers"
    27  	gatedwriter "github.com/hashicorp/nomad/helper/gated-writer"
    28  	"github.com/hashicorp/nomad/nomad/structs/config"
    29  	"github.com/hashicorp/nomad/version"
    30  	"github.com/mitchellh/cli"
    31  	"github.com/posener/complete"
    32  )
    33  
// gracefulTimeout controls how long we wait for a graceful leave
// (agent.Leave) to complete before forcefully terminating.
const gracefulTimeout = 5 * time.Second
    36  
// Command is a Command implementation that runs a Nomad agent.
// The command will not end unless a shutdown message is sent on the
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
// exit.
type Command struct {
	Version    *version.VersionInfo // version info reported at startup and to the update checker
	Ui         cli.Ui               // sink for all user-facing output; wrapped in a PrefixedUi by Run
	ShutdownCh <-chan struct{}      // a receive here triggers a shutdown (treated as os.Interrupt)

	args           []string              // raw CLI args, stored by Run and parsed by readConfig
	agent          *Agent                // the running agent, populated by setupAgent
	httpServer     *HTTPServer           // HTTP API server, populated by setupAgent
	logFilter      *logutils.LevelFilter // level filter shared by the log output and syslog wrapper
	logOutput      io.Writer             // combined log sink built by setupLoggers
	retryJoinErrCh chan struct{}         // closed by retryJoin when all join retries are exhausted
}
    53  
// readConfig builds the agent's effective configuration by layering, in
// order: the base config (DevConfig or DefaultConfig), the enterprise
// overlay, each -config file/directory in the order given, and finally the
// CLI flags. It then normalizes addresses and validates the result. Any
// parse or validation failure is reported through the UI and nil is
// returned; callers treat nil as a fatal startup error.
func (c *Command) readConfig() *Config {
	var dev bool
	var configPath []string
	var servers string
	var meta []string

	// Make a new, empty config.
	cmdConfig := &Config{
		Client: &ClientConfig{},
		Consul: &config.ConsulConfig{},
		Ports:  &Ports{},
		Server: &ServerConfig{},
		Vault:  &config.VaultConfig{},
		ACL:    &ACLConfig{},
	}

	flags := flag.NewFlagSet("agent", flag.ContinueOnError)
	flags.Usage = func() { c.Ui.Error(c.Help()) }

	// Role options
	flags.BoolVar(&dev, "dev", false, "")
	flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
	flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")

	// Server-only options
	flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
	flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "")
	flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "")
	flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "")
	flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")
	flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "")

	// Client-only options
	flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
	flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "")
	flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "")
	flags.StringVar(&servers, "servers", "", "")
	flags.Var((*flaghelper.StringFlag)(&meta), "meta", "")
	flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "")
	flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "")

	// General options
	flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config")
	flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
	flags.StringVar(&cmdConfig.Region, "region", "", "")
	flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
	flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
	flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
	flags.StringVar(&cmdConfig.NodeName, "node", "", "")

	// Consul options. Boolean Consul/Vault options use FuncBoolVar so that
	// an unset flag leaves the field nil, letting file values survive merge.
	flags.StringVar(&cmdConfig.Consul.Auth, "consul-auth", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.AutoAdvertise = &b
		return nil
	}), "consul-auto-advertise", "")
	flags.StringVar(&cmdConfig.Consul.CAFile, "consul-ca-file", "", "")
	flags.StringVar(&cmdConfig.Consul.CertFile, "consul-cert-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ChecksUseAdvertise = &b
		return nil
	}), "consul-checks-use-advertise", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ClientAutoJoin = &b
		return nil
	}), "consul-client-auto-join", "")
	flags.StringVar(&cmdConfig.Consul.ClientServiceName, "consul-client-service-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ClientHTTPCheckName, "consul-client-http-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerServiceName, "consul-server-service-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerHTTPCheckName, "consul-server-http-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerSerfCheckName, "consul-server-serf-check-name", "", "")
	flags.StringVar(&cmdConfig.Consul.ServerRPCCheckName, "consul-server-rpc-check-name", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.ServerAutoJoin = &b
		return nil
	}), "consul-server-auto-join", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.EnableSSL = &b
		return nil
	}), "consul-ssl", "")
	flags.StringVar(&cmdConfig.Consul.Token, "consul-token", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Consul.VerifySSL = &b
		return nil
	}), "consul-verify-ssl", "")
	flags.StringVar(&cmdConfig.Consul.Addr, "consul-address", "", "")

	// Vault options
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.Enabled = &b
		return nil
	}), "vault-enabled", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.AllowUnauthenticated = &b
		return nil
	}), "vault-allow-unauthenticated", "")
	flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "")
	flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "")
	flags.StringVar(&cmdConfig.Vault.Role, "vault-create-from-role", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.TLSSkipVerify = &b
		return nil
	}), "vault-tls-skip-verify", "")
	flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "")

	// ACL options
	flags.BoolVar(&cmdConfig.ACL.Enabled, "acl-enabled", false, "")
	flags.StringVar(&cmdConfig.ACL.ReplicationToken, "acl-replication-token", "", "")

	if err := flags.Parse(c.args); err != nil {
		return nil
	}

	// Split the servers.
	if servers != "" {
		cmdConfig.Client.Servers = strings.Split(servers, ",")
	}

	// Parse the meta flags: each value must be "key=value".
	metaLength := len(meta)
	if metaLength != 0 {
		cmdConfig.Client.Meta = make(map[string]string, metaLength)
		for _, kv := range meta {
			parts := strings.SplitN(kv, "=", 2)
			if len(parts) != 2 {
				c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv))
				return nil
			}

			cmdConfig.Client.Meta[parts[0]] = parts[1]
		}
	}

	// Load the configuration.
	// NOTE: this local deliberately shadows the imported structs/config
	// package for the remainder of the function.
	var config *Config
	if dev {
		config = DevConfig()
	} else {
		config = DefaultConfig()
	}

	// Merge in the enterprise overlay
	config.Merge(DefaultEntConfig())

	// Merge each -config path in order; later files win over earlier ones.
	for _, path := range configPath {
		current, err := LoadConfig(path)
		if err != nil {
			c.Ui.Error(fmt.Sprintf(
				"Error loading configuration from %s: %s", path, err))
			return nil
		}

		// The user asked us to load some config here but we didn't find any,
		// so we'll complain but continue.
		if current == nil || reflect.DeepEqual(current, &Config{}) {
			c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path))
		}

		if config == nil {
			config = current
		} else {
			config = config.Merge(current)
		}
	}

	// Ensure the sub-structs at least exist
	if config.Client == nil {
		config.Client = &ClientConfig{}
	}
	if config.Server == nil {
		config.Server = &ServerConfig{}
	}

	// Merge any CLI options over config file options
	config = config.Merge(cmdConfig)

	// Set the version info
	config.Version = c.Version

	// Normalize binds, ports, addresses, and advertise
	if err := config.normalizeAddrs(); err != nil {
		c.Ui.Error(err.Error())
		return nil
	}

	// Check to see if we should read the Vault token from the environment
	if config.Vault.Token == "" {
		if token, ok := os.LookupEnv("VAULT_TOKEN"); ok {
			config.Vault.Token = token
		}
	}

	if dev {
		// Skip validation for dev mode
		return config
	}

	if config.Server.EncryptKey != "" {
		if _, err := config.Server.EncryptBytes(); err != nil {
			c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
			return nil
		}
		// An existing keyring on disk takes precedence over -encrypt.
		keyfile := filepath.Join(config.DataDir, serfKeyring)
		if _, err := os.Stat(keyfile); err == nil {
			c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring")
		}
	}

	// Parse the RetryInterval.
	dur, err := time.ParseDuration(config.Server.RetryInterval)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err))
		return nil
	}
	config.Server.retryInterval = dur

	// Check that the server is running in at least one mode.
	if !(config.Server.Enabled || config.Client.Enabled) {
		c.Ui.Error("Must specify either server, client or dev mode for the agent.")
		return nil
	}

	// Verify the paths are absolute.
	dirs := map[string]string{
		"data-dir":  config.DataDir,
		"alloc-dir": config.Client.AllocDir,
		"state-dir": config.Client.StateDir,
	}
	for k, dir := range dirs {
		if dir == "" {
			continue
		}

		if !filepath.IsAbs(dir) {
			c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir))
			return nil
		}
	}

	// Ensure that we have the directories we need to run.
	if config.Server.Enabled && config.DataDir == "" {
		c.Ui.Error("Must specify data directory")
		return nil
	}

	// The config is valid if the top-level data-dir is set or if both
	// alloc-dir and state-dir are set.
	if config.Client.Enabled && config.DataDir == "" {
		if config.Client.AllocDir == "" || config.Client.StateDir == "" {
			c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.")
			return nil
		}
	}

	// Check the bootstrap flags
	if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
		c.Ui.Error("Bootstrap requires server mode to be enabled")
		return nil
	}
	if config.Server.BootstrapExpect == 1 {
		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
	}

	// Set up the TLS configuration properly if we have one.
	// XXX chelseakomlo: set up a TLSConfig New method which would wrap
	// constructor-type actions like this.
	if config.TLSConfig != nil && !config.TLSConfig.IsEmpty() {
		if err := config.TLSConfig.SetChecksum(); err != nil {
			c.Ui.Error(fmt.Sprintf("WARNING: Error when parsing TLS configuration: %v", err))
		}
	}

	return config
}
   335  
   336  // setupLoggers is used to setup the logGate, logWriter, and our logOutput
   337  func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) {
   338  	// Setup logging. First create the gated log writer, which will
   339  	// store logs until we're ready to show them. Then create the level
   340  	// filter, filtering logs of the specified level.
   341  	logGate := &gatedwriter.Writer{
   342  		Writer: &cli.UiWriter{Ui: c.Ui},
   343  	}
   344  
   345  	c.logFilter = LevelFilter()
   346  	c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
   347  	c.logFilter.Writer = logGate
   348  	if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
   349  		c.Ui.Error(fmt.Sprintf(
   350  			"Invalid log level: %s. Valid log levels are: %v",
   351  			c.logFilter.MinLevel, c.logFilter.Levels))
   352  		return nil, nil, nil
   353  	}
   354  
   355  	// Check if syslog is enabled
   356  	var syslog io.Writer
   357  	if config.EnableSyslog {
   358  		l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
   359  		if err != nil {
   360  			c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
   361  			return nil, nil, nil
   362  		}
   363  		syslog = &SyslogWrapper{l, c.logFilter}
   364  	}
   365  
   366  	// Create a log writer, and wrap a logOutput around it
   367  	logWriter := NewLogWriter(512)
   368  	var logOutput io.Writer
   369  	if syslog != nil {
   370  		logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
   371  	} else {
   372  		logOutput = io.MultiWriter(c.logFilter, logWriter)
   373  	}
   374  	c.logOutput = logOutput
   375  	log.SetOutput(logOutput)
   376  	return logGate, logWriter, logOutput
   377  }
   378  
   379  // setupAgent is used to start the agent and various interfaces
   380  func (c *Command) setupAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) error {
   381  	c.Ui.Output("Starting Nomad agent...")
   382  	agent, err := NewAgent(config, logOutput, inmem)
   383  	if err != nil {
   384  		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
   385  		return err
   386  	}
   387  	c.agent = agent
   388  
   389  	// Setup the HTTP server
   390  	http, err := NewHTTPServer(agent, config)
   391  	if err != nil {
   392  		agent.Shutdown()
   393  		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
   394  		return err
   395  	}
   396  	c.httpServer = http
   397  
   398  	// Setup update checking
   399  	if config.DisableUpdateCheck != nil && *config.DisableUpdateCheck {
   400  		version := config.Version.Version
   401  		if config.Version.VersionPrerelease != "" {
   402  			version += fmt.Sprintf("-%s", config.Version.VersionPrerelease)
   403  		}
   404  		updateParams := &checkpoint.CheckParams{
   405  			Product: "nomad",
   406  			Version: version,
   407  		}
   408  		if !config.DisableAnonymousSignature {
   409  			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
   410  		}
   411  
   412  		// Schedule a periodic check with expected interval of 24 hours
   413  		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)
   414  
   415  		// Do an immediate check within the next 30 seconds
   416  		go func() {
   417  			time.Sleep(lib.RandomStagger(30 * time.Second))
   418  			c.checkpointResults(checkpoint.Check(updateParams))
   419  		}()
   420  	}
   421  
   422  	return nil
   423  }
   424  
   425  // checkpointResults is used to handler periodic results from our update checker
   426  func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) {
   427  	if err != nil {
   428  		c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err))
   429  		return
   430  	}
   431  	if results.Outdated {
   432  		c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, c.Version.VersionNumber()))
   433  	}
   434  	for _, alert := range results.Alerts {
   435  		switch alert.Level {
   436  		case "info":
   437  			c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   438  		default:
   439  			c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   440  		}
   441  	}
   442  }
   443  
   444  func (c *Command) AutocompleteFlags() complete.Flags {
   445  	configFilePredictor := complete.PredictOr(
   446  		complete.PredictFiles("*.json"),
   447  		complete.PredictFiles("*.hcl"))
   448  
   449  	return map[string]complete.Predictor{
   450  		"-config": configFilePredictor,
   451  	}
   452  }
   453  
// AutocompleteArgs returns a predictor for positional arguments; the agent
// command takes none, so there is nothing to predict.
func (c *Command) AutocompleteArgs() complete.Predictor {
	return nil
}
   457  
// Run is the agent command's entry point: it parses configuration, wires up
// logging and telemetry, starts the agent and its HTTP server, performs any
// startup join, and then blocks in handleSignals until shutdown. It returns
// the process exit code.
func (c *Command) Run(args []string) int {
	// Wrap the UI so output/info/error lines carry consistent prefixes.
	c.Ui = &cli.PrefixedUi{
		OutputPrefix: "==> ",
		InfoPrefix:   "    ",
		ErrorPrefix:  "==> ",
		Ui:           c.Ui,
	}

	// Parse our configs
	c.args = args
	config := c.readConfig()
	if config == nil {
		return 1
	}

	// Setup the log outputs; a nil gate means setup failed and was already
	// reported to the UI.
	logGate, _, logOutput := c.setupLoggers(config)
	if logGate == nil {
		return 1
	}

	// Log config files
	if len(config.Files) > 0 {
		c.Ui.Output(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", ")))
	} else {
		c.Ui.Output("No configuration files loaded")
	}

	// Initialize the telemetry
	inmem, err := c.setupTelemetry(config)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
		return 1
	}

	// Create the agent
	if err := c.setupAgent(config, logOutput, inmem); err != nil {
		logGate.Flush()
		return 1
	}
	defer c.agent.Shutdown()

	// Shutdown the HTTP server at the end. Registered after the agent
	// shutdown defer so it runs first (LIFO).
	defer func() {
		if c.httpServer != nil {
			c.httpServer.Shutdown()
		}
	}()

	// Join startup nodes if specified
	if err := c.startupJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Compile agent information for output later
	info := make(map[string]string)
	info["version"] = config.Version.VersionNumber()
	info["client"] = strconv.FormatBool(config.Client.Enabled)
	info["log level"] = config.LogLevel
	info["server"] = strconv.FormatBool(config.Server.Enabled)
	info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter)

	// Sort the keys for output
	infoKeys := make([]string, 0, len(info))
	for key := range info {
		infoKeys = append(infoKeys, key)
	}
	sort.Strings(infoKeys)

	// Agent configuration output, right-aligned to a fixed padding width.
	padding := 18
	c.Ui.Output("Nomad agent configuration:\n")
	for _, k := range infoKeys {
		c.Ui.Info(fmt.Sprintf(
			"%s%s: %s",
			strings.Repeat(" ", padding-len(k)),
			strings.Title(k),
			info[k]))
	}
	c.Ui.Output("")

	// Output the header that the server has started
	c.Ui.Output("Nomad agent started! Log data will stream in below:\n")

	// Enable log streaming: release everything buffered by the gated writer.
	logGate.Flush()

	// Start retry join process
	c.retryJoinErrCh = make(chan struct{})
	go c.retryJoin(config)

	// Wait for exit
	return c.handleSignals()
}
   553  
   554  // handleSignals blocks until we get an exit-causing signal
   555  func (c *Command) handleSignals() int {
   556  	signalCh := make(chan os.Signal, 4)
   557  	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)
   558  
   559  	// Wait for a signal
   560  WAIT:
   561  	var sig os.Signal
   562  	select {
   563  	case s := <-signalCh:
   564  		sig = s
   565  	case <-c.ShutdownCh:
   566  		sig = os.Interrupt
   567  	case <-c.retryJoinErrCh:
   568  		return 1
   569  	}
   570  
   571  	// Skip any SIGPIPE signal and don't try to log it (See issues #1798, #3554)
   572  	if sig == syscall.SIGPIPE {
   573  		goto WAIT
   574  	}
   575  
   576  	c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))
   577  
   578  	// Check if this is a SIGHUP
   579  	if sig == syscall.SIGHUP {
   580  		c.handleReload()
   581  		goto WAIT
   582  	}
   583  
   584  	// Check if we should do a graceful leave
   585  	graceful := false
   586  	if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt {
   587  		graceful = true
   588  	} else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm {
   589  		graceful = true
   590  	}
   591  
   592  	// Bail fast if not doing a graceful leave
   593  	if !graceful {
   594  		return 1
   595  	}
   596  
   597  	// Attempt a graceful leave
   598  	gracefulCh := make(chan struct{})
   599  	c.Ui.Output("Gracefully shutting down agent...")
   600  	go func() {
   601  		if err := c.agent.Leave(); err != nil {
   602  			c.Ui.Error(fmt.Sprintf("Error: %s", err))
   603  			return
   604  		}
   605  		close(gracefulCh)
   606  	}()
   607  
   608  	// Wait for leave or another signal
   609  	select {
   610  	case <-signalCh:
   611  		return 1
   612  	case <-time.After(gracefulTimeout):
   613  		return 1
   614  	case <-gracefulCh:
   615  		return 0
   616  	}
   617  }
   618  
   619  // reloadHTTPServer shuts down the existing HTTP server and restarts it. This
   620  // is helpful when reloading the agent configuration.
   621  func (c *Command) reloadHTTPServer() error {
   622  	c.agent.logger.Println("[INFO] agent: Reloading HTTP server with new TLS configuration")
   623  
   624  	c.httpServer.Shutdown()
   625  
   626  	http, err := NewHTTPServer(c.agent, c.agent.config)
   627  	if err != nil {
   628  		return err
   629  	}
   630  	c.httpServer = http
   631  
   632  	return nil
   633  }
   634  
   635  // handleReload is invoked when we should reload our configs, e.g. SIGHUP
   636  func (c *Command) handleReload() {
   637  	c.Ui.Output("Reloading configuration...")
   638  	newConf := c.readConfig()
   639  	if newConf == nil {
   640  		c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
   641  		return
   642  	}
   643  
   644  	// Change the log level
   645  	minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
   646  	if ValidateLevelFilter(minLevel, c.logFilter) {
   647  		c.logFilter.SetMinLevel(minLevel)
   648  	} else {
   649  		c.Ui.Error(fmt.Sprintf(
   650  			"Invalid log level: %s. Valid log levels are: %v",
   651  			minLevel, c.logFilter.Levels))
   652  
   653  		// Keep the current log level
   654  		newConf.LogLevel = c.agent.GetConfig().LogLevel
   655  	}
   656  
   657  	shouldReloadAgent, shouldReloadHTTP, shouldReloadRPC := c.agent.ShouldReload(newConf)
   658  	if shouldReloadAgent {
   659  		c.agent.logger.Printf("[DEBUG] agent: starting reload of agent config")
   660  		err := c.agent.Reload(newConf)
   661  		if err != nil {
   662  			c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
   663  			return
   664  		}
   665  	}
   666  
   667  	if shouldReloadRPC {
   668  		if s := c.agent.Server(); s != nil {
   669  			sconf, err := convertServerConfig(newConf, c.logOutput)
   670  			c.agent.logger.Printf("[DEBUG] agent: starting reload of server config")
   671  			if err != nil {
   672  				c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err)
   673  				return
   674  			} else {
   675  				if err := s.Reload(sconf); err != nil {
   676  					c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err)
   677  					return
   678  				}
   679  			}
   680  		}
   681  
   682  		if s := c.agent.Client(); s != nil {
   683  			clientConfig, err := c.agent.clientConfig()
   684  			c.agent.logger.Printf("[DEBUG] agent: starting reload of client config")
   685  			if err != nil {
   686  				c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err)
   687  				return
   688  			}
   689  			if err := c.agent.Client().Reload(clientConfig); err != nil {
   690  				c.agent.logger.Printf("[ERR] agent: reloading client config failed: %v", err)
   691  				return
   692  			}
   693  		}
   694  	}
   695  
   696  	// reload HTTP server after we have reloaded both client and server, in case
   697  	// we error in either of the above cases. For example, reloading the http
   698  	// server to a TLS connection could succeed, while reloading the server's rpc
   699  	// connections could fail.
   700  	if shouldReloadHTTP {
   701  		err := c.reloadHTTPServer()
   702  		if err != nil {
   703  			c.agent.logger.Printf("[ERR] http: failed to reload the config: %v", err)
   704  			return
   705  		}
   706  	}
   707  }
   708  
// setupTelemetry is used to set up the telemetry sub-systems. It always
// creates the in-memory sink (returned for use by the agent's metrics
// endpoints) and, based on the Telemetry config, fans metrics out to
// statsite, statsd, Prometheus, DataDog, and/or Circonus sinks.
func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) {
	/* Setup telemetry
	Aggregate on 10 second intervals for 1 minute. Expose the
	metrics over stderr when there is a SIGUSR1 received.
	*/
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)

	// Treat a missing Telemetry stanza as an empty one.
	var telConfig *Telemetry
	if config.Telemetry == nil {
		telConfig = &Telemetry{}
	} else {
		telConfig = config.Telemetry
	}

	metricsConf := metrics.DefaultConfig("nomad")
	metricsConf.EnableHostname = !telConfig.DisableHostname

	// Prefer the hostname as a label.
	metricsConf.EnableHostnameLabel = !telConfig.DisableHostname &&
		!telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics

	if telConfig.UseNodeName {
		metricsConf.HostName = config.NodeName
		metricsConf.EnableHostname = true
	}

	// Configure the statsite sink
	var fanout metrics.FanoutSink
	if telConfig.StatsiteAddr != "" {
		sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, sink)
	}

	// Configure the statsd sink
	if telConfig.StatsdAddr != "" {
		sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, sink)
	}

	// Configure the prometheus sink
	if telConfig.PrometheusMetrics {
		promSink, err := prometheus.NewPrometheusSink()
		if err != nil {
			return inm, err
		}
		fanout = append(fanout, promSink)
	}

	// Configure the datadog sink
	if telConfig.DataDogAddr != "" {
		sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName)
		if err != nil {
			return inm, err
		}
		sink.SetTags(telConfig.DataDogTags)
		fanout = append(fanout, sink)
	}

	// Configure the Circonus sink
	if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" {
		cfg := &circonus.Config{}
		cfg.Interval = telConfig.CirconusSubmissionInterval
		cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken
		cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp
		cfg.CheckManager.API.URL = telConfig.CirconusAPIURL
		cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL
		cfg.CheckManager.Check.ID = telConfig.CirconusCheckID
		cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation
		cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID
		cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag
		cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags
		cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName
		cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID
		cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag

		// Fill in Circonus defaults for anything the user left blank.
		if cfg.CheckManager.Check.DisplayName == "" {
			cfg.CheckManager.Check.DisplayName = "Nomad"
		}

		if cfg.CheckManager.API.TokenApp == "" {
			cfg.CheckManager.API.TokenApp = "nomad"
		}

		if cfg.CheckManager.Check.SearchTag == "" {
			cfg.CheckManager.Check.SearchTag = "service:nomad"
		}

		sink, err := circonus.NewCirconusSink(cfg)
		if err != nil {
			return inm, err
		}
		sink.Start()
		fanout = append(fanout, sink)
	}

	// Initialize the global sink. With external sinks configured, the
	// in-memory sink is appended to the fanout; otherwise it becomes the
	// sole global sink with hostname prefixing disabled.
	if len(fanout) > 0 {
		fanout = append(fanout, inm)
		metrics.NewGlobal(metricsConf, fanout)
	} else {
		metricsConf.EnableHostname = false
		metrics.NewGlobal(metricsConf, inm)
	}
	return inm, nil
}
   822  
   823  func (c *Command) startupJoin(config *Config) error {
   824  	if len(config.Server.StartJoin) == 0 || !config.Server.Enabled {
   825  		return nil
   826  	}
   827  
   828  	c.Ui.Output("Joining cluster...")
   829  	n, err := c.agent.server.Join(config.Server.StartJoin)
   830  	if err != nil {
   831  		return err
   832  	}
   833  
   834  	c.Ui.Output(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
   835  	return nil
   836  }
   837  
   838  // retryJoin is used to handle retrying a join until it succeeds or all retries
   839  // are exhausted.
   840  func (c *Command) retryJoin(config *Config) {
   841  	if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
   842  		return
   843  	}
   844  
   845  	logger := c.agent.logger
   846  	logger.Printf("[INFO] agent: Joining cluster...")
   847  
   848  	attempt := 0
   849  	for {
   850  		n, err := c.agent.server.Join(config.Server.RetryJoin)
   851  		if err == nil {
   852  			logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
   853  			return
   854  		}
   855  
   856  		attempt++
   857  		if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
   858  			logger.Printf("[ERR] agent: max join retry exhausted, exiting")
   859  			close(c.retryJoinErrCh)
   860  			return
   861  		}
   862  
   863  		logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
   864  			config.Server.RetryInterval)
   865  		time.Sleep(config.Server.retryInterval)
   866  	}
   867  }
   868  
   869  func (c *Command) Synopsis() string {
   870  	return "Runs a Nomad agent"
   871  }
   872  
   873  func (c *Command) Help() string {
   874  	helpText := `
   875  Usage: nomad agent [options]
   876  
   877    Starts the Nomad agent and runs until an interrupt is received.
   878    The agent may be a client and/or server.
   879  
   880    The Nomad agent's configuration primarily comes from the config
   881    files used, but a subset of the options may also be passed directly
   882    as CLI arguments, listed below.
   883  
   884  General Options (clients and servers):
   885  
   886    -bind=<addr>
   887      The address the agent will bind to for all of its various network
   888      services. The individual services that run bind to individual
   889      ports on this address. Defaults to the loopback 127.0.0.1.
   890  
   891    -config=<path>
   892      The path to either a single config file or a directory of config
   893      files to use for configuring the Nomad agent. This option may be
   894      specified multiple times. If multiple config files are used, the
   895      values from each will be merged together. During merging, values
   896      from files found later in the list are merged over values from
   897      previously parsed files.
   898  
   899    -data-dir=<path>
   900      The data directory used to store state and other persistent data.
   901      On client machines this is used to house allocation data such as
   902      downloaded artifacts used by drivers. On server nodes, the data
   903      dir is also used to store the replicated log.
   904  
   905    -dc=<datacenter>
   906      The name of the datacenter this Nomad agent is a member of. By
   907      default this is set to "dc1".
   908  
   909    -log-level=<level>
   910      Specify the verbosity level of Nomad's logs. Valid values include
   911      DEBUG, INFO, and WARN, in decreasing order of verbosity. The
   912      default is INFO.
   913  
   914    -node=<name>
   915      The name of the local agent. This name is used to identify the node
   916      in the cluster. The name must be unique per region. The default is
   917      the current hostname of the machine.
   918  
   919    -region=<region>
   920      Name of the region the Nomad agent will be a member of. By default
   921      this value is set to "global".
   922  
   923    -dev
   924      Start the agent in development mode. This enables a pre-configured
   925      dual-role agent (client + server) which is useful for developing
   926      or testing Nomad. No other configuration is required to start the
   927      agent in this mode.
   928  
   929  Server Options:
   930  
   931    -server
   932      Enable server mode for the agent. Agents in server mode are
   933      clustered together and handle the additional responsibility of
   934      leader election, data replication, and scheduling work onto
   935      eligible client nodes.
   936  
   937    -bootstrap-expect=<num>
   938      Configures the expected number of servers nodes to wait for before
   939      bootstrapping the cluster. Once <num> servers have joined each other,
   940      Nomad initiates the bootstrap process.
   941  
   942    -encrypt=<key>
   943      Provides the gossip encryption key
   944  
   945    -join=<address>
   946      Address of an agent to join at start time. Can be specified
   947      multiple times.
   948  
   949    -raft-protocol=<num>
   950      The Raft protocol version to use. Used for enabling certain Autopilot
   951      features. Defaults to 2.
   952  
   953    -retry-join=<address>
   954      Address of an agent to join at start time with retries enabled.
   955      Can be specified multiple times.
   956  
   957    -retry-max=<num>
   958      Maximum number of join attempts. Defaults to 0, which will retry
   959      indefinitely.
   960  
   961    -retry-interval=<dur>
   962      Time to wait between join attempts.
   963  
   964    -rejoin
   965      Ignore a previous leave and attempts to rejoin the cluster.
   966  
   967  Client Options:
   968  
   969    -client
   970      Enable client mode for the agent. Client mode enables a given node to be
   971      evaluated for allocations. If client mode is not enabled, no work will be
   972      scheduled to the agent.
   973  
   974    -state-dir
   975      The directory used to store state and other persistent data. If not
   976      specified a subdirectory under the "-data-dir" will be used.
   977  
   978    -alloc-dir
   979      The directory used to store allocation data such as downloaded artifacts as
   980      well as data produced by tasks. If not specified, a subdirectory under the
   981      "-data-dir" will be used.
   982  
   983    -servers
   984      A list of known server addresses to connect to given as "host:port" and
   985      delimited by commas.
   986  
   987    -node-class
   988      Mark this node as a member of a node-class. This can be used to label
   989      similar node types.
   990  
   991    -meta
   992      User specified metadata to associated with the node. Each instance of -meta
   993      parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair
   994      to be added.
   995  
   996    -network-interface
   997      Forces the network fingerprinter to use the specified network interface.
   998  
   999    -network-speed
  1000      The default speed for network interfaces in MBits if the link speed can not
  1001      be determined dynamically.
  1002  
  1003  ACL Options:
  1004  
  1005    -acl-enabled
  1006      Specifies whether the agent should enable ACLs.
  1007  
  1008    -acl-replication-token
  1009      The replication token for servers to use when replicating from the
  1010      authoritative region. The token must be a valid management token from the
  1011      authoritative region.
  1012  
  1013  Consul Options:
  1014  
  1015    -consul-address=<addr>
  1016      Specifies the address to the local Consul agent, given in the format host:port.
  1017      Supports Unix sockets with the format: unix:///tmp/consul/consul.sock
  1018  
  1019    -consul-auth=<auth>
  1020      Specifies the HTTP Basic Authentication information to use for access to the
  1021      Consul Agent, given in the format username:password.
  1022  
  1023    -consul-auto-advertise
  1024      Specifies if Nomad should advertise its services in Consul. The services
  1025      are named according to server_service_name and client_service_name. Nomad
  1026      servers and clients advertise their respective services, each tagged
  1027      appropriately with either http or rpc tag. Nomad servers also advertise a
  1028      serf tagged service.
  1029  
  1030    -consul-ca-file=<path>
  1031      Specifies an optional path to the CA certificate used for Consul communication.
  1032      This defaults to the system bundle if unspecified.
  1033  
  1034    -consul-cert-file=<path>
  1035      Specifies the path to the certificate used for Consul communication. If this
  1036      is set then you need to also set key_file.
  1037  
  1038    -consul-checks-use-advertise
  1039      Specifies if Consul heath checks should bind to the advertise address. By
  1040      default, this is the bind address.
  1041  
  1042    -consul-client-auto-join
  1043      Specifies if the Nomad clients should automatically discover servers in the
  1044      same region by searching for the Consul service name defined in the
  1045      server_service_name option.
  1046  
  1047    -consul-client-service-name=<name>
  1048      Specifies the name of the service in Consul for the Nomad clients.
  1049  
  1050    -consul-client-http-check-name=<name>
  1051      Specifies the HTTP health check name in Consul for the Nomad clients.
  1052  
  1053    -consul-key-file=<path>
  1054      Specifies the path to the private key used for Consul communication. If this
  1055      is set then you need to also set cert_file.
  1056  
  1057    -consul-server-service-name=<name>
  1058      Specifies the name of the service in Consul for the Nomad servers.
  1059  
  1060    -consul-server-http-check-name=<name>
  1061      Specifies the HTTP health check name in Consul for the Nomad servers.
  1062  
  1063    -consul-server-serf-check-name=<name>
  1064      Specifies the Serf health check name in Consul for the Nomad servers.
  1065  
  1066    -consul-server-rpc-check-name=<name>
  1067      Specifies the RPC health check name in Consul for the Nomad servers.
  1068  
  1069    -consul-server-auto-join
  1070      Specifies if the Nomad servers should automatically discover and join other
  1071      Nomad servers by searching for the Consul service name defined in the
  1072      server_service_name option. This search only happens if the server does not
  1073      have a leader.
  1074  
  1075    -consul-ssl
  1076      Specifies if the transport scheme should use HTTPS to communicate with the
  1077      Consul agent.
  1078  
  1079    -consul-token=<token>
  1080      Specifies the token used to provide a per-request ACL token.
  1081  
  1082    -consul-verify-ssl
  1083      Specifies if SSL peer verification should be used when communicating to the
  1084      Consul API client over HTTPS.
  1085  
  1086  Vault Options:
  1087  
  1088    -vault-enabled
  1089      Whether to enable or disable Vault integration.
  1090  
  1091    -vault-address=<addr>
  1092      The address to communicate with Vault. This should be provided with the http://
  1093      or https:// prefix.
  1094  
  1095    -vault-token=<token>
  1096      The Vault token used to derive tokens from Vault on behalf of clients.
  1097      This only needs to be set on Servers. Overrides the Vault token read from
  1098      the VAULT_TOKEN environment variable.
  1099  
  1100    -vault-create-from-role=<role>
  1101      The role name to create tokens for tasks from.
  1102  
  1103    -vault-allow-unauthenticated
  1104      Whether to allow jobs to be submitted that request Vault Tokens but do not
  1105      authentication. The flag only applies to Servers.
  1106  
  1107    -vault-ca-file=<path>
  1108      The path to a PEM-encoded CA cert file to use to verify the Vault server SSL
  1109      certificate.
  1110  
  1111    -vault-ca-path=<path>
  1112      The path to a directory of PEM-encoded CA cert files to verify the Vault server
  1113      certificate.
  1114  
  1115    -vault-cert-file=<token>
  1116      The path to the certificate for Vault communication.
  1117  
  1118    -vault-key-file=<addr>
  1119      The path to the private key for Vault communication.
  1120  
  1121    -vault-tls-skip-verify=<token>
  1122      Enables or disables SSL certificate verification.
  1123  
  1124    -vault-tls-server-name=<token>
  1125      Used to set the SNI host when connecting over TLS.
  1126   `
  1127  	return strings.TrimSpace(helpText)
  1128  }