github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/command/agent/command.go

     1  package agent
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"io"
     7  	"log"
     8  	"os"
     9  	"os/signal"
    10  	"path/filepath"
    11  	"reflect"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"syscall"
    16  	"time"
    17  
    18  	metrics "github.com/armon/go-metrics"
    19  	"github.com/armon/go-metrics/circonus"
    20  	"github.com/armon/go-metrics/datadog"
    21  	"github.com/armon/go-metrics/prometheus"
    22  	"github.com/hashicorp/consul/lib"
    23  	checkpoint "github.com/hashicorp/go-checkpoint"
    24  	gsyslog "github.com/hashicorp/go-syslog"
    25  	"github.com/hashicorp/logutils"
    26  	flaghelper "github.com/hashicorp/nomad/helper/flag-helpers"
    27  	gatedwriter "github.com/hashicorp/nomad/helper/gated-writer"
    28  	"github.com/hashicorp/nomad/nomad/structs/config"
    29  	"github.com/hashicorp/nomad/version"
    30  	"github.com/mitchellh/cli"
    31  	"github.com/posener/complete"
    32  )
    33  
    34  // gracefulTimeout controls how long we wait before forcefully terminating
    35  const gracefulTimeout = 5 * time.Second
    36  
    37  // Command is a Command implementation that runs a Nomad agent.
    38  // The command will not end unless a shutdown message is sent on the
    39  // ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
    40  // exit.
    41  type Command struct {
    42  	Version    *version.VersionInfo
    43  	Ui         cli.Ui
    44  	ShutdownCh <-chan struct{}
    45  
    46  	args           []string
    47  	agent          *Agent
    48  	httpServer     *HTTPServer
    49  	logFilter      *logutils.LevelFilter
    50  	logOutput      io.Writer
    51  	retryJoinErrCh chan struct{}
    52  }
    53  
    54  func (c *Command) readConfig() *Config {
    55  	var dev bool
    56  	var configPath []string
    57  	var servers string
    58  	var meta []string
    59  
    60  	// Make a new, empty config.
    61  	cmdConfig := &Config{
    62  		Client: &ClientConfig{},
    63  		Consul: &config.ConsulConfig{},
    64  		Ports:  &Ports{},
    65  		Server: &ServerConfig{},
    66  		Vault:  &config.VaultConfig{},
    67  		ACL:    &ACLConfig{},
    68  	}
    69  
    70  	flags := flag.NewFlagSet("agent", flag.ContinueOnError)
    71  	flags.Usage = func() { c.Ui.Error(c.Help()) }
    72  
    73  	// Role options
    74  	flags.BoolVar(&dev, "dev", false, "")
    75  	flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
    76  	flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")
    77  
    78  	// Server-only options
    79  	flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
    80  	flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
    81  	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "")
    82  	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "")
    83  	flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "")
    84  	flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "")
    85  	flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")
    86  
    87  	// Client-only options
    88  	flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
    89  	flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "")
    90  	flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "")
    91  	flags.StringVar(&servers, "servers", "", "")
    92  	flags.Var((*flaghelper.StringFlag)(&meta), "meta", "")
    93  	flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "")
    94  	flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "")
    95  
    96  	// General options
    97  	flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config")
    98  	flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
    99  	flags.StringVar(&cmdConfig.Region, "region", "", "")
   100  	flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
   101  	flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
   102  	flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
   103  	flags.StringVar(&cmdConfig.NodeName, "node", "", "")
   104  
   105  	// Consul options
   106  	flags.StringVar(&cmdConfig.Consul.Auth, "consul-auth", "", "")
   107  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   108  		cmdConfig.Consul.AutoAdvertise = &b
   109  		return nil
   110  	}), "consul-auto-advertise", "")
   111  	flags.StringVar(&cmdConfig.Consul.CAFile, "consul-ca-file", "", "")
   112  	flags.StringVar(&cmdConfig.Consul.CertFile, "consul-cert-file", "", "")
   113  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   114  		cmdConfig.Consul.ChecksUseAdvertise = &b
   115  		return nil
   116  	}), "consul-checks-use-advertise", "")
   117  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   118  		cmdConfig.Consul.ClientAutoJoin = &b
   119  		return nil
   120  	}), "consul-client-auto-join", "")
   121  	flags.StringVar(&cmdConfig.Consul.ClientServiceName, "consul-client-service-name", "", "")
   122  	flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "")
   123  	flags.StringVar(&cmdConfig.Consul.ServerServiceName, "consul-server-service-name", "", "")
   124  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   125  		cmdConfig.Consul.ServerAutoJoin = &b
   126  		return nil
   127  	}), "consul-server-auto-join", "")
   128  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   129  		cmdConfig.Consul.EnableSSL = &b
   130  		return nil
   131  	}), "consul-ssl", "")
   132  	flags.StringVar(&cmdConfig.Consul.Token, "consul-token", "", "")
   133  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   134  		cmdConfig.Consul.VerifySSL = &b
   135  		return nil
   136  	}), "consul-verify-ssl", "")
   137  	flags.StringVar(&cmdConfig.Consul.Addr, "consul-address", "", "")
   138  
   139  	// Vault options
   140  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   141  		cmdConfig.Vault.Enabled = &b
   142  		return nil
   143  	}), "vault-enabled", "")
   144  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   145  		cmdConfig.Vault.AllowUnauthenticated = &b
   146  		return nil
   147  	}), "vault-allow-unauthenticated", "")
   148  	flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "")
   149  	flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "")
   150  	flags.StringVar(&cmdConfig.Vault.Role, "vault-create-from-role", "", "")
   151  	flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "")
   152  	flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "")
   153  	flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "")
   154  	flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "")
   155  	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
   156  		cmdConfig.Vault.TLSSkipVerify = &b
   157  		return nil
   158  	}), "vault-tls-skip-verify", "")
   159  	flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "")
   160  
   161  	// ACL options
   162  	flags.BoolVar(&cmdConfig.ACL.Enabled, "acl-enabled", false, "")
   163  	flags.StringVar(&cmdConfig.ACL.ReplicationToken, "acl-replication-token", "", "")
   164  
   165  	if err := flags.Parse(c.args); err != nil {
   166  		return nil
   167  	}
   168  
   169  	// Split the servers.
   170  	if servers != "" {
   171  		cmdConfig.Client.Servers = strings.Split(servers, ",")
   172  	}
   173  
   174  	// Parse the meta flags.
   175  	metaLength := len(meta)
   176  	if metaLength != 0 {
   177  		cmdConfig.Client.Meta = make(map[string]string, metaLength)
   178  		for _, kv := range meta {
   179  			parts := strings.SplitN(kv, "=", 2)
   180  			if len(parts) != 2 {
   181  				c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv))
   182  				return nil
   183  			}
   184  
   185  			cmdConfig.Client.Meta[parts[0]] = parts[1]
   186  		}
   187  	}
   188  
   189  	// Load the configuration
   190  	var config *Config
   191  	if dev {
   192  		config = DevConfig()
   193  	} else {
   194  		config = DefaultConfig()
   195  	}
   196  	for _, path := range configPath {
   197  		current, err := LoadConfig(path)
   198  		if err != nil {
   199  			c.Ui.Error(fmt.Sprintf(
   200  				"Error loading configuration from %s: %s", path, err))
   201  			return nil
   202  		}
   203  
   204  		// The user asked us to load some config here but we didn't find any,
   205  		// so we'll complain but continue.
   206  		if current == nil || reflect.DeepEqual(current, &Config{}) {
   207  			c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path))
   208  		}
   209  
   210  		if config == nil {
   211  			config = current
   212  		} else {
   213  			config = config.Merge(current)
   214  		}
   215  	}
   216  
   217  	// Ensure the sub-structs at least exist
   218  	if config.Client == nil {
   219  		config.Client = &ClientConfig{}
   220  	}
   221  	if config.Server == nil {
   222  		config.Server = &ServerConfig{}
   223  	}
   224  
   225  	// Merge any CLI options over config file options
   226  	config = config.Merge(cmdConfig)
   227  
   228  	// Set the version info
   229  	config.Version = c.Version
   230  
   231  	// Normalize binds, ports, addresses, and advertise
   232  	if err := config.normalizeAddrs(); err != nil {
   233  		c.Ui.Error(err.Error())
   234  		return nil
   235  	}
   236  
   237  	// Check to see if we should read the Vault token from the environment
   238  	if config.Vault.Token == "" {
   239  		if token, ok := os.LookupEnv("VAULT_TOKEN"); ok {
   240  			config.Vault.Token = token
   241  		}
   242  	}
   243  
   244  	if dev {
   245  		// Skip validation for dev mode
   246  		return config
   247  	}
   248  
   249  	if config.Server.EncryptKey != "" {
   250  		if _, err := config.Server.EncryptBytes(); err != nil {
   251  			c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
   252  			return nil
   253  		}
   254  		keyfile := filepath.Join(config.DataDir, serfKeyring)
   255  		if _, err := os.Stat(keyfile); err == nil {
   256  			c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring")
   257  		}
   258  	}
   259  
   260  	// Parse the RetryInterval.
   261  	dur, err := time.ParseDuration(config.Server.RetryInterval)
   262  	if err != nil {
   263  		c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err))
   264  		return nil
   265  	}
   266  	config.Server.retryInterval = dur
   267  
   268  	// Check that the agent is running in at least one mode.
   269  	if !(config.Server.Enabled || config.Client.Enabled) {
   270  		c.Ui.Error("Must specify either server, client or dev mode for the agent.")
   271  		return nil
   272  	}
   273  
   274  	// Verify the paths are absolute.
   275  	dirs := map[string]string{
   276  		"data-dir":  config.DataDir,
   277  		"alloc-dir": config.Client.AllocDir,
   278  		"state-dir": config.Client.StateDir,
   279  	}
   280  	for k, dir := range dirs {
   281  		if dir == "" {
   282  			continue
   283  		}
   284  
   285  		if !filepath.IsAbs(dir) {
   286  			c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir))
   287  			return nil
   288  		}
   289  	}
   290  
   291  	// Ensure that we have the directories we need to run.
   292  	if config.Server.Enabled && config.DataDir == "" {
   293  		c.Ui.Error("Must specify data directory")
   294  		return nil
   295  	}
   296  
   297  	// The config is valid if the top-level data-dir is set or if both
   298  	// alloc-dir and state-dir are set.
   299  	if config.Client.Enabled && config.DataDir == "" {
   300  		if config.Client.AllocDir == "" || config.Client.StateDir == "" {
   301  			c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.")
   302  			return nil
   303  		}
   304  	}
   305  
   306  	// Check the bootstrap flags
   307  	if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
   308  		c.Ui.Error("Bootstrap requires server mode to be enabled")
   309  		return nil
   310  	}
   311  	if config.Server.BootstrapExpect == 1 {
   312  		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
   313  	}
   314  
   315  	return config
   316  }
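
// NOTE (added example, not part of the original source): readConfig above
// builds the final configuration by layering sources, with later layers
// overriding earlier ones: DefaultConfig (or DevConfig), then each -config
// path in order, then CLI flags, then normalizeAddrs. A minimal sketch of
// that same pipeline, using only helpers already referenced in this file
// (the name exampleMergeOrder is hypothetical):
func exampleMergeOrder(paths []string, cmdConfig *Config) (*Config, error) {
	merged := DefaultConfig()
	for _, p := range paths {
		fileConf, err := LoadConfig(p)
		if err != nil {
			return nil, err
		}
		// Values from later files override earlier ones.
		merged = merged.Merge(fileConf)
	}
	// CLI flags take precedence over everything read from disk.
	merged = merged.Merge(cmdConfig)
	if err := merged.normalizeAddrs(); err != nil {
		return nil, err
	}
	return merged, nil
}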
   317  
   318  // setupLoggers is used to set up the logGate, logWriter, and our logOutput
   319  func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) {
   320  	// Setup logging. First create the gated log writer, which will
   321  	// store logs until we're ready to show them. Then create the level
   322  	// filter, filtering logs of the specified level.
   323  	logGate := &gatedwriter.Writer{
   324  		Writer: &cli.UiWriter{Ui: c.Ui},
   325  	}
   326  
   327  	c.logFilter = LevelFilter()
   328  	c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
   329  	c.logFilter.Writer = logGate
   330  	if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
   331  		c.Ui.Error(fmt.Sprintf(
   332  			"Invalid log level: %s. Valid log levels are: %v",
   333  			c.logFilter.MinLevel, c.logFilter.Levels))
   334  		return nil, nil, nil
   335  	}
   336  
   337  	// Check if syslog is enabled
   338  	var syslog io.Writer
   339  	if config.EnableSyslog {
   340  		l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
   341  		if err != nil {
   342  			c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
   343  			return nil, nil, nil
   344  		}
   345  		syslog = &SyslogWrapper{l, c.logFilter}
   346  	}
   347  
   348  	// Create a log writer, and wrap a logOutput around it
   349  	logWriter := NewLogWriter(512)
   350  	var logOutput io.Writer
   351  	if syslog != nil {
   352  		logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
   353  	} else {
   354  		logOutput = io.MultiWriter(c.logFilter, logWriter)
   355  	}
   356  	c.logOutput = logOutput
   357  	log.SetOutput(logOutput)
   358  	return logGate, logWriter, logOutput
   359  }
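
// NOTE (added example, not part of the original source): the gated writer
// created above buffers everything written to it until Flush is called,
// which is why Run prints the startup summary before calling logGate.Flush.
// A minimal sketch of that buffering behavior, assuming only the gatedwriter
// helper imported above (exampleGatedLogging is a hypothetical name):
func exampleGatedLogging(out io.Writer) {
	gate := &gatedwriter.Writer{Writer: out}
	buffered := log.New(gate, "", log.LstdFlags)
	buffered.Println("held back until the gate is flushed")
	// Nothing has reached out yet; Flush releases the buffered lines in order.
	gate.Flush()
}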
   360  
   361  // setupAgent is used to start the agent and various interfaces
   362  func (c *Command) setupAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) error {
   363  	c.Ui.Output("Starting Nomad agent...")
   364  	agent, err := NewAgent(config, logOutput, inmem)
   365  	if err != nil {
   366  		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
   367  		return err
   368  	}
   369  	c.agent = agent
   370  
   371  	// Setup the HTTP server
   372  	http, err := NewHTTPServer(agent, config)
   373  	if err != nil {
   374  		agent.Shutdown()
   375  		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
   376  		return err
   377  	}
   378  	c.httpServer = http
   379  
   380  	// Setup update checking
   381  	if !config.DisableUpdateCheck {
   382  		version := config.Version.Version
   383  		if config.Version.VersionPrerelease != "" {
   384  			version += fmt.Sprintf("-%s", config.Version.VersionPrerelease)
   385  		}
   386  		updateParams := &checkpoint.CheckParams{
   387  			Product: "nomad",
   388  			Version: version,
   389  		}
   390  		if !config.DisableAnonymousSignature {
   391  			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
   392  		}
   393  
   394  		// Schedule a periodic check with expected interval of 24 hours
   395  		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)
   396  
   397  		// Do an immediate check within the next 30 seconds
   398  		go func() {
   399  			time.Sleep(lib.RandomStagger(30 * time.Second))
   400  			c.checkpointResults(checkpoint.Check(updateParams))
   401  		}()
   402  	}
   403  	return nil
   404  }
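
// NOTE (added example, not part of the original source): the update check in
// setupAgent runs on two schedules: a recurring 24h CheckInterval and one
// immediate check delayed by a random stagger of up to 30 seconds, so a
// fleet of agents started together does not hit the checkpoint service at
// the same instant. The staggering pattern in isolation
// (exampleStaggeredStart is a hypothetical name):
func exampleStaggeredStart(fn func()) {
	go func() {
		// lib.RandomStagger returns a random duration of up to 30 seconds.
		time.Sleep(lib.RandomStagger(30 * time.Second))
		fn()
	}()
}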
   405  
   406  // checkpointResults is used to handle periodic results from our update checker
   407  func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) {
   408  	if err != nil {
   409  		c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err))
   410  		return
   411  	}
   412  	if results.Outdated {
   413  		c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, c.Version.VersionNumber()))
   414  	}
   415  	for _, alert := range results.Alerts {
   416  		switch alert.Level {
   417  		case "info":
   418  			c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   419  		default:
   420  			c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   421  		}
   422  	}
   423  }
   424  
   425  func (c *Command) AutocompleteFlags() complete.Flags {
   426  	configFilePredictor := complete.PredictOr(
   427  		complete.PredictFiles("*.json"),
   428  		complete.PredictFiles("*.hcl"))
   429  
   430  	return map[string]complete.Predictor{
   431  		"-config": configFilePredictor,
   432  	}
   433  }
   434  
   435  func (c *Command) AutocompleteArgs() complete.Predictor {
   436  	return nil
   437  }
   438  
   439  func (c *Command) Run(args []string) int {
   440  	c.Ui = &cli.PrefixedUi{
   441  		OutputPrefix: "==> ",
   442  		InfoPrefix:   "    ",
   443  		ErrorPrefix:  "==> ",
   444  		Ui:           c.Ui,
   445  	}
   446  
   447  	// Parse our configs
   448  	c.args = args
   449  	config := c.readConfig()
   450  	if config == nil {
   451  		return 1
   452  	}
   453  
   454  	// Setup the log outputs
   455  	logGate, _, logOutput := c.setupLoggers(config)
   456  	if logGate == nil {
   457  		return 1
   458  	}
   459  
   460  	// Log config files
   461  	if len(config.Files) > 0 {
   462  		c.Ui.Info(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", ")))
   463  	} else {
   464  		c.Ui.Info("No configuration files loaded")
   465  	}
   466  
   467  	// Initialize the telemetry
   468  	inmem, err := c.setupTelemetry(config)
   469  	if err != nil {
   470  		c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
   471  		return 1
   472  	}
   473  
   474  	// Create the agent
   475  	if err := c.setupAgent(config, logOutput, inmem); err != nil {
   476  		logGate.Flush()
   477  		return 1
   478  	}
   479  	defer c.agent.Shutdown()
   480  
   481  	// Shut down the HTTP server at the end
   482  	defer func() {
   483  		if c.httpServer != nil {
   484  			c.httpServer.Shutdown()
   485  		}
   486  	}()
   487  
   488  	// Join startup nodes if specified
   489  	if err := c.startupJoin(config); err != nil {
   490  		c.Ui.Error(err.Error())
   491  		return 1
   492  	}
   493  
   494  	// Compile agent information for output later
   495  	info := make(map[string]string)
   496  	info["version"] = config.Version.VersionNumber()
   497  	info["client"] = strconv.FormatBool(config.Client.Enabled)
   498  	info["log level"] = config.LogLevel
   499  	info["server"] = strconv.FormatBool(config.Server.Enabled)
   500  	info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter)
   501  
   502  	// Sort the keys for output
   503  	infoKeys := make([]string, 0, len(info))
   504  	for key := range info {
   505  		infoKeys = append(infoKeys, key)
   506  	}
   507  	sort.Strings(infoKeys)
   508  
   509  	// Agent configuration output
   510  	padding := 18
   511  	c.Ui.Output("Nomad agent configuration:\n")
   512  	for _, k := range infoKeys {
   513  		c.Ui.Info(fmt.Sprintf(
   514  			"%s%s: %s",
   515  			strings.Repeat(" ", padding-len(k)),
   516  			strings.Title(k),
   517  			info[k]))
   518  	}
   519  	c.Ui.Output("")
   520  
   521  	// Output the header that the agent has started
   522  	c.Ui.Output("Nomad agent started! Log data will stream in below:\n")
   523  
   524  	// Enable log streaming
   525  	logGate.Flush()
   526  
   527  	// Start retry join process
   528  	c.retryJoinErrCh = make(chan struct{})
   529  	go c.retryJoin(config)
   530  
   531  	// Wait for exit
   532  	return c.handleSignals()
   533  }
   534  
   535  // handleSignals blocks until we get an exit-causing signal
   536  func (c *Command) handleSignals() int {
   537  	signalCh := make(chan os.Signal, 4)
   538  	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)
   539  
   540  	// Wait for a signal
   541  WAIT:
   542  	var sig os.Signal
   543  	select {
   544  	case s := <-signalCh:
   545  		sig = s
   546  	case <-c.ShutdownCh:
   547  		sig = os.Interrupt
   548  	case <-c.retryJoinErrCh:
   549  		return 1
   550  	}
   551  
   552  	// Skip any SIGPIPE signal and don't try to log it (See issues #1798, #3554)
   553  	if sig == syscall.SIGPIPE {
   554  		goto WAIT
   555  	}
   556  
   557  	c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))
   558  
   559  	// Check if this is a SIGHUP
   560  	if sig == syscall.SIGHUP {
   561  		c.handleReload()
   562  		goto WAIT
   563  	}
   564  
   565  	// Check if we should do a graceful leave
   566  	graceful := false
   567  	if sig == os.Interrupt && c.agent.GetConfig().LeaveOnInt {
   568  		graceful = true
   569  	} else if sig == syscall.SIGTERM && c.agent.GetConfig().LeaveOnTerm {
   570  		graceful = true
   571  	}
   572  
   573  	// Bail fast if not doing a graceful leave
   574  	if !graceful {
   575  		return 1
   576  	}
   577  
   578  	// Attempt a graceful leave
   579  	gracefulCh := make(chan struct{})
   580  	c.Ui.Output("Gracefully shutting down agent...")
   581  	go func() {
   582  		if err := c.agent.Leave(); err != nil {
   583  			c.Ui.Error(fmt.Sprintf("Error: %s", err))
   584  			return
   585  		}
   586  		close(gracefulCh)
   587  	}()
   588  
   589  	// Wait for leave or another signal
   590  	select {
   591  	case <-signalCh:
   592  		return 1
   593  	case <-time.After(gracefulTimeout):
   594  		return 1
   595  	case <-gracefulCh:
   596  		return 0
   597  	}
   598  }
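
// NOTE (added example, not part of the original source): the graceful-leave
// logic above races three events: another signal, the gracefulTimeout
// deadline, and successful completion of Leave. The same select pattern in
// isolation, with a hypothetical work callback standing in for agent.Leave
// (exampleGracefulWait is not part of the original source):
func exampleGracefulWait(signalCh chan os.Signal, work func() error) int {
	done := make(chan struct{})
	go func() {
		if err := work(); err != nil {
			return // leave done open so the timeout path returns 1
		}
		close(done)
	}()
	select {
	case <-signalCh: // interrupted again: give up on the graceful path
		return 1
	case <-time.After(gracefulTimeout): // took too long
		return 1
	case <-done: // graceful leave finished
		return 0
	}
}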
   599  
   600  // handleReload is invoked when we should reload our configs, e.g. SIGHUP
   601  func (c *Command) handleReload() {
   602  	c.Ui.Output("Reloading configuration...")
   603  	newConf := c.readConfig()
   604  	if newConf == nil {
   605  		c.Ui.Error("Failed to reload configs")
   606  		return
   607  	}
   608  
   609  	// Change the log level
   610  	minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
   611  	if ValidateLevelFilter(minLevel, c.logFilter) {
   612  		c.logFilter.SetMinLevel(minLevel)
   613  	} else {
   614  		c.Ui.Error(fmt.Sprintf(
   615  			"Invalid log level: %s. Valid log levels are: %v",
   616  			minLevel, c.logFilter.Levels))
   617  
   618  		// Keep the current log level
   619  		newConf.LogLevel = c.agent.GetConfig().LogLevel
   620  	}
   621  
   622  	// Reloads configuration for an agent running in both client and server mode
   623  	err := c.agent.Reload(newConf)
   624  	if err != nil {
   625  		c.agent.logger.Printf("[ERR] agent: failed to reload the config: %v", err)
   626  	}
   627  
   628  	if s := c.agent.Server(); s != nil {
   629  		sconf, err := convertServerConfig(newConf, c.logOutput)
   630  		if err != nil {
   631  			c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err)
   632  		} else {
   633  			if err := s.Reload(sconf); err != nil {
   634  				c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err)
   635  			}
   636  		}
   637  	}
   638  }
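
// NOTE (added usage note, not part of the original source): handleReload is
// only reached from handleSignals when the process receives SIGHUP, so edited
// config files can be re-read without restarting the agent, for example with
// a command along the lines of `kill -HUP <nomad-agent-pid>`.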
   639  
   640  // setupTelemetry is used to set up the telemetry sub-systems
   641  func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) {
   642  	/* Setup telemetry
   643  	Aggregate on 10 second intervals for 1 minute. Expose the
   644  	metrics over stderr when there is a SIGUSR1 received.
   645  	*/
   646  	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
   647  	metrics.DefaultInmemSignal(inm)
   648  
   649  	var telConfig *Telemetry
   650  	if config.Telemetry == nil {
   651  		telConfig = &Telemetry{}
   652  	} else {
   653  		telConfig = config.Telemetry
   654  	}
   655  
   656  	metricsConf := metrics.DefaultConfig("nomad")
   657  	metricsConf.EnableHostname = !telConfig.DisableHostname
   658  
   659  	// Prefer the hostname as a label.
   660  	metricsConf.EnableHostnameLabel = !telConfig.DisableHostname &&
   661  		!telConfig.DisableTaggedMetrics && !telConfig.BackwardsCompatibleMetrics
   662  
   663  	if telConfig.UseNodeName {
   664  		metricsConf.HostName = config.NodeName
   665  		metricsConf.EnableHostname = true
   666  	}
   667  
   668  	// Configure the statsite sink
   669  	var fanout metrics.FanoutSink
   670  	if telConfig.StatsiteAddr != "" {
   671  		sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
   672  		if err != nil {
   673  			return inm, err
   674  		}
   675  		fanout = append(fanout, sink)
   676  	}
   677  
   678  	// Configure the statsd sink
   679  	if telConfig.StatsdAddr != "" {
   680  		sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
   681  		if err != nil {
   682  			return inm, err
   683  		}
   684  		fanout = append(fanout, sink)
   685  	}
   686  
   687  	// Configure the prometheus sink
   688  	if telConfig.PrometheusMetrics {
   689  		promSink, err := prometheus.NewPrometheusSink()
   690  		if err != nil {
   691  			return inm, err
   692  		}
   693  		fanout = append(fanout, promSink)
   694  	}
   695  
   696  	// Configure the datadog sink
   697  	if telConfig.DataDogAddr != "" {
   698  		sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName)
   699  		if err != nil {
   700  			return inm, err
   701  		}
   702  		fanout = append(fanout, sink)
   703  	}
   704  
   705  	// Configure the Circonus sink
   706  	if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" {
   707  		cfg := &circonus.Config{}
   708  		cfg.Interval = telConfig.CirconusSubmissionInterval
   709  		cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken
   710  		cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp
   711  		cfg.CheckManager.API.URL = telConfig.CirconusAPIURL
   712  		cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL
   713  		cfg.CheckManager.Check.ID = telConfig.CirconusCheckID
   714  		cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation
   715  		cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID
   716  		cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag
   717  		cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags
   718  		cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName
   719  		cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID
   720  		cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag
   721  
   722  		if cfg.CheckManager.Check.DisplayName == "" {
   723  			cfg.CheckManager.Check.DisplayName = "Nomad"
   724  		}
   725  
   726  		if cfg.CheckManager.API.TokenApp == "" {
   727  			cfg.CheckManager.API.TokenApp = "nomad"
   728  		}
   729  
   730  		if cfg.CheckManager.Check.SearchTag == "" {
   731  			cfg.CheckManager.Check.SearchTag = "service:nomad"
   732  		}
   733  
   734  		sink, err := circonus.NewCirconusSink(cfg)
   735  		if err != nil {
   736  			return inm, err
   737  		}
   738  		sink.Start()
   739  		fanout = append(fanout, sink)
   740  	}
   741  
   742  	// Initialize the global sink
   743  	if len(fanout) > 0 {
   744  		fanout = append(fanout, inm)
   745  		metrics.NewGlobal(metricsConf, fanout)
   746  	} else {
   747  		metricsConf.EnableHostname = false
   748  		metrics.NewGlobal(metricsConf, inm)
   749  	}
   750  	return inm, nil
   751  }
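
// NOTE (added example, not part of the original source): setupTelemetry
// always creates the in-memory sink and, when any remote sink is configured,
// wraps everything in a FanoutSink so each metric is written to every sink.
// A minimal sketch of the two-sink case, using only go-metrics calls already
// made above (exampleFanout is a hypothetical name):
func exampleFanout(statsdAddr string) (*metrics.InmemSink, error) {
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	sink, err := metrics.NewStatsdSink(statsdAddr)
	if err != nil {
		return inm, err
	}
	// Fan out to both the remote statsd sink and the local in-memory sink.
	fanout := metrics.FanoutSink{sink, inm}
	metrics.NewGlobal(metrics.DefaultConfig("nomad"), fanout)
	return inm, nil
}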
   752  
   753  func (c *Command) startupJoin(config *Config) error {
   754  	if len(config.Server.StartJoin) == 0 || !config.Server.Enabled {
   755  		return nil
   756  	}
   757  
   758  	c.Ui.Output("Joining cluster...")
   759  	n, err := c.agent.server.Join(config.Server.StartJoin)
   760  	if err != nil {
   761  		return err
   762  	}
   763  
   764  	c.Ui.Info(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
   765  	return nil
   766  }
   767  
   768  // retryJoin is used to handle retrying a join until it succeeds or all retries
   769  // are exhausted.
   770  func (c *Command) retryJoin(config *Config) {
   771  	if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
   772  		return
   773  	}
   774  
   775  	logger := c.agent.logger
   776  	logger.Printf("[INFO] agent: Joining cluster...")
   777  
   778  	attempt := 0
   779  	for {
   780  		n, err := c.agent.server.Join(config.Server.RetryJoin)
   781  		if err == nil {
   782  			logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
   783  			return
   784  		}
   785  
   786  		attempt++
   787  		if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
   788  			logger.Printf("[ERR] agent: max join retry exhausted, exiting")
   789  			close(c.retryJoinErrCh)
   790  			return
   791  		}
   792  
   793  		logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
   794  			config.Server.RetryInterval)
   795  		time.Sleep(config.Server.retryInterval)
   796  	}
   797  }
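
// NOTE (added example, not part of the original source): the loop above keeps
// retrying forever when RetryMaxAttempts is zero and otherwise gives up once
// the attempt counter passes the limit. The same control flow reduced to a
// helper with a hypothetical join callback (exampleRetry is not part of the
// original source):
func exampleRetry(join func() error, maxAttempts int, interval time.Duration) bool {
	for attempt := 0; ; {
		if err := join(); err == nil {
			return true // joined successfully
		}
		attempt++
		if maxAttempts > 0 && attempt > maxAttempts {
			return false // retries exhausted
		}
		time.Sleep(interval)
	}
}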
   798  
   799  func (c *Command) Synopsis() string {
   800  	return "Runs a Nomad agent"
   801  }
   802  
   803  func (c *Command) Help() string {
   804  	helpText := `
   805  Usage: nomad agent [options]
   806  
   807    Starts the Nomad agent and runs until an interrupt is received.
   808    The agent may be a client and/or server.
   809  
   810    The Nomad agent's configuration primarily comes from the config
   811    files used, but a subset of the options may also be passed directly
   812    as CLI arguments, listed below.
   813  
   814  General Options (clients and servers):
   815  
   816    -bind=<addr>
   817      The address the agent will bind to for all of its various network
   818      services. Each individual service binds to its own port on this
   819      address. Defaults to the loopback address 127.0.0.1.
   820  
   821    -config=<path>
   822      The path to either a single config file or a directory of config
   823      files to use for configuring the Nomad agent. This option may be
   824      specified multiple times. If multiple config files are used, the
   825      values from each will be merged together. During merging, values
   826      from files found later in the list are merged over values from
   827      previously parsed files.
   828  
   829    -data-dir=<path>
   830      The data directory used to store state and other persistent data.
   831      On client machines this is used to house allocation data such as
   832      downloaded artifacts used by drivers. On server nodes, the data
   833      dir is also used to store the replicated log.
   834  
   835    -dc=<datacenter>
   836      The name of the datacenter this Nomad agent is a member of. By
   837      default this is set to "dc1".
   838  
   839    -log-level=<level>
   840      Specify the verbosity level of Nomad's logs. Valid values include
   841      DEBUG, INFO, and WARN, in decreasing order of verbosity. The
   842      default is INFO.
   843  
   844    -node=<name>
   845      The name of the local agent. This name is used to identify the node
   846      in the cluster. The name must be unique per region. The default is
   847      the current hostname of the machine.
   848  
   849    -region=<region>
   850      Name of the region the Nomad agent will be a member of. By default
   851      this value is set to "global".
   852  
   853    -dev
   854      Start the agent in development mode. This enables a pre-configured
   855      dual-role agent (client + server) which is useful for developing
   856      or testing Nomad. No other configuration is required to start the
   857      agent in this mode.
   858  
   859  Server Options:
   860  
   861    -server
   862      Enable server mode for the agent. Agents in server mode are
   863      clustered together and handle the additional responsibility of
   864      leader election, data replication, and scheduling work onto
   865      eligible client nodes.
   866  
   867    -bootstrap-expect=<num>
   868      Configures the expected number of server nodes to wait for before
   869      bootstrapping the cluster. Once <num> servers have joined each other,
   870      Nomad initiates the bootstrap process.
   871  
   872    -encrypt=<key>
   873      Provides the gossip encryption key.
   874  
   875    -join=<address>
   876      Address of an agent to join at start time. Can be specified
   877      multiple times.
   878  
   879    -retry-join=<address>
   880      Address of an agent to join at start time with retries enabled.
   881      Can be specified multiple times.
   882  
   883    -retry-max=<num>
   884      Maximum number of join attempts. Defaults to 0, which will retry
   885      indefinitely.
   886  
   887    -retry-interval=<dur>
   888      Time to wait between join attempts.
   889  
   890    -rejoin
   891      Ignore a previous leave and attempt to rejoin the cluster.
   892  
   893  Client Options:
   894  
   895    -client
   896      Enable client mode for the agent. Client mode enables a given node to be
   897      evaluated for allocations. If client mode is not enabled, no work will be
   898      scheduled to the agent.
   899  
   900    -state-dir
   901      The directory used to store state and other persistent data. If not
   902      specified, a subdirectory under the "-data-dir" will be used.
   903  
   904    -alloc-dir
   905      The directory used to store allocation data such as downloaded artifacts as
   906      well as data produced by tasks. If not specified, a subdirectory under the
   907      "-data-dir" will be used.
   908  
   909    -servers
   910      A list of known server addresses to connect to given as "host:port" and
   911      delimited by commas.
   912  
   913    -node-class
   914      Mark this node as a member of a node-class. This can be used to label
   915      similar node types.
   916  
   917    -meta
   918      User-specified metadata to associate with the node. Each instance of -meta
   919      parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair
   920      to be added.
   921  
   922    -network-interface
   923      Forces the network fingerprinter to use the specified network interface.
   924  
   925    -network-speed
   926      The default speed for network interfaces in MBits if the link speed can not
   927      be determined dynamically.
   928  
   929  ACL Options:
   930  
   931    -acl-enabled
   932      Specifies whether the agent should enable ACLs.
   933  
   934    -acl-replication-token
   935      The replication token for servers to use when replicating from the
   936      authoritative region. The token must be a valid management token from the
   937      authoritative region.
   938  
   939  Consul Options:
   940  
   941    -consul-address=<addr>
   942      Specifies the address of the local Consul agent, given in the format host:port.
   943      Supports Unix sockets with the format: unix:///tmp/consul/consul.sock
   944  
   945    -consul-auth=<auth>
   946      Specifies the HTTP Basic Authentication information to use for access to the
   947      Consul Agent, given in the format username:password.
   948  
   949    -consul-auto-advertise
   950      Specifies if Nomad should advertise its services in Consul. The services
   951      are named according to server_service_name and client_service_name. Nomad
   952      servers and clients advertise their respective services, each tagged
   953      appropriately with either the http or rpc tag. Nomad servers also advertise a
   954      serf tagged service.
   955  
   956    -consul-ca-file=<path>
   957      Specifies an optional path to the CA certificate used for Consul communication.
   958      This defaults to the system bundle if unspecified.
   959  
   960    -consul-cert-file=<path>
   961      Specifies the path to the certificate used for Consul communication. If this
   962      is set then you need to also set key_file.
   963  
   964    -consul-checks-use-advertise
   965      Specifies if Consul health checks should bind to the advertise address. By
   966      default, this is the bind address.
   967  
   968    -consul-client-auto-join
   969      Specifies if the Nomad clients should automatically discover servers in the
   970      same region by searching for the Consul service name defined in the
   971      server_service_name option.
   972  
   973    -consul-client-service-name=<name>
   974      Specifies the name of the service in Consul for the Nomad clients.
   975  
   976    -consul-key-file=<path>
   977      Specifies the path to the private key used for Consul communication. If this
   978      is set then you need to also set cert_file.
   979  
   980    -consul-server-service-name=<name>
   981      Specifies the name of the service in Consul for the Nomad servers.
   982  
   983    -consul-server-auto-join
   984      Specifies if the Nomad servers should automatically discover and join other
   985      Nomad servers by searching for the Consul service name defined in the
   986      server_service_name option. This search only happens if the server does not
   987      have a leader.
   988  
   989    -consul-ssl
   990      Specifies if the transport scheme should use HTTPS to communicate with the
   991      Consul agent.
   992  
   993    -consul-token=<token>
   994      Specifies the token used to provide a per-request ACL token.
   995  
   996    -consul-verify-ssl
   997      Specifies if SSL peer verification should be used when communicating with
   998      the Consul API over HTTPS.
   999  
  1000  Vault Options:
  1001  
  1002    -vault-enabled
  1003      Whether to enable or disable Vault integration.
  1004  
  1005    -vault-address=<addr>
  1006      The address to communicate with Vault. This should be provided with the http://
  1007      or https:// prefix.
  1008  
  1009    -vault-token=<token>
  1010      The Vault token used to derive tokens from Vault on behalf of clients.
  1011      This only needs to be set on Servers. Overrides the Vault token read from
  1012      the VAULT_TOKEN environment variable.
  1013  
  1014    -vault-create-from-role=<role>
  1015      The role name to create tokens for tasks from.
  1016  
  1017    -vault-allow-unauthenticated
  1018      Whether to allow jobs to be submitted that request Vault tokens but do not
  1019      authenticate. The flag only applies to Servers.
  1020  
  1021    -vault-ca-file=<path>
  1022      The path to a PEM-encoded CA cert file to use to verify the Vault server SSL
  1023      certificate.
  1024  
  1025    -vault-ca-path=<path>
  1026      The path to a directory of PEM-encoded CA cert files to verify the Vault server
  1027      certificate.
  1028  
  1029    -vault-cert-file=<path>
  1030      The path to the certificate for Vault communication.
  1031  
  1032    -vault-key-file=<path>
  1033      The path to the private key for Vault communication.
  1034  
  1035    -vault-tls-skip-verify
  1036      Enables or disables SSL certificate verification.
  1037  
  1038    -vault-tls-server-name=<name>
  1039      Used to set the SNI host when connecting over TLS.
  1040   `
  1041  	return strings.TrimSpace(helpText)
  1042  }
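
// NOTE (added examples, not part of the original source): typical invocations
// of the command documented by the help text above. The addresses and paths
// are placeholders.
//
//   # Development mode: a single pre-configured client+server agent.
//   nomad agent -dev
//
//   # Server that waits for three servers before bootstrapping, with two
//   # config files merged in order and CLI flags taking precedence.
//   nomad agent -server -bootstrap-expect=3 -data-dir=/var/lib/nomad \
//     -config=/etc/nomad/base.hcl -config=/etc/nomad/server.hcl
//
//   # Client pointed at known servers and labeled with node metadata.
//   nomad agent -client -data-dir=/var/lib/nomad \
//     -servers=10.0.0.10:4647,10.0.0.11:4647 -meta=rack=r1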