github.com/hooklift/nomad@v0.5.7-0.20170407200202-db11e7dd7b55/command/agent/command.go (about)

     1  package agent
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"io"
     7  	"log"
     8  	"os"
     9  	"os/signal"
    10  	"path/filepath"
    11  	"reflect"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"syscall"
    16  	"time"
    17  
    18  	"github.com/armon/go-metrics"
    19  	"github.com/armon/go-metrics/circonus"
    20  	"github.com/armon/go-metrics/datadog"
    21  	"github.com/hashicorp/consul/lib"
    22  	"github.com/hashicorp/go-checkpoint"
    23  	"github.com/hashicorp/go-syslog"
    24  	"github.com/hashicorp/logutils"
    25  	"github.com/hashicorp/nomad/helper/flag-helpers"
    26  	"github.com/hashicorp/nomad/helper/gated-writer"
    27  	"github.com/hashicorp/nomad/nomad/structs/config"
    28  	"github.com/hashicorp/scada-client/scada"
    29  	"github.com/mitchellh/cli"
    30  )
    31  
// gracefulTimeout controls how long we wait before forcefully terminating
// (used by handleSignals when attempting a graceful leave).
const gracefulTimeout = 5 * time.Second
    34  
// Command is a Command implementation that runs a Nomad agent.
// The command will not end unless a shutdown message is sent on the
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
// exit.
type Command struct {
	// Revision, Version, and VersionPrerelease are copied into the agent
	// config by readConfig and reported by the update checker.
	Revision          string
	Version           string
	VersionPrerelease string
	Ui                cli.Ui
	// ShutdownCh triggers a shutdown when a value is received; see
	// handleSignals.
	ShutdownCh <-chan struct{}

	// args holds the raw CLI arguments saved by Run for readConfig
	// (re-parsed on SIGHUP reloads as well).
	args           []string
	agent          *Agent
	httpServer     *HTTPServer
	logFilter      *logutils.LevelFilter
	logOutput      io.Writer
	// retryJoinErrCh is closed by retryJoin once the maximum number of
	// join attempts is exhausted; handleSignals exits when it closes.
	retryJoinErrCh chan struct{}

	scadaProvider *scada.Provider
	scadaHttp     *HTTPServer
}
    56  
// readConfig parses the CLI flags and any -config files into a single
// merged *Config. Precedence, lowest to highest: the default (or dev-mode)
// config, each -config path in the order given, then CLI flags. It returns
// nil after reporting the problem on the UI when parsing or validation
// fails. Dev mode returns before validation runs.
func (c *Command) readConfig() *Config {
	var dev bool
	var configPath []string
	var servers string
	var meta []string

	// Make a new, empty config. Sub-structs are pre-allocated so the flag
	// definitions below can bind directly into their fields.
	cmdConfig := &Config{
		Atlas:  &AtlasConfig{},
		Client: &ClientConfig{},
		Ports:  &Ports{},
		Server: &ServerConfig{},
		Vault:  &config.VaultConfig{},
	}

	flags := flag.NewFlagSet("agent", flag.ContinueOnError)
	flags.Usage = func() { c.Ui.Error(c.Help()) }

	// Role options
	flags.BoolVar(&dev, "dev", false, "")
	flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
	flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")

	// Server-only options
	flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
	flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "")
	flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "")
	flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "")
	flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")

	// Client-only options
	flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
	flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "")
	flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "")
	flags.StringVar(&servers, "servers", "", "")
	flags.Var((*flaghelper.StringFlag)(&meta), "meta", "")
	flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "")
	flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "")

	// General options
	flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config")
	flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
	flags.StringVar(&cmdConfig.Region, "region", "", "")
	flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
	flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
	flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
	flags.StringVar(&cmdConfig.NodeName, "node", "", "")

	// Atlas options
	flags.StringVar(&cmdConfig.Atlas.Infrastructure, "atlas", "", "")
	flags.BoolVar(&cmdConfig.Atlas.Join, "atlas-join", false, "")
	flags.StringVar(&cmdConfig.Atlas.Token, "atlas-token", "", "")

	// Vault options. The boolean Vault flags use FuncBoolVar and store a
	// *bool so that "flag not passed" (nil) can be distinguished from an
	// explicit false when the configs are merged below.
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.Enabled = &b
		return nil
	}), "vault-enabled", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.AllowUnauthenticated = &b
		return nil
	}), "vault-allow-unauthenticated", "")
	flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "")
	flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "")
	flags.StringVar(&cmdConfig.Vault.Role, "vault-create-from-role", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.TLSSkipVerify = &b
		return nil
	}), "vault-tls-skip-verify", "")
	flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "")

	// flag.ContinueOnError prints its own message, so just bail out here.
	if err := flags.Parse(c.args); err != nil {
		return nil
	}

	// Split the servers.
	if servers != "" {
		cmdConfig.Client.Servers = strings.Split(servers, ",")
	}

	// Parse the meta flags: each entry must be a "key=value" pair.
	metaLength := len(meta)
	if metaLength != 0 {
		cmdConfig.Client.Meta = make(map[string]string, metaLength)
		for _, kv := range meta {
			parts := strings.SplitN(kv, "=", 2)
			if len(parts) != 2 {
				c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv))
				return nil
			}

			cmdConfig.Client.Meta[parts[0]] = parts[1]
		}
	}

	// Load the configuration. NOTE: this local shadows the imported
	// structs/config package for the remainder of the function.
	var config *Config
	if dev {
		config = DevConfig()
	} else {
		config = DefaultConfig()
	}
	for _, path := range configPath {
		current, err := LoadConfig(path)
		if err != nil {
			c.Ui.Error(fmt.Sprintf(
				"Error loading configuration from %s: %s", path, err))
			return nil
		}

		// The user asked us to load some config here but we didn't find any,
		// so we'll complain but continue.
		if current == nil || reflect.DeepEqual(current, &Config{}) {
			c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path))
		}

		if config == nil {
			config = current
		} else {
			config = config.Merge(current)
		}
	}

	// Ensure the sub-structs at least exist
	if config.Atlas == nil {
		config.Atlas = &AtlasConfig{}
	}
	if config.Client == nil {
		config.Client = &ClientConfig{}
	}
	if config.Server == nil {
		config.Server = &ServerConfig{}
	}

	// Merge any CLI options over config file options
	config = config.Merge(cmdConfig)

	// Set the version info
	config.Revision = c.Revision
	config.Version = c.Version
	config.VersionPrerelease = c.VersionPrerelease

	// Normalize binds, ports, addresses, and advertise
	if err := config.normalizeAddrs(); err != nil {
		c.Ui.Error(err.Error())
		return nil
	}

	// Check to see if we should read the Vault token from the environment
	if config.Vault.Token == "" {
		if token, ok := os.LookupEnv("VAULT_TOKEN"); ok {
			config.Vault.Token = token
		}
	}

	if dev {
		// Skip validation for dev mode
		return config
	}

	// An explicit -encrypt key must be valid; warn if a keyring already
	// exists on disk since the keyring takes precedence.
	if config.Server.EncryptKey != "" {
		if _, err := config.Server.EncryptBytes(); err != nil {
			c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
			return nil
		}
		keyfile := filepath.Join(config.DataDir, serfKeyring)
		if _, err := os.Stat(keyfile); err == nil {
			c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring")
		}
	}

	// Parse the RetryInterval.
	dur, err := time.ParseDuration(config.Server.RetryInterval)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err))
		return nil
	}
	config.Server.retryInterval = dur

	// Check that the server is running in at least one mode.
	if !(config.Server.Enabled || config.Client.Enabled) {
		c.Ui.Error("Must specify either server, client or dev mode for the agent.")
		return nil
	}

	// Verify the paths are absolute.
	dirs := map[string]string{
		"data-dir":  config.DataDir,
		"alloc-dir": config.Client.AllocDir,
		"state-dir": config.Client.StateDir,
	}
	for k, dir := range dirs {
		if dir == "" {
			continue
		}

		if !filepath.IsAbs(dir) {
			c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir))
			return nil
		}
	}

	// Ensure that we have the directories we need to run.
	if config.Server.Enabled && config.DataDir == "" {
		c.Ui.Error("Must specify data directory")
		return nil
	}

	// The config is valid if the top-level data-dir is set or if both
	// alloc-dir and state-dir are set.
	if config.Client.Enabled && config.DataDir == "" {
		if config.Client.AllocDir == "" || config.Client.StateDir == "" {
			c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.")
			return nil
		}
	}

	// Check the bootstrap flags
	if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
		c.Ui.Error("Bootstrap requires server mode to be enabled")
		return nil
	}
	if config.Server.BootstrapExpect == 1 {
		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
	}

	return config
}
   291  
   292  // setupLoggers is used to setup the logGate, logWriter, and our logOutput
   293  func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) {
   294  	// Setup logging. First create the gated log writer, which will
   295  	// store logs until we're ready to show them. Then create the level
   296  	// filter, filtering logs of the specified level.
   297  	logGate := &gatedwriter.Writer{
   298  		Writer: &cli.UiWriter{Ui: c.Ui},
   299  	}
   300  
   301  	c.logFilter = LevelFilter()
   302  	c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
   303  	c.logFilter.Writer = logGate
   304  	if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
   305  		c.Ui.Error(fmt.Sprintf(
   306  			"Invalid log level: %s. Valid log levels are: %v",
   307  			c.logFilter.MinLevel, c.logFilter.Levels))
   308  		return nil, nil, nil
   309  	}
   310  
   311  	// Check if syslog is enabled
   312  	var syslog io.Writer
   313  	if config.EnableSyslog {
   314  		l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
   315  		if err != nil {
   316  			c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
   317  			return nil, nil, nil
   318  		}
   319  		syslog = &SyslogWrapper{l, c.logFilter}
   320  	}
   321  
   322  	// Create a log writer, and wrap a logOutput around it
   323  	logWriter := NewLogWriter(512)
   324  	var logOutput io.Writer
   325  	if syslog != nil {
   326  		logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
   327  	} else {
   328  		logOutput = io.MultiWriter(c.logFilter, logWriter)
   329  	}
   330  	c.logOutput = logOutput
   331  	log.SetOutput(logOutput)
   332  	return logGate, logWriter, logOutput
   333  }
   334  
   335  // setupAgent is used to start the agent and various interfaces
   336  func (c *Command) setupAgent(config *Config, logOutput io.Writer) error {
   337  	c.Ui.Output("Starting Nomad agent...")
   338  	agent, err := NewAgent(config, logOutput)
   339  	if err != nil {
   340  		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
   341  		return err
   342  	}
   343  	c.agent = agent
   344  
   345  	// Enable the SCADA integration
   346  	if err := c.setupSCADA(config); err != nil {
   347  		agent.Shutdown()
   348  		c.Ui.Error(fmt.Sprintf("Error starting SCADA: %s", err))
   349  		return err
   350  	}
   351  
   352  	// Setup the HTTP server
   353  	http, err := NewHTTPServer(agent, config, logOutput)
   354  	if err != nil {
   355  		agent.Shutdown()
   356  		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
   357  		return err
   358  	}
   359  	c.httpServer = http
   360  
   361  	// Setup update checking
   362  	if !config.DisableUpdateCheck {
   363  		version := config.Version
   364  		if config.VersionPrerelease != "" {
   365  			version += fmt.Sprintf("-%s", config.VersionPrerelease)
   366  		}
   367  		updateParams := &checkpoint.CheckParams{
   368  			Product: "nomad",
   369  			Version: version,
   370  		}
   371  		if !config.DisableAnonymousSignature {
   372  			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
   373  		}
   374  
   375  		// Schedule a periodic check with expected interval of 24 hours
   376  		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)
   377  
   378  		// Do an immediate check within the next 30 seconds
   379  		go func() {
   380  			time.Sleep(lib.RandomStagger(30 * time.Second))
   381  			c.checkpointResults(checkpoint.Check(updateParams))
   382  		}()
   383  	}
   384  	return nil
   385  }
   386  
   387  // checkpointResults is used to handler periodic results from our update checker
   388  func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) {
   389  	if err != nil {
   390  		c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err))
   391  		return
   392  	}
   393  	if results.Outdated {
   394  		versionStr := c.Version
   395  		if c.VersionPrerelease != "" {
   396  			versionStr += fmt.Sprintf("-%s", c.VersionPrerelease)
   397  		}
   398  
   399  		c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, versionStr))
   400  	}
   401  	for _, alert := range results.Alerts {
   402  		switch alert.Level {
   403  		case "info":
   404  			c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   405  		default:
   406  			c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   407  		}
   408  	}
   409  }
   410  
// Run parses the configuration, sets up logging, telemetry, and the agent
// (including SCADA and HTTP listeners), performs any startup join, prints
// the startup summary, and then blocks in handleSignals until shutdown.
// It returns the process exit code.
func (c *Command) Run(args []string) int {
	// Wrap the UI so all output carries the standard agent prefixes.
	c.Ui = &cli.PrefixedUi{
		OutputPrefix: "==> ",
		InfoPrefix:   "    ",
		ErrorPrefix:  "==> ",
		Ui:           c.Ui,
	}

	// Parse our configs; args are saved so handleReload can re-parse them.
	c.args = args
	config := c.readConfig()
	if config == nil {
		return 1
	}

	// Setup the log outputs; logGate buffers output until we flush below.
	logGate, _, logOutput := c.setupLoggers(config)
	if logGate == nil {
		return 1
	}

	// Log config files
	if len(config.Files) > 0 {
		c.Ui.Info(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", ")))
	} else {
		c.Ui.Info("No configuration files loaded")
	}

	// Initialize the telemetry
	if err := c.setupTelemetry(config); err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
		return 1
	}

	// Create the agent; flush the gate so any buffered logs are shown
	// before bailing out.
	if err := c.setupAgent(config, logOutput); err != nil {
		logGate.Flush()
		return 1
	}
	defer c.agent.Shutdown()

	// Check and shut down the SCADA listeners at the end
	defer func() {
		if c.httpServer != nil {
			c.httpServer.Shutdown()
		}
		if c.scadaHttp != nil {
			c.scadaHttp.Shutdown()
		}
		if c.scadaProvider != nil {
			c.scadaProvider.Shutdown()
		}
	}()

	// Join startup nodes if specified
	if err := c.startupJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Compile agent information for output later
	info := make(map[string]string)
	info["version"] = fmt.Sprintf("%s%s", config.Version, config.VersionPrerelease)
	info["client"] = strconv.FormatBool(config.Client.Enabled)
	info["log level"] = config.LogLevel
	info["server"] = strconv.FormatBool(config.Server.Enabled)
	info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter)
	if config.Atlas != nil && config.Atlas.Infrastructure != "" {
		info["atlas"] = fmt.Sprintf("(Infrastructure: '%s' Join: %v)",
			config.Atlas.Infrastructure, config.Atlas.Join)
	} else {
		info["atlas"] = "<disabled>"
	}

	// Sort the keys for output
	infoKeys := make([]string, 0, len(info))
	for key := range info {
		infoKeys = append(infoKeys, key)
	}
	sort.Strings(infoKeys)

	// Agent configuration output.
	// NOTE(review): strings.Repeat panics on a negative count, so any new
	// info key longer than `padding` characters would crash here.
	padding := 18
	c.Ui.Output("Nomad agent configuration:\n")
	for _, k := range infoKeys {
		c.Ui.Info(fmt.Sprintf(
			"%s%s: %s",
			strings.Repeat(" ", padding-len(k)),
			strings.Title(k),
			info[k]))
	}
	c.Ui.Output("")

	// Output the header that the server has started
	c.Ui.Output("Nomad agent started! Log data will stream in below:\n")

	// Enable log streaming
	logGate.Flush()

	// Start retry join process; retryJoin closes retryJoinErrCh on
	// exhaustion, which handleSignals treats as a fatal exit.
	c.retryJoinErrCh = make(chan struct{})
	go c.retryJoin(config)

	// Wait for exit
	return c.handleSignals(config)
}
   517  
// handleSignals blocks until we get an exit-causing signal, returning the
// exit code. SIGPIPE is ignored, SIGHUP triggers a config reload, and an
// interrupt/terminate either exits immediately or attempts a graceful
// leave (bounded by gracefulTimeout) depending on the config.
func (c *Command) handleSignals(config *Config) int {
	signalCh := make(chan os.Signal, 4)
	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)

	// Wait for a signal; non-terminating signals jump back here.
	WAIT:
	var sig os.Signal
	select {
	case s := <-signalCh:
		sig = s
	case <-c.ShutdownCh:
		sig = os.Interrupt
	case <-c.retryJoinErrCh:
		return 1
	}
	c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))

	// Skip any SIGPIPE signal (See issue #1798)
	if sig == syscall.SIGPIPE {
		goto WAIT
	}

	// Check if this is a SIGHUP: reload config in place and keep waiting.
	if sig == syscall.SIGHUP {
		if conf := c.handleReload(config); conf != nil {
			*config = *conf
		}
		goto WAIT
	}

	// Check if we should do a graceful leave
	graceful := false
	if sig == os.Interrupt && config.LeaveOnInt {
		graceful = true
	} else if sig == syscall.SIGTERM && config.LeaveOnTerm {
		graceful = true
	}

	// Bail fast if not doing a graceful leave
	if !graceful {
		return 1
	}

	// Attempt a graceful leave in the background so we can still react to
	// another signal or a timeout.
	gracefulCh := make(chan struct{})
	c.Ui.Output("Gracefully shutting down agent...")
	go func() {
		if err := c.agent.Leave(); err != nil {
			c.Ui.Error(fmt.Sprintf("Error: %s", err))
			return
		}
		close(gracefulCh)
	}()

	// Wait for leave or another signal; only a completed leave exits 0.
	select {
	case <-signalCh:
		return 1
	case <-time.After(gracefulTimeout):
		return 1
	case <-gracefulCh:
		return 0
	}
}
   583  
   584  // handleReload is invoked when we should reload our configs, e.g. SIGHUP
   585  func (c *Command) handleReload(config *Config) *Config {
   586  	c.Ui.Output("Reloading configuration...")
   587  	newConf := c.readConfig()
   588  	if newConf == nil {
   589  		c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
   590  		return config
   591  	}
   592  
   593  	// Change the log level
   594  	minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
   595  	if ValidateLevelFilter(minLevel, c.logFilter) {
   596  		c.logFilter.SetMinLevel(minLevel)
   597  	} else {
   598  		c.Ui.Error(fmt.Sprintf(
   599  			"Invalid log level: %s. Valid log levels are: %v",
   600  			minLevel, c.logFilter.Levels))
   601  
   602  		// Keep the current log level
   603  		newConf.LogLevel = config.LogLevel
   604  	}
   605  
   606  	if s := c.agent.Server(); s != nil {
   607  		sconf, err := convertServerConfig(newConf, c.logOutput)
   608  		if err != nil {
   609  			c.agent.logger.Printf("[ERR] agent: failed to convert server config: %v", err)
   610  		} else {
   611  			if err := s.Reload(sconf); err != nil {
   612  				c.agent.logger.Printf("[ERR] agent: reloading server config failed: %v", err)
   613  			}
   614  		}
   615  	}
   616  
   617  	return newConf
   618  }
   619  
// setupTelemetry is used to set up the telemetry sub-systems: an in-memory
// sink (dumped to stderr on SIGUSR1) plus optional statsite, statsd,
// DataDog, and Circonus sinks fanned out together.
func (c *Command) setupTelemetry(config *Config) error {
	/* Setup telemetry
	Aggregate on 10 second intervals for 1 minute. Expose the
	metrics over stderr when there is a SIGUSR1 received.
	*/
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)

	// Fall back to an empty telemetry config so the field reads below are
	// always safe.
	var telConfig *Telemetry
	if config.Telemetry == nil {
		telConfig = &Telemetry{}
	} else {
		telConfig = config.Telemetry
	}

	metricsConf := metrics.DefaultConfig("nomad")
	metricsConf.EnableHostname = !telConfig.DisableHostname
	// UseNodeName overrides the hostname with the configured node name.
	if telConfig.UseNodeName {
		metricsConf.HostName = config.NodeName
		metricsConf.EnableHostname = true
	}

	// Configure the statsite sink
	var fanout metrics.FanoutSink
	if telConfig.StatsiteAddr != "" {
		sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the statsd sink
	if telConfig.StatsdAddr != "" {
		sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the datadog sink
	if telConfig.DataDogAddr != "" {
		sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the Circonus sink
	if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" {
		cfg := &circonus.Config{}
		cfg.Interval = telConfig.CirconusSubmissionInterval
		cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken
		cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp
		cfg.CheckManager.API.URL = telConfig.CirconusAPIURL
		cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL
		cfg.CheckManager.Check.ID = telConfig.CirconusCheckID
		cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation
		cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID
		cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag
		cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags
		cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName
		cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID
		cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag

		// Defaults for the optional Circonus fields.
		if cfg.CheckManager.Check.DisplayName == "" {
			cfg.CheckManager.Check.DisplayName = "Nomad"
		}

		if cfg.CheckManager.API.TokenApp == "" {
			cfg.CheckManager.API.TokenApp = "nomad"
		}

		if cfg.CheckManager.Check.SearchTag == "" {
			cfg.CheckManager.Check.SearchTag = "service:nomad"
		}

		sink, err := circonus.NewCirconusSink(cfg)
		if err != nil {
			return err
		}
		sink.Start()
		fanout = append(fanout, sink)
	}

	// Initialize the global sink: fan out to all configured sinks plus the
	// in-memory sink, or use the in-memory sink alone (hostname disabled)
	// when no external sinks were configured.
	if len(fanout) > 0 {
		fanout = append(fanout, inm)
		metrics.NewGlobal(metricsConf, fanout)
	} else {
		metricsConf.EnableHostname = false
		metrics.NewGlobal(metricsConf, inm)
	}
	return nil
}
   718  
   719  // setupSCADA is used to start a new SCADA provider and listener,
   720  // replacing any existing listeners.
   721  func (c *Command) setupSCADA(config *Config) error {
   722  	// Shut down existing SCADA listeners
   723  	if c.scadaProvider != nil {
   724  		c.scadaProvider.Shutdown()
   725  	}
   726  	if c.scadaHttp != nil {
   727  		c.scadaHttp.Shutdown()
   728  	}
   729  
   730  	// No-op if we don't have an infrastructure
   731  	if config.Atlas == nil || config.Atlas.Infrastructure == "" {
   732  		return nil
   733  	}
   734  
   735  	// Create the new provider and listener
   736  	c.Ui.Output("Connecting to Atlas: " + config.Atlas.Infrastructure)
   737  
   738  	scadaConfig := &scada.Config{
   739  		Service:      "nomad",
   740  		Version:      fmt.Sprintf("%s%s", config.Version, config.VersionPrerelease),
   741  		ResourceType: "nomad-cluster",
   742  		Meta: map[string]string{
   743  			"auto-join":  strconv.FormatBool(config.Atlas.Join),
   744  			"region":     config.Region,
   745  			"datacenter": config.Datacenter,
   746  			"client":     strconv.FormatBool(config.Client != nil && config.Client.Enabled),
   747  			"server":     strconv.FormatBool(config.Server != nil && config.Server.Enabled),
   748  		},
   749  		Atlas: scada.AtlasConfig{
   750  			Endpoint:       config.Atlas.Endpoint,
   751  			Infrastructure: config.Atlas.Infrastructure,
   752  			Token:          config.Atlas.Token,
   753  		},
   754  	}
   755  
   756  	provider, list, err := scada.NewHTTPProvider(scadaConfig, c.logOutput)
   757  	if err != nil {
   758  		return err
   759  	}
   760  	c.scadaProvider = provider
   761  	c.scadaHttp = newScadaHttp(c.agent, list)
   762  	return nil
   763  }
   764  
   765  func (c *Command) startupJoin(config *Config) error {
   766  	if len(config.Server.StartJoin) == 0 || !config.Server.Enabled {
   767  		return nil
   768  	}
   769  
   770  	c.Ui.Output("Joining cluster...")
   771  	n, err := c.agent.server.Join(config.Server.StartJoin)
   772  	if err != nil {
   773  		return err
   774  	}
   775  
   776  	c.Ui.Info(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
   777  	return nil
   778  }
   779  
   780  // retryJoin is used to handle retrying a join until it succeeds or all retries
   781  // are exhausted.
   782  func (c *Command) retryJoin(config *Config) {
   783  	if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
   784  		return
   785  	}
   786  
   787  	logger := c.agent.logger
   788  	logger.Printf("[INFO] agent: Joining cluster...")
   789  
   790  	attempt := 0
   791  	for {
   792  		n, err := c.agent.server.Join(config.Server.RetryJoin)
   793  		if err == nil {
   794  			logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
   795  			return
   796  		}
   797  
   798  		attempt++
   799  		if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
   800  			logger.Printf("[ERR] agent: max join retry exhausted, exiting")
   801  			close(c.retryJoinErrCh)
   802  			return
   803  		}
   804  
   805  		logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
   806  			config.Server.RetryInterval)
   807  		time.Sleep(config.Server.retryInterval)
   808  	}
   809  }
   810  
// Synopsis returns the one-line description shown in the CLI command list.
func (c *Command) Synopsis() string {
	return "Runs a Nomad agent"
}
   814  
   815  func (c *Command) Help() string {
   816  	helpText := `
   817  Usage: nomad agent [options]
   818  
   819    Starts the Nomad agent and runs until an interrupt is received.
   820    The agent may be a client and/or server.
   821  
   822    The Nomad agent's configuration primarily comes from the config
   823    files used, but a subset of the options may also be passed directly
   824    as CLI arguments, listed below.
   825  
   826  General Options (clients and servers):
   827  
   828    -bind=<addr>
   829      The address the agent will bind to for all of its various network
   830      services. The individual services that run bind to individual
   831      ports on this address. Defaults to the loopback 127.0.0.1.
   832  
   833    -config=<path>
   834      The path to either a single config file or a directory of config
   835      files to use for configuring the Nomad agent. This option may be
   836      specified multiple times. If multiple config files are used, the
   837      values from each will be merged together. During merging, values
   838      from files found later in the list are merged over values from
   839      previously parsed files.
   840  
   841    -data-dir=<path>
   842      The data directory used to store state and other persistent data.
   843      On client machines this is used to house allocation data such as
   844      downloaded artifacts used by drivers. On server nodes, the data
   845      dir is also used to store the replicated log.
   846  
   847    -dc=<datacenter>
   848      The name of the datacenter this Nomad agent is a member of. By
   849      default this is set to "dc1".
   850  
   851    -log-level=<level>
   852      Specify the verbosity level of Nomad's logs. Valid values include
   853      DEBUG, INFO, and WARN, in decreasing order of verbosity. The
   854      default is INFO.
   855  
   856    -node=<name>
   857      The name of the local agent. This name is used to identify the node
   858      in the cluster. The name must be unique per region. The default is
   859      the current hostname of the machine.
   860  
   861    -region=<region>
   862      Name of the region the Nomad agent will be a member of. By default
   863      this value is set to "global".
   864  
   865    -dev
   866      Start the agent in development mode. This enables a pre-configured
   867      dual-role agent (client + server) which is useful for developing
   868      or testing Nomad. No other configuration is required to start the
   869      agent in this mode.
   870  
   871  Server Options:
   872  
   873    -server
   874      Enable server mode for the agent. Agents in server mode are
   875      clustered together and handle the additional responsibility of
   876      leader election, data replication, and scheduling work onto
   877      eligible client nodes.
   878  
   879    -bootstrap-expect=<num>
    880      Configures the expected number of server nodes to wait for before
    881      bootstrapping the cluster. Once <num> servers have joined each other,
    882      Nomad initiates the bootstrap process.
   883  
   884    -encrypt=<key>
   885      Provides the gossip encryption key
   886  
   887    -join=<address>
   888      Address of an agent to join at start time. Can be specified
   889      multiple times.
   890  
   891    -retry-join=<address>
   892      Address of an agent to join at start time with retries enabled.
   893      Can be specified multiple times.
   894  
   895    -retry-max=<num>
   896      Maximum number of join attempts. Defaults to 0, which will retry
   897      indefinitely.
   898  
   899    -retry-interval=<dur>
   900      Time to wait between join attempts.
   901  
   902    -rejoin
    903      Ignores a previous leave and attempts to rejoin the cluster.
   904  
   905  Client Options:
   906  
   907    -client
   908      Enable client mode for the agent. Client mode enables a given node to be
   909      evaluated for allocations. If client mode is not enabled, no work will be
   910      scheduled to the agent.
   911  
   912    -state-dir
   913      The directory used to store state and other persistent data. If not
   914      specified a subdirectory under the "-data-dir" will be used.
   915  
   916    -alloc-dir
    917      The directory used to store allocation data such as downloaded artifacts as
   918      well as data produced by tasks. If not specified, a subdirectory under the
   919      "-data-dir" will be used.
   920  
   921    -servers
   922      A list of known server addresses to connect to given as "host:port" and
   923      delimited by commas.
   924  
   925    -node-class
   926      Mark this node as a member of a node-class. This can be used to label
   927      similar node types.
   928  
   929    -meta
    930      User specified metadata to associate with the node. Each instance of -meta
   931      parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair
   932      to be added.
   933  
   934    -network-interface
   935      Forces the network fingerprinter to use the specified network interface.
   936  
   937    -network-speed
   938      The default speed for network interfaces in MBits if the link speed can not
   939      be determined dynamically.
   940  
   941  Vault Options:
   942  
   943    -vault-enabled
   944      Whether to enable or disable Vault integration.
   945  
   946    -vault-address=<addr>
   947      The address to communicate with Vault. This should be provided with the http://
   948      or https:// prefix.
   949  
   950    -vault-token=<token>
   951      The Vault token used to derive tokens from Vault on behalf of clients.
   952      This only needs to be set on Servers. Overrides the Vault token read from
   953      the VAULT_TOKEN environment variable.
   954  
   955    -vault-create-from-role=<role>
   956      The role name to create tokens for tasks from.
   957  
   958    -vault-allow-unauthenticated
    959      Whether to allow jobs to be submitted that request Vault Tokens but do not
    960      authenticate. The flag only applies to Servers.
   961  
   962    -vault-ca-file=<path>
   963      The path to a PEM-encoded CA cert file to use to verify the Vault server SSL
   964      certificate.
   965  
   966    -vault-ca-path=<path>
   967      The path to a directory of PEM-encoded CA cert files to verify the Vault server
   968      certificate.
   969  
    970    -vault-cert-file=<path>
   971      The path to the certificate for Vault communication.
   972  
    973    -vault-key-file=<path>
   974      The path to the private key for Vault communication.
   975  
    976    -vault-tls-skip-verify
   977      Enables or disables SSL certificate verification.
   978  
    979    -vault-tls-server-name=<name>
   980      Used to set the SNI host when connecting over TLS.
   981  
   982  Atlas Options:
   983  
   984    -atlas=<infrastructure>
   985      The Atlas infrastructure name to configure. This enables the SCADA
   986      client and attempts to connect Nomad to the HashiCorp Atlas service
   987      using the provided infrastructure name and token.
   988  
   989    -atlas-token=<token>
   990      The Atlas token to use when connecting to the HashiCorp Atlas
   991      service. This must be provided to successfully connect your Nomad
   992      agent to Atlas.
   993  
   994    -atlas-join
   995      Enable the Atlas join feature. This mode allows agents to discover
    996      each other automatically using the SCADA integration features.
   997   `
   998  	return strings.TrimSpace(helpText)
   999  }