github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/command/agent/command.go (about)

     1  package agent
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"io"
     7  	"log"
     8  	"os"
     9  	"os/signal"
    10  	"path/filepath"
    11  	"reflect"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"syscall"
    16  	"time"
    17  
    18  	"github.com/armon/go-metrics"
    19  	"github.com/armon/go-metrics/circonus"
    20  	"github.com/armon/go-metrics/datadog"
    21  	"github.com/hashicorp/consul/lib"
    22  	"github.com/hashicorp/go-checkpoint"
    23  	"github.com/hashicorp/go-syslog"
    24  	"github.com/hashicorp/logutils"
    25  	"github.com/hashicorp/nomad/helper/flag-helpers"
    26  	"github.com/hashicorp/nomad/helper/gated-writer"
    27  	"github.com/hashicorp/nomad/nomad/structs/config"
    28  	"github.com/hashicorp/scada-client/scada"
    29  	"github.com/mitchellh/cli"
    30  )
    31  
// gracefulTimeout controls how long we wait before forcefully terminating
// the process when a graceful leave is in progress during shutdown.
const gracefulTimeout = 5 * time.Second
    34  
// Command is a Command implementation that runs a Nomad agent.
// The command will not end unless a shutdown message is sent on the
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
// exit.
type Command struct {
	Revision          string // VCS revision, stamped into the merged Config
	Version           string // Nomad version string
	VersionPrerelease string // pre-release suffix (e.g. "dev"), if any
	Ui                cli.Ui
	ShutdownCh        <-chan struct{} // external shutdown requests arrive here

	args           []string              // raw CLI args, kept for config reloads
	agent          *Agent                // the running agent instance
	httpServer     *HTTPServer           // primary HTTP API server
	logFilter      *logutils.LevelFilter // level filter applied to all log output
	logOutput      io.Writer             // combined log output writer
	retryJoinErrCh chan struct{}         // closed when retry-join exhausts its attempts

	scadaProvider *scada.Provider // SCADA/Atlas provider, if enabled
	scadaHttp     *HTTPServer     // HTTP server bound to the SCADA listener
}
    56  
// readConfig parses CLI flags and configuration files into a single
// Config. Precedence: CLI flags win over config files, and later config
// files win over earlier ones. In -dev mode validation is skipped.
// Returns nil on any parse or validation error (reported via the Ui).
func (c *Command) readConfig() *Config {
	var dev bool
	var configPath []string
	var servers string
	var meta []string

	// Make a new, empty config.
	cmdConfig := &Config{
		Atlas:  &AtlasConfig{},
		Client: &ClientConfig{},
		Ports:  &Ports{},
		Server: &ServerConfig{},
		Vault:  &config.VaultConfig{},
	}

	flags := flag.NewFlagSet("agent", flag.ContinueOnError)
	flags.Usage = func() { c.Ui.Error(c.Help()) }

	// Role options
	flags.BoolVar(&dev, "dev", false, "")
	flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
	flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")

	// Server-only options
	flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
	flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "")
	flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "")
	flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "")
	flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")

	// Client-only options
	flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
	flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "")
	flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "")
	flags.StringVar(&servers, "servers", "", "")
	flags.Var((*flaghelper.StringFlag)(&meta), "meta", "")
	flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "")
	flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "")

	// General options
	flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config")
	flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
	flags.StringVar(&cmdConfig.Region, "region", "", "")
	flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
	flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
	flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
	flags.StringVar(&cmdConfig.NodeName, "node", "", "")

	// Atlas options
	flags.StringVar(&cmdConfig.Atlas.Infrastructure, "atlas", "", "")
	flags.BoolVar(&cmdConfig.Atlas.Join, "atlas-join", false, "")
	flags.StringVar(&cmdConfig.Atlas.Token, "atlas-token", "", "")

	// Vault options. Boolean flags use FuncBoolVar with a *bool target so
	// that "flag not given" (nil) is distinguishable from an explicit
	// false during the later Merge.
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.Enabled = &b
		return nil
	}), "vault-enabled", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.AllowUnauthenticated = &b
		return nil
	}), "vault-allow-unauthenticated", "")
	flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "")
	flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.TLSSkipVerify = &b
		return nil
	}), "vault-tls-skip-verify", "")
	flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "")

	if err := flags.Parse(c.args); err != nil {
		return nil
	}

	// Split the servers.
	if servers != "" {
		cmdConfig.Client.Servers = strings.Split(servers, ",")
	}

	// Parse the meta flags; each entry must be a KEY=VALUE pair.
	metaLength := len(meta)
	if metaLength != 0 {
		cmdConfig.Client.Meta = make(map[string]string, metaLength)
		for _, kv := range meta {
			parts := strings.SplitN(kv, "=", 2)
			if len(parts) != 2 {
				c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv))
				return nil
			}

			cmdConfig.Client.Meta[parts[0]] = parts[1]
		}
	}

	// Load the configuration, starting from the dev or default base.
	// NOTE: this local shadows the imported "config" package for the
	// remainder of the function.
	var config *Config
	if dev {
		config = DevConfig()
	} else {
		config = DefaultConfig()
	}
	for _, path := range configPath {
		current, err := LoadConfig(path)
		if err != nil {
			c.Ui.Error(fmt.Sprintf(
				"Error loading configuration from %s: %s", path, err))
			return nil
		}

		// The user asked us to load some config here but we didn't find any,
		// so we'll complain but continue.
		if current == nil || reflect.DeepEqual(current, &Config{}) {
			c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path))
		}

		if config == nil {
			config = current
		} else {
			config = config.Merge(current)
		}
	}

	// Ensure the sub-structs at least exist so later field accesses
	// cannot hit a nil pointer.
	if config.Atlas == nil {
		config.Atlas = &AtlasConfig{}
	}
	if config.Client == nil {
		config.Client = &ClientConfig{}
	}
	if config.Server == nil {
		config.Server = &ServerConfig{}
	}

	// Merge any CLI options over config file options
	config = config.Merge(cmdConfig)

	// Set the version info
	config.Revision = c.Revision
	config.Version = c.Version
	config.VersionPrerelease = c.VersionPrerelease

	if dev {
		// Skip validation for dev mode
		return config
	}

	if config.Server.EncryptKey != "" {
		if _, err := config.Server.EncryptBytes(); err != nil {
			c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
			return nil
		}
		// An existing keyring on disk takes precedence over -encrypt.
		keyfile := filepath.Join(config.DataDir, serfKeyring)
		if _, err := os.Stat(keyfile); err == nil {
			c.Ui.Error("WARNING: keyring exists but -encrypt given, using keyring")
		}
	}

	// Parse the RetryInterval.
	dur, err := time.ParseDuration(config.Server.RetryInterval)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err))
		return nil
	}
	config.Server.retryInterval = dur

	// Check that the server is running in at least one mode.
	if !(config.Server.Enabled || config.Client.Enabled) {
		c.Ui.Error("Must specify either server, client or dev mode for the agent.")
		return nil
	}

	// Verify the paths are absolute.
	dirs := map[string]string{
		"data-dir":  config.DataDir,
		"alloc-dir": config.Client.AllocDir,
		"state-dir": config.Client.StateDir,
	}
	for k, dir := range dirs {
		if dir == "" {
			continue
		}

		if !filepath.IsAbs(dir) {
			c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir))
			return nil
		}
	}

	// Ensure that we have the directories we need to run.
	if config.Server.Enabled && config.DataDir == "" {
		c.Ui.Error("Must specify data directory")
		return nil
	}

	// The config is valid if the top-level data-dir is set or if both
	// alloc-dir and state-dir are set.
	if config.Client.Enabled && config.DataDir == "" {
		if config.Client.AllocDir == "" || config.Client.StateDir == "" {
			c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.")
			return nil
		}
	}

	// Check the bootstrap flags
	if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
		c.Ui.Error("Bootstrap requires server mode to be enabled")
		return nil
	}
	if config.Server.BootstrapExpect == 1 {
		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
	}

	// Check to see if we should read the Vault token from the environment
	if config.Vault.Token == "" {
		if token, ok := os.LookupEnv("VAULT_TOKEN"); ok {
			config.Vault.Token = token
		}
	}

	return config
}
   284  
   285  // setupLoggers is used to setup the logGate, logWriter, and our logOutput
   286  func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) {
   287  	// Setup logging. First create the gated log writer, which will
   288  	// store logs until we're ready to show them. Then create the level
   289  	// filter, filtering logs of the specified level.
   290  	logGate := &gatedwriter.Writer{
   291  		Writer: &cli.UiWriter{Ui: c.Ui},
   292  	}
   293  
   294  	c.logFilter = LevelFilter()
   295  	c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
   296  	c.logFilter.Writer = logGate
   297  	if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
   298  		c.Ui.Error(fmt.Sprintf(
   299  			"Invalid log level: %s. Valid log levels are: %v",
   300  			c.logFilter.MinLevel, c.logFilter.Levels))
   301  		return nil, nil, nil
   302  	}
   303  
   304  	// Check if syslog is enabled
   305  	var syslog io.Writer
   306  	if config.EnableSyslog {
   307  		l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
   308  		if err != nil {
   309  			c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
   310  			return nil, nil, nil
   311  		}
   312  		syslog = &SyslogWrapper{l, c.logFilter}
   313  	}
   314  
   315  	// Create a log writer, and wrap a logOutput around it
   316  	logWriter := NewLogWriter(512)
   317  	var logOutput io.Writer
   318  	if syslog != nil {
   319  		logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
   320  	} else {
   321  		logOutput = io.MultiWriter(c.logFilter, logWriter)
   322  	}
   323  	c.logOutput = logOutput
   324  	log.SetOutput(logOutput)
   325  	return logGate, logWriter, logOutput
   326  }
   327  
// setupAgent is used to start the agent and various interfaces: the
// SCADA integration, the HTTP API server, and (unless disabled) the
// periodic update check. On failure the partially-started agent is
// shut down before the error is returned.
func (c *Command) setupAgent(config *Config, logOutput io.Writer) error {
	c.Ui.Output("Starting Nomad agent...")
	agent, err := NewAgent(config, logOutput)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
		return err
	}
	c.agent = agent

	// Enable the SCADA integration
	if err := c.setupSCADA(config); err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting SCADA: %s", err))
		return err
	}

	// Setup the HTTP server
	http, err := NewHTTPServer(agent, config, logOutput)
	if err != nil {
		agent.Shutdown()
		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
		return err
	}
	c.httpServer = http

	// Setup update checking
	if !config.DisableUpdateCheck {
		version := config.Version
		if config.VersionPrerelease != "" {
			version += fmt.Sprintf("-%s", config.VersionPrerelease)
		}
		updateParams := &checkpoint.CheckParams{
			Product: "nomad",
			Version: version,
		}
		if !config.DisableAnonymousSignature {
			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
		}

		// Schedule a periodic check with expected interval of 24 hours
		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)

		// Do an immediate check within the next 30 seconds; the random
		// stagger avoids a thundering herd of checks across a fleet.
		go func() {
			time.Sleep(lib.RandomStagger(30 * time.Second))
			c.checkpointResults(checkpoint.Check(updateParams))
		}()
	}
	return nil
}
   379  
   380  // checkpointResults is used to handler periodic results from our update checker
   381  func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) {
   382  	if err != nil {
   383  		c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err))
   384  		return
   385  	}
   386  	if results.Outdated {
   387  		versionStr := c.Version
   388  		if c.VersionPrerelease != "" {
   389  			versionStr += fmt.Sprintf("-%s", c.VersionPrerelease)
   390  		}
   391  
   392  		c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, versionStr))
   393  	}
   394  	for _, alert := range results.Alerts {
   395  		switch alert.Level {
   396  		case "info":
   397  			c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   398  		default:
   399  			c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   400  		}
   401  	}
   402  }
   403  
// Run parses configuration, sets up logging, telemetry and the agent
// itself, prints the startup banner, and then blocks until shutdown.
// It implements the cli.Command interface; the return value is the
// process exit code.
func (c *Command) Run(args []string) int {
	// Decorate all subsequent UI output with consistent prefixes.
	c.Ui = &cli.PrefixedUi{
		OutputPrefix: "==> ",
		InfoPrefix:   "    ",
		ErrorPrefix:  "==> ",
		Ui:           c.Ui,
	}

	// Parse our configs
	c.args = args
	config := c.readConfig()
	if config == nil {
		return 1
	}

	// Setup the log outputs; a nil gate signals a setup failure that
	// was already reported via the Ui.
	logGate, _, logOutput := c.setupLoggers(config)
	if logGate == nil {
		return 1
	}

	// Log config files
	if len(config.Files) > 0 {
		c.Ui.Info(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", ")))
	} else {
		c.Ui.Info("No configuration files loaded")
	}

	// Initialize the telemetry
	if err := c.setupTelemetry(config); err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
		return 1
	}

	// Create the agent
	if err := c.setupAgent(config, logOutput); err != nil {
		return 1
	}
	defer c.agent.Shutdown()

	// Check and shut down the SCADA listeners at the end
	defer func() {
		if c.httpServer != nil {
			c.httpServer.Shutdown()
		}
		if c.scadaHttp != nil {
			c.scadaHttp.Shutdown()
		}
		if c.scadaProvider != nil {
			c.scadaProvider.Shutdown()
		}
	}()

	// Join startup nodes if specified
	if err := c.startupJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Compile agent information for output later
	info := make(map[string]string)
	info["version"] = fmt.Sprintf("%s%s", config.Version, config.VersionPrerelease)
	info["client"] = strconv.FormatBool(config.Client.Enabled)
	info["log level"] = config.LogLevel
	info["server"] = strconv.FormatBool(config.Server.Enabled)
	info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter)
	if config.Atlas != nil && config.Atlas.Infrastructure != "" {
		info["atlas"] = fmt.Sprintf("(Infrastructure: '%s' Join: %v)",
			config.Atlas.Infrastructure, config.Atlas.Join)
	} else {
		info["atlas"] = "<disabled>"
	}

	// Sort the keys for output
	infoKeys := make([]string, 0, len(info))
	for key := range info {
		infoKeys = append(infoKeys, key)
	}
	sort.Strings(infoKeys)

	// Agent configuration output; keys are right-aligned within the
	// padding width (all keys above are shorter than 18 characters).
	padding := 18
	c.Ui.Output("Nomad agent configuration:\n")
	for _, k := range infoKeys {
		c.Ui.Info(fmt.Sprintf(
			"%s%s: %s",
			strings.Repeat(" ", padding-len(k)),
			strings.Title(k),
			info[k]))
	}
	c.Ui.Output("")

	// Output the header that the server has started
	c.Ui.Output("Nomad agent started! Log data will stream in below:\n")

	// Enable log streaming
	logGate.Flush()

	// Start retry join process
	c.retryJoinErrCh = make(chan struct{})
	go c.retryJoin(config)

	// Wait for exit
	return c.handleSignals(config)
}
   509  
// handleSignals blocks until we get an exit-causing signal, returning
// the process exit code. SIGPIPE is ignored and SIGHUP triggers a
// config reload; both loop back to waiting. Interrupt/SIGTERM trigger a
// graceful leave when configured, bounded by gracefulTimeout.
func (c *Command) handleSignals(config *Config) int {
	signalCh := make(chan os.Signal, 4)
	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)

	// Wait for a signal
WAIT:
	var sig os.Signal
	select {
	case s := <-signalCh:
		sig = s
	case <-c.ShutdownCh:
		// Treat an external shutdown request like an interrupt so that
		// LeaveOnInt decides whether to leave gracefully.
		sig = os.Interrupt
	case <-c.retryJoinErrCh:
		// Retry join exhausted its attempts; exit with an error.
		return 1
	}
	c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))

	// Skip any SIGPIPE signal (See issue #1798)
	if sig == syscall.SIGPIPE {
		goto WAIT
	}

	// Check if this is a SIGHUP, which reloads configuration rather
	// than shutting down.
	if sig == syscall.SIGHUP {
		if conf := c.handleReload(config); conf != nil {
			*config = *conf
		}
		goto WAIT
	}

	// Check if we should do a graceful leave
	graceful := false
	if sig == os.Interrupt && config.LeaveOnInt {
		graceful = true
	} else if sig == syscall.SIGTERM && config.LeaveOnTerm {
		graceful = true
	}

	// Bail fast if not doing a graceful leave
	if !graceful {
		return 1
	}

	// Attempt a graceful leave
	gracefulCh := make(chan struct{})
	c.Ui.Output("Gracefully shutting down agent...")
	go func() {
		if err := c.agent.Leave(); err != nil {
			c.Ui.Error(fmt.Sprintf("Error: %s", err))
			return
		}
		close(gracefulCh)
	}()

	// Wait for the leave to complete, a second signal, or the graceful
	// timeout — whichever comes first.
	select {
	case <-signalCh:
		return 1
	case <-time.After(gracefulTimeout):
		return 1
	case <-gracefulCh:
		return 0
	}
}
   575  
   576  // handleReload is invoked when we should reload our configs, e.g. SIGHUP
   577  func (c *Command) handleReload(config *Config) *Config {
   578  	c.Ui.Output("Reloading configuration...")
   579  	newConf := c.readConfig()
   580  	if newConf == nil {
   581  		c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
   582  		return config
   583  	}
   584  
   585  	// Change the log level
   586  	minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
   587  	if ValidateLevelFilter(minLevel, c.logFilter) {
   588  		c.logFilter.SetMinLevel(minLevel)
   589  	} else {
   590  		c.Ui.Error(fmt.Sprintf(
   591  			"Invalid log level: %s. Valid log levels are: %v",
   592  			minLevel, c.logFilter.Levels))
   593  
   594  		// Keep the current log level
   595  		newConf.LogLevel = config.LogLevel
   596  	}
   597  	return newConf
   598  }
   599  
// setupTelemetry is used to setup the telemetry sub-systems: an
// in-memory sink (dumped to stderr on SIGUSR1) plus any configured
// statsite, statsd, DataDog, and Circonus sinks.
func (c *Command) setupTelemetry(config *Config) error {
	/* Setup telemetry
	Aggregate on 10 second intervals for 1 minute. Expose the
	metrics over stderr when there is a SIGUSR1 received.
	*/
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)

	// Fall back to an empty telemetry config so field reads below are
	// safe even when no telemetry block was configured.
	var telConfig *Telemetry
	if config.Telemetry == nil {
		telConfig = &Telemetry{}
	} else {
		telConfig = config.Telemetry
	}

	metricsConf := metrics.DefaultConfig("nomad")
	metricsConf.EnableHostname = !telConfig.DisableHostname

	// Configure the statsite sink
	var fanout metrics.FanoutSink
	if telConfig.StatsiteAddr != "" {
		sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the statsd sink
	if telConfig.StatsdAddr != "" {
		sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the datadog sink
	if telConfig.DataDogAddr != "" {
		sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the Circonus sink
	if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" {
		cfg := &circonus.Config{}
		cfg.Interval = telConfig.CirconusSubmissionInterval
		cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken
		cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp
		cfg.CheckManager.API.URL = telConfig.CirconusAPIURL
		cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL
		cfg.CheckManager.Check.ID = telConfig.CirconusCheckID
		cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation
		cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID
		cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag
		cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags
		cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName
		cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID
		cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag

		// Default the application name and search tag when unset.
		if cfg.CheckManager.API.TokenApp == "" {
			cfg.CheckManager.API.TokenApp = "nomad"
		}

		if cfg.CheckManager.Check.SearchTag == "" {
			cfg.CheckManager.Check.SearchTag = "service:nomad"
		}

		sink, err := circonus.NewCirconusSink(cfg)
		if err != nil {
			return err
		}
		sink.Start()
		fanout = append(fanout, sink)
	}

	// Initialize the global sink: fan out to every configured sink plus
	// the in-memory sink, or use the in-memory sink alone.
	if len(fanout) > 0 {
		fanout = append(fanout, inm)
		metrics.NewGlobal(metricsConf, fanout)
	} else {
		metricsConf.EnableHostname = false
		metrics.NewGlobal(metricsConf, inm)
	}
	return nil
}
   690  
// setupSCADA is used to start a new SCADA provider and listener,
// replacing any existing listeners. It is a no-op when no Atlas
// infrastructure is configured.
func (c *Command) setupSCADA(config *Config) error {
	// Shut down existing SCADA listeners
	if c.scadaProvider != nil {
		c.scadaProvider.Shutdown()
	}
	if c.scadaHttp != nil {
		c.scadaHttp.Shutdown()
	}

	// No-op if we don't have an infrastructure
	if config.Atlas == nil || config.Atlas.Infrastructure == "" {
		return nil
	}

	// Create the new provider and listener
	c.Ui.Output("Connecting to Atlas: " + config.Atlas.Infrastructure)

	scadaConfig := &scada.Config{
		Service:      "nomad",
		Version:      fmt.Sprintf("%s%s", config.Version, config.VersionPrerelease),
		ResourceType: "nomad-cluster",
		Meta: map[string]string{
			"auto-join":  strconv.FormatBool(config.Atlas.Join),
			"region":     config.Region,
			"datacenter": config.Datacenter,
			"client":     strconv.FormatBool(config.Client != nil && config.Client.Enabled),
			"server":     strconv.FormatBool(config.Server != nil && config.Server.Enabled),
		},
		Atlas: scada.AtlasConfig{
			Endpoint:       config.Atlas.Endpoint,
			Infrastructure: config.Atlas.Infrastructure,
			Token:          config.Atlas.Token,
		},
	}

	provider, list, err := scada.NewHTTPProvider(scadaConfig, c.logOutput)
	if err != nil {
		return err
	}
	c.scadaProvider = provider
	// Serve the HTTP API over the SCADA-provided listener.
	c.scadaHttp = newScadaHttp(c.agent, list)
	return nil
}
   736  
   737  func (c *Command) startupJoin(config *Config) error {
   738  	if len(config.Server.StartJoin) == 0 || !config.Server.Enabled {
   739  		return nil
   740  	}
   741  
   742  	c.Ui.Output("Joining cluster...")
   743  	n, err := c.agent.server.Join(config.Server.StartJoin)
   744  	if err != nil {
   745  		return err
   746  	}
   747  
   748  	c.Ui.Info(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
   749  	return nil
   750  }
   751  
   752  // retryJoin is used to handle retrying a join until it succeeds or all retries
   753  // are exhausted.
   754  func (c *Command) retryJoin(config *Config) {
   755  	if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
   756  		return
   757  	}
   758  
   759  	logger := c.agent.logger
   760  	logger.Printf("[INFO] agent: Joining cluster...")
   761  
   762  	attempt := 0
   763  	for {
   764  		n, err := c.agent.server.Join(config.Server.RetryJoin)
   765  		if err == nil {
   766  			logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
   767  			return
   768  		}
   769  
   770  		attempt++
   771  		if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
   772  			logger.Printf("[ERR] agent: max join retry exhausted, exiting")
   773  			close(c.retryJoinErrCh)
   774  			return
   775  		}
   776  
   777  		logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
   778  			config.Server.RetryInterval)
   779  		time.Sleep(config.Server.retryInterval)
   780  	}
   781  }
   782  
// Synopsis returns a one-line description of the command for CLI
// help listings.
func (c *Command) Synopsis() string {
	return "Runs a Nomad agent"
}
   786  
   787  func (c *Command) Help() string {
   788  	helpText := `
   789  Usage: nomad agent [options]
   790  
   791    Starts the Nomad agent and runs until an interrupt is received.
   792    The agent may be a client and/or server.
   793  
   794    The Nomad agent's configuration primarily comes from the config
   795    files used, but a subset of the options may also be passed directly
   796    as CLI arguments, listed below.
   797  
   798  General Options (clients and servers):
   799  
   800    -bind=<addr>
   801      The address the agent will bind to for all of its various network
   802      services. The individual services that run bind to individual
   803      ports on this address. Defaults to the loopback 127.0.0.1.
   804  
   805    -config=<path>
   806      The path to either a single config file or a directory of config
   807      files to use for configuring the Nomad agent. This option may be
   808      specified multiple times. If multiple config files are used, the
   809      values from each will be merged together. During merging, values
   810      from files found later in the list are merged over values from
   811      previously parsed files.
   812  
   813    -data-dir=<path>
   814      The data directory used to store state and other persistent data.
   815      On client machines this is used to house allocation data such as
   816      downloaded artifacts used by drivers. On server nodes, the data
   817      dir is also used to store the replicated log.
   818  
   819    -dc=<datacenter>
   820      The name of the datacenter this Nomad agent is a member of. By
   821      default this is set to "dc1".
   822  
   823    -log-level=<level>
   824      Specify the verbosity level of Nomad's logs. Valid values include
   825      DEBUG, INFO, and WARN, in decreasing order of verbosity. The
   826      default is INFO.
   827  
   828    -node=<name>
   829      The name of the local agent. This name is used to identify the node
   830      in the cluster. The name must be unique per region. The default is
   831      the current hostname of the machine.
   832  
   833    -region=<region>
   834      Name of the region the Nomad agent will be a member of. By default
   835      this value is set to "global".
   836  
   837    -dev
   838      Start the agent in development mode. This enables a pre-configured
   839      dual-role agent (client + server) which is useful for developing
   840      or testing Nomad. No other configuration is required to start the
   841      agent in this mode.
   842  
   843  Server Options:
   844  
   845    -server
   846      Enable server mode for the agent. Agents in server mode are
   847      clustered together and handle the additional responsibility of
   848      leader election, data replication, and scheduling work onto
   849      eligible client nodes.
   850  
   851    -bootstrap-expect=<num>
   852      Configures the expected number of servers nodes to wait for before
   853      bootstrapping the cluster. Once <num> servers have joined eachother,
   854      Nomad initiates the bootstrap process.
   855  
   856    -encrypt=<key>
   857      Provides the gossip encryption key
   858  
   859    -join=<address>
   860      Address of an agent to join at start time. Can be specified
   861      multiple times.
   862  
   863    -retry-join=<address>
   864      Address of an agent to join at start time with retries enabled.
   865      Can be specified multiple times.
   866  
   867    -retry-max=<num>
   868      Maximum number of join attempts. Defaults to 0, which will retry
   869      indefinitely.
   870  
   871    -retry-interval=<dur>
   872      Time to wait between join attempts.
   873  
   874    -rejoin
   875      Ignore a previous leave and attempt to rejoin the cluster.
   876  
   877  Client Options:
   878  
   879    -client
   880      Enable client mode for the agent. Client mode enables a given node to be
   881      evaluated for allocations. If client mode is not enabled, no work will be
   882      scheduled to the agent.
   883  
   884    -state-dir
   885      The directory used to store state and other persistent data. If not
   886      specified a subdirectory under the "-data-dir" will be used.
   887  
   888    -alloc-dir
   889      The directory used to store allocation data such as downloaded artifacts as
   890      well as data produced by tasks. If not specified, a subdirectory under the
   891      "-data-dir" will be used.
   892  
   893    -servers
   894      A list of known server addresses to connect to given as "host:port" and
   895      delimited by commas.
   896  
   897    -node-class
   898      Mark this node as a member of a node-class. This can be used to label
   899      similar node types.
   900  
   901    -meta
   902      User specified metadata to associate with the node. Each instance of -meta
   903      parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair
   904      to be added.
   905  
   906    -network-interface
   907      Forces the network fingerprinter to use the specified network interface.
   908  
   909    -network-speed
   910      The default speed for network interfaces in MBits if the link speed can not
   911      be determined dynamically.
   912  
   913  Vault Options:
   914  
   915    -vault-enabled
   916      Whether to enable or disable Vault integration.
   917  
   918    -vault-address=<addr>
   919      The address to communicate with Vault. This should be provided with the http://
   920      or https:// prefix.
   921  
   922    -vault-token=<token>
   923      The Vault token used to derive tokens from Vault on behalf of clients.
   924      This only needs to be set on Servers. Overrides the Vault token read from
   925      the VAULT_TOKEN environment variable.
   926  
   927    -vault-allow-unauthenticated
   928      Whether to allow jobs to be submitted that request Vault tokens but do not
   929      authenticate. The flag only applies to Servers.
   930  
   931    -vault-ca-file=<path>
   932      The path to a PEM-encoded CA cert file to use to verify the Vault server SSL
   933      certificate.
   934  
   935    -vault-ca-path=<path>
   936      The path to a directory of PEM-encoded CA cert files to verify the Vault server
   937      certificate.
   938  
   939    -vault-cert-file=<path>
   940      The path to the certificate for Vault communication.
   941  
   942    -vault-key-file=<path>
   943      The path to the private key for Vault communication.
   944  
   945    -vault-tls-skip-verify=<bool>
   946      Enables or disables SSL certificate verification.
   947  
   948    -vault-tls-server-name=<name>
   949      Used to set the SNI host when connecting over TLS.
   950  
   951  Atlas Options:
   952  
   953    -atlas=<infrastructure>
   954      The Atlas infrastructure name to configure. This enables the SCADA
   955      client and attempts to connect Nomad to the HashiCorp Atlas service
   956      using the provided infrastructure name and token.
   957  
   958    -atlas-token=<token>
   959      The Atlas token to use when connecting to the HashiCorp Atlas
   960      service. This must be provided to successfully connect your Nomad
   961      agent to Atlas.
   962  
   963    -atlas-join
   964      Enable the Atlas join feature. This mode allows agents to discover
   965      each other automatically using the SCADA integration features.
   966   `
   967  	return strings.TrimSpace(helpText)
   968  }