github.com/mongey/nomad@v0.5.2/command/agent/command.go (about)

     1  package agent
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"io"
     7  	"log"
     8  	"os"
     9  	"os/signal"
    10  	"path/filepath"
    11  	"reflect"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"syscall"
    16  	"time"
    17  
    18  	"github.com/armon/go-metrics"
    19  	"github.com/armon/go-metrics/circonus"
    20  	"github.com/armon/go-metrics/datadog"
    21  	"github.com/hashicorp/consul/lib"
    22  	"github.com/hashicorp/go-checkpoint"
    23  	"github.com/hashicorp/go-syslog"
    24  	"github.com/hashicorp/logutils"
    25  	"github.com/hashicorp/nomad/helper/flag-helpers"
    26  	"github.com/hashicorp/nomad/helper/gated-writer"
    27  	"github.com/hashicorp/nomad/nomad/structs/config"
    28  	"github.com/hashicorp/scada-client/scada"
    29  	"github.com/mitchellh/cli"
    30  )
    31  
// gracefulTimeout controls how long we wait for a graceful leave to
// complete before forcefully terminating the agent.
const gracefulTimeout = 5 * time.Second
    34  
// Command is a Command implementation that runs a Nomad agent.
// The command will not end unless a shutdown message is sent on the
// ShutdownCh. If two messages are sent on the ShutdownCh it will forcibly
// exit.
type Command struct {
	Revision          string // VCS revision the binary was built from
	Version           string // release version string
	VersionPrerelease string // prerelease tag appended to Version; empty for final releases
	Ui                cli.Ui
	ShutdownCh        <-chan struct{} // receiving here requests shutdown; a second receive forces exit

	args           []string // raw CLI args, stashed by Run for readConfig
	agent          *Agent
	httpServer     *HTTPServer
	logFilter      *logutils.LevelFilter // level filter shared by all log sinks
	logOutput      io.Writer             // combined writer installed as the log output
	retryJoinErrCh chan struct{}         // closed by retryJoin when retries are exhausted

	scadaProvider *scada.Provider
	scadaHttp     *HTTPServer // HTTP server bound to the SCADA listener
}
    56  
// readConfig parses CLI flags and configuration files into a single
// merged *Config. Precedence, lowest to highest: default (or dev)
// config, config files in the order given, then CLI flags. On any
// parse or validation error it reports to the UI and returns nil.
// Dev mode skips most validation.
func (c *Command) readConfig() *Config {
	var dev bool
	var configPath []string
	var servers string
	var meta []string

	// Make a new, empty config to collect CLI flag values; it is merged
	// over the file-derived config at the end so flags win.
	cmdConfig := &Config{
		Atlas:  &AtlasConfig{},
		Client: &ClientConfig{},
		Ports:  &Ports{},
		Server: &ServerConfig{},
		Vault:  &config.VaultConfig{},
	}

	flags := flag.NewFlagSet("agent", flag.ContinueOnError)
	flags.Usage = func() { c.Ui.Error(c.Help()) }

	// Role options
	flags.BoolVar(&dev, "dev", false, "")
	flags.BoolVar(&cmdConfig.Server.Enabled, "server", false, "")
	flags.BoolVar(&cmdConfig.Client.Enabled, "client", false, "")

	// Server-only options
	flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
	flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "")
	flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "")
	flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "")
	flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "")
	flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")

	// Client-only options
	flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
	flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "")
	flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "")
	flags.StringVar(&servers, "servers", "", "")
	flags.Var((*flaghelper.StringFlag)(&meta), "meta", "")
	flags.StringVar(&cmdConfig.Client.NetworkInterface, "network-interface", "", "")
	flags.IntVar(&cmdConfig.Client.NetworkSpeed, "network-speed", 0, "")

	// General options
	flags.Var((*flaghelper.StringFlag)(&configPath), "config", "config")
	flags.StringVar(&cmdConfig.BindAddr, "bind", "", "")
	flags.StringVar(&cmdConfig.Region, "region", "", "")
	flags.StringVar(&cmdConfig.DataDir, "data-dir", "", "")
	flags.StringVar(&cmdConfig.Datacenter, "dc", "", "")
	flags.StringVar(&cmdConfig.LogLevel, "log-level", "", "")
	flags.StringVar(&cmdConfig.NodeName, "node", "", "")

	// Atlas options
	flags.StringVar(&cmdConfig.Atlas.Infrastructure, "atlas", "", "")
	flags.BoolVar(&cmdConfig.Atlas.Join, "atlas-join", false, "")
	flags.StringVar(&cmdConfig.Atlas.Token, "atlas-token", "", "")

	// Vault options. Tri-state booleans use FuncBoolVar so an unset flag
	// leaves the pointer nil (distinguishing "not set" from "false").
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.Enabled = &b
		return nil
	}), "vault-enabled", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.AllowUnauthenticated = &b
		return nil
	}), "vault-allow-unauthenticated", "")
	flags.StringVar(&cmdConfig.Vault.Token, "vault-token", "", "")
	flags.StringVar(&cmdConfig.Vault.Addr, "vault-address", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaFile, "vault-ca-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCaPath, "vault-ca-path", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSCertFile, "vault-cert-file", "", "")
	flags.StringVar(&cmdConfig.Vault.TLSKeyFile, "vault-key-file", "", "")
	flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
		cmdConfig.Vault.TLSSkipVerify = &b
		return nil
	}), "vault-tls-skip-verify", "")
	flags.StringVar(&cmdConfig.Vault.TLSServerName, "vault-tls-server-name", "", "")

	if err := flags.Parse(c.args); err != nil {
		return nil
	}

	// Split the comma-delimited -servers value into individual addresses.
	if servers != "" {
		cmdConfig.Client.Servers = strings.Split(servers, ",")
	}

	// Parse the repeated -meta flags, each expected as KEY=VALUE.
	metaLength := len(meta)
	if metaLength != 0 {
		cmdConfig.Client.Meta = make(map[string]string, metaLength)
		for _, kv := range meta {
			parts := strings.SplitN(kv, "=", 2)
			if len(parts) != 2 {
				c.Ui.Error(fmt.Sprintf("Error parsing Client.Meta value: %v", kv))
				return nil
			}

			cmdConfig.Client.Meta[parts[0]] = parts[1]
		}
	}

	// Load the base configuration. NOTE: this local shadows the imported
	// `config` package for the remainder of the function.
	var config *Config
	if dev {
		config = DevConfig()
	} else {
		config = DefaultConfig()
	}
	// Layer each config file (or directory) over the base, in order.
	for _, path := range configPath {
		current, err := LoadConfig(path)
		if err != nil {
			c.Ui.Error(fmt.Sprintf(
				"Error loading configuration from %s: %s", path, err))
			return nil
		}

		// The user asked us to load some config here but we didn't find any,
		// so we'll complain but continue.
		if current == nil || reflect.DeepEqual(current, &Config{}) {
			c.Ui.Warn(fmt.Sprintf("No configuration loaded from %s", path))
		}

		if config == nil {
			config = current
		} else {
			config = config.Merge(current)
		}
	}

	// Ensure the sub-structs at least exist so later field accesses are safe.
	if config.Atlas == nil {
		config.Atlas = &AtlasConfig{}
	}
	if config.Client == nil {
		config.Client = &ClientConfig{}
	}
	if config.Server == nil {
		config.Server = &ServerConfig{}
	}

	// Merge any CLI options over config file options
	config = config.Merge(cmdConfig)

	// Set the version info
	config.Revision = c.Revision
	config.Version = c.Version
	config.VersionPrerelease = c.VersionPrerelease

	// Normalize binds, ports, addresses, and advertise
	if err := config.normalizeAddrs(); err != nil {
		c.Ui.Error(err.Error())
		return nil
	}

	// Check to see if we should read the Vault token from the environment
	if config.Vault.Token == "" {
		if token, ok := os.LookupEnv("VAULT_TOKEN"); ok {
			config.Vault.Token = token
		}
	}

	if dev {
		// Skip validation for dev mode
		return config
	}

	if config.Server.EncryptKey != "" {
		if _, err := config.Server.EncryptBytes(); err != nil {
			c.Ui.Error(fmt.Sprintf("Invalid encryption key: %s", err))
			return nil
		}
		// An existing keyring on disk takes precedence over -encrypt.
		keyfile := filepath.Join(config.DataDir, serfKeyring)
		if _, err := os.Stat(keyfile); err == nil {
			c.Ui.Warn("WARNING: keyring exists but -encrypt given, using keyring")
		}
	}

	// Parse the RetryInterval.
	dur, err := time.ParseDuration(config.Server.RetryInterval)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err))
		return nil
	}
	config.Server.retryInterval = dur

	// Check that the agent is running in at least one mode.
	if !(config.Server.Enabled || config.Client.Enabled) {
		c.Ui.Error("Must specify either server, client or dev mode for the agent.")
		return nil
	}

	// Verify the paths are absolute.
	dirs := map[string]string{
		"data-dir":  config.DataDir,
		"alloc-dir": config.Client.AllocDir,
		"state-dir": config.Client.StateDir,
	}
	for k, dir := range dirs {
		if dir == "" {
			continue
		}

		if !filepath.IsAbs(dir) {
			c.Ui.Error(fmt.Sprintf("%s must be given as an absolute path: got %v", k, dir))
			return nil
		}
	}

	// Ensure that we have the directories we need to run.
	if config.Server.Enabled && config.DataDir == "" {
		c.Ui.Error("Must specify data directory")
		return nil
	}

	// The config is valid if the top-level data-dir is set or if both
	// alloc-dir and state-dir are set.
	if config.Client.Enabled && config.DataDir == "" {
		if config.Client.AllocDir == "" || config.Client.StateDir == "" {
			c.Ui.Error("Must specify both the state and alloc dir if data-dir is omitted.")
			return nil
		}
	}

	// Check the bootstrap flags
	if config.Server.BootstrapExpect > 0 && !config.Server.Enabled {
		c.Ui.Error("Bootstrap requires server mode to be enabled")
		return nil
	}
	if config.Server.BootstrapExpect == 1 {
		c.Ui.Error("WARNING: Bootstrap mode enabled! Potentially unsafe operation.")
	}

	return config
}
   290  
   291  // setupLoggers is used to setup the logGate, logWriter, and our logOutput
   292  func (c *Command) setupLoggers(config *Config) (*gatedwriter.Writer, *logWriter, io.Writer) {
   293  	// Setup logging. First create the gated log writer, which will
   294  	// store logs until we're ready to show them. Then create the level
   295  	// filter, filtering logs of the specified level.
   296  	logGate := &gatedwriter.Writer{
   297  		Writer: &cli.UiWriter{Ui: c.Ui},
   298  	}
   299  
   300  	c.logFilter = LevelFilter()
   301  	c.logFilter.MinLevel = logutils.LogLevel(strings.ToUpper(config.LogLevel))
   302  	c.logFilter.Writer = logGate
   303  	if !ValidateLevelFilter(c.logFilter.MinLevel, c.logFilter) {
   304  		c.Ui.Error(fmt.Sprintf(
   305  			"Invalid log level: %s. Valid log levels are: %v",
   306  			c.logFilter.MinLevel, c.logFilter.Levels))
   307  		return nil, nil, nil
   308  	}
   309  
   310  	// Check if syslog is enabled
   311  	var syslog io.Writer
   312  	if config.EnableSyslog {
   313  		l, err := gsyslog.NewLogger(gsyslog.LOG_NOTICE, config.SyslogFacility, "nomad")
   314  		if err != nil {
   315  			c.Ui.Error(fmt.Sprintf("Syslog setup failed: %v", err))
   316  			return nil, nil, nil
   317  		}
   318  		syslog = &SyslogWrapper{l, c.logFilter}
   319  	}
   320  
   321  	// Create a log writer, and wrap a logOutput around it
   322  	logWriter := NewLogWriter(512)
   323  	var logOutput io.Writer
   324  	if syslog != nil {
   325  		logOutput = io.MultiWriter(c.logFilter, logWriter, syslog)
   326  	} else {
   327  		logOutput = io.MultiWriter(c.logFilter, logWriter)
   328  	}
   329  	c.logOutput = logOutput
   330  	log.SetOutput(logOutput)
   331  	return logGate, logWriter, logOutput
   332  }
   333  
   334  // setupAgent is used to start the agent and various interfaces
   335  func (c *Command) setupAgent(config *Config, logOutput io.Writer) error {
   336  	c.Ui.Output("Starting Nomad agent...")
   337  	agent, err := NewAgent(config, logOutput)
   338  	if err != nil {
   339  		c.Ui.Error(fmt.Sprintf("Error starting agent: %s", err))
   340  		return err
   341  	}
   342  	c.agent = agent
   343  
   344  	// Enable the SCADA integration
   345  	if err := c.setupSCADA(config); err != nil {
   346  		agent.Shutdown()
   347  		c.Ui.Error(fmt.Sprintf("Error starting SCADA: %s", err))
   348  		return err
   349  	}
   350  
   351  	// Setup the HTTP server
   352  	http, err := NewHTTPServer(agent, config, logOutput)
   353  	if err != nil {
   354  		agent.Shutdown()
   355  		c.Ui.Error(fmt.Sprintf("Error starting http server: %s", err))
   356  		return err
   357  	}
   358  	c.httpServer = http
   359  
   360  	// Setup update checking
   361  	if !config.DisableUpdateCheck {
   362  		version := config.Version
   363  		if config.VersionPrerelease != "" {
   364  			version += fmt.Sprintf("-%s", config.VersionPrerelease)
   365  		}
   366  		updateParams := &checkpoint.CheckParams{
   367  			Product: "nomad",
   368  			Version: version,
   369  		}
   370  		if !config.DisableAnonymousSignature {
   371  			updateParams.SignatureFile = filepath.Join(config.DataDir, "checkpoint-signature")
   372  		}
   373  
   374  		// Schedule a periodic check with expected interval of 24 hours
   375  		checkpoint.CheckInterval(updateParams, 24*time.Hour, c.checkpointResults)
   376  
   377  		// Do an immediate check within the next 30 seconds
   378  		go func() {
   379  			time.Sleep(lib.RandomStagger(30 * time.Second))
   380  			c.checkpointResults(checkpoint.Check(updateParams))
   381  		}()
   382  	}
   383  	return nil
   384  }
   385  
   386  // checkpointResults is used to handler periodic results from our update checker
   387  func (c *Command) checkpointResults(results *checkpoint.CheckResponse, err error) {
   388  	if err != nil {
   389  		c.Ui.Error(fmt.Sprintf("Failed to check for updates: %v", err))
   390  		return
   391  	}
   392  	if results.Outdated {
   393  		versionStr := c.Version
   394  		if c.VersionPrerelease != "" {
   395  			versionStr += fmt.Sprintf("-%s", c.VersionPrerelease)
   396  		}
   397  
   398  		c.Ui.Error(fmt.Sprintf("Newer Nomad version available: %s (currently running: %s)", results.CurrentVersion, versionStr))
   399  	}
   400  	for _, alert := range results.Alerts {
   401  		switch alert.Level {
   402  		case "info":
   403  			c.Ui.Info(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   404  		default:
   405  			c.Ui.Error(fmt.Sprintf("Bulletin [%s]: %s (%s)", alert.Level, alert.Message, alert.URL))
   406  		}
   407  	}
   408  }
   409  
// Run parses configuration, wires up logging, telemetry, and the agent
// with its HTTP/SCADA endpoints, then blocks until an exit-causing
// signal arrives. It returns the process exit code (0 on clean exit,
// 1 on any error or forced termination).
func (c *Command) Run(args []string) int {
	// Decorate the existing UI so output/info/error lines get
	// consistent prefixes.
	c.Ui = &cli.PrefixedUi{
		OutputPrefix: "==> ",
		InfoPrefix:   "    ",
		ErrorPrefix:  "==> ",
		Ui:           c.Ui,
	}

	// Parse our configs; readConfig reports its own errors to the UI.
	c.args = args
	config := c.readConfig()
	if config == nil {
		return 1
	}

	// Setup the log outputs; a nil gate signals a setup failure that
	// was already reported.
	logGate, _, logOutput := c.setupLoggers(config)
	if logGate == nil {
		return 1
	}

	// Log config files
	if len(config.Files) > 0 {
		c.Ui.Info(fmt.Sprintf("Loaded configuration from %s", strings.Join(config.Files, ", ")))
	} else {
		c.Ui.Info("No configuration files loaded")
	}

	// Initialize the telemetry
	if err := c.setupTelemetry(config); err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
		return 1
	}

	// Create the agent; setupAgent reports its own errors to the UI.
	if err := c.setupAgent(config, logOutput); err != nil {
		return 1
	}
	defer c.agent.Shutdown()

	// Check and shut down the SCADA listeners at the end. Deferred
	// calls run LIFO, so the listeners close before the agent itself.
	defer func() {
		if c.httpServer != nil {
			c.httpServer.Shutdown()
		}
		if c.scadaHttp != nil {
			c.scadaHttp.Shutdown()
		}
		if c.scadaProvider != nil {
			c.scadaProvider.Shutdown()
		}
	}()

	// Join startup nodes if specified
	if err := c.startupJoin(config); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	// Compile agent information for output later
	info := make(map[string]string)
	info["version"] = fmt.Sprintf("%s%s", config.Version, config.VersionPrerelease)
	info["client"] = strconv.FormatBool(config.Client.Enabled)
	info["log level"] = config.LogLevel
	info["server"] = strconv.FormatBool(config.Server.Enabled)
	info["region"] = fmt.Sprintf("%s (DC: %s)", config.Region, config.Datacenter)
	if config.Atlas != nil && config.Atlas.Infrastructure != "" {
		info["atlas"] = fmt.Sprintf("(Infrastructure: '%s' Join: %v)",
			config.Atlas.Infrastructure, config.Atlas.Join)
	} else {
		info["atlas"] = "<disabled>"
	}

	// Sort the keys for deterministic output (map iteration is random)
	infoKeys := make([]string, 0, len(info))
	for key := range info {
		infoKeys = append(infoKeys, key)
	}
	sort.Strings(infoKeys)

	// Agent configuration output, right-aligned to a fixed column
	padding := 18
	c.Ui.Output("Nomad agent configuration:\n")
	for _, k := range infoKeys {
		c.Ui.Info(fmt.Sprintf(
			"%s%s: %s",
			strings.Repeat(" ", padding-len(k)),
			strings.Title(k),
			info[k]))
	}
	c.Ui.Output("")

	// Output the header that the server has started
	c.Ui.Output("Nomad agent started! Log data will stream in below:\n")

	// Enable log streaming: release everything buffered in the gate
	logGate.Flush()

	// Start retry join process; retryJoin closes retryJoinErrCh when
	// retries are exhausted, which handleSignals treats as fatal.
	c.retryJoinErrCh = make(chan struct{})
	go c.retryJoin(config)

	// Wait for exit
	return c.handleSignals(config)
}
   515  
// handleSignals blocks until we get an exit-causing signal and returns
// the process exit code. SIGPIPE is ignored, SIGHUP triggers a config
// reload, and Interrupt/SIGTERM may attempt a graceful leave depending
// on the LeaveOnInt/LeaveOnTerm configuration.
func (c *Command) handleSignals(config *Config) int {
	signalCh := make(chan os.Signal, 4)
	signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGPIPE)

	// Wait for a signal
WAIT:
	var sig os.Signal
	select {
	case s := <-signalCh:
		sig = s
	case <-c.ShutdownCh:
		// Programmatic shutdown is treated like an interrupt.
		sig = os.Interrupt
	case <-c.retryJoinErrCh:
		// retryJoin exhausted its attempts; exit with an error.
		return 1
	}
	c.Ui.Output(fmt.Sprintf("Caught signal: %v", sig))

	// Skip any SIGPIPE signal (See issue #1798)
	if sig == syscall.SIGPIPE {
		goto WAIT
	}

	// Check if this is a SIGHUP: reload config in place and keep waiting
	if sig == syscall.SIGHUP {
		if conf := c.handleReload(config); conf != nil {
			*config = *conf
		}
		goto WAIT
	}

	// Check if we should do a graceful leave
	graceful := false
	if sig == os.Interrupt && config.LeaveOnInt {
		graceful = true
	} else if sig == syscall.SIGTERM && config.LeaveOnTerm {
		graceful = true
	}

	// Bail fast if not doing a graceful leave
	if !graceful {
		return 1
	}

	// Attempt a graceful leave; the goroutine signals success by
	// closing gracefulCh.
	gracefulCh := make(chan struct{})
	c.Ui.Output("Gracefully shutting down agent...")
	go func() {
		if err := c.agent.Leave(); err != nil {
			c.Ui.Error(fmt.Sprintf("Error: %s", err))
			return
		}
		close(gracefulCh)
	}()

	// Wait for the leave to finish, bounded by gracefulTimeout; any
	// further signal forces an immediate non-zero exit.
	select {
	case <-signalCh:
		return 1
	case <-time.After(gracefulTimeout):
		return 1
	case <-gracefulCh:
		return 0
	}
}
   581  
   582  // handleReload is invoked when we should reload our configs, e.g. SIGHUP
   583  func (c *Command) handleReload(config *Config) *Config {
   584  	c.Ui.Output("Reloading configuration...")
   585  	newConf := c.readConfig()
   586  	if newConf == nil {
   587  		c.Ui.Error(fmt.Sprintf("Failed to reload configs"))
   588  		return config
   589  	}
   590  
   591  	// Change the log level
   592  	minLevel := logutils.LogLevel(strings.ToUpper(newConf.LogLevel))
   593  	if ValidateLevelFilter(minLevel, c.logFilter) {
   594  		c.logFilter.SetMinLevel(minLevel)
   595  	} else {
   596  		c.Ui.Error(fmt.Sprintf(
   597  			"Invalid log level: %s. Valid log levels are: %v",
   598  			minLevel, c.logFilter.Levels))
   599  
   600  		// Keep the current log level
   601  		newConf.LogLevel = config.LogLevel
   602  	}
   603  	return newConf
   604  }
   605  
// setupTelemetry is used to setup the telemetry sub-systems, wiring the
// configured sinks (statsite, statsd, DataDog, Circonus) plus an
// in-memory sink into the global metrics fan-out. Returns an error if
// any configured sink fails to initialize.
func (c *Command) setupTelemetry(config *Config) error {
	/* Setup telemetry
	Aggregate on 10 second intervals for 1 minute. Expose the
	metrics over stderr when there is a SIGUSR1 received.
	*/
	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(inm)

	// Fall back to an empty telemetry config so field reads are safe.
	var telConfig *Telemetry
	if config.Telemetry == nil {
		telConfig = &Telemetry{}
	} else {
		telConfig = config.Telemetry
	}

	metricsConf := metrics.DefaultConfig("nomad")
	metricsConf.EnableHostname = !telConfig.DisableHostname

	// Configure the statsite sink
	var fanout metrics.FanoutSink
	if telConfig.StatsiteAddr != "" {
		sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the statsd sink
	if telConfig.StatsdAddr != "" {
		sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the datadog sink
	if telConfig.DataDogAddr != "" {
		sink, err := datadog.NewDogStatsdSink(telConfig.DataDogAddr, config.NodeName)
		if err != nil {
			return err
		}
		fanout = append(fanout, sink)
	}

	// Configure the Circonus sink
	if telConfig.CirconusAPIToken != "" || telConfig.CirconusCheckSubmissionURL != "" {
		cfg := &circonus.Config{}
		cfg.Interval = telConfig.CirconusSubmissionInterval
		cfg.CheckManager.API.TokenKey = telConfig.CirconusAPIToken
		cfg.CheckManager.API.TokenApp = telConfig.CirconusAPIApp
		cfg.CheckManager.API.URL = telConfig.CirconusAPIURL
		cfg.CheckManager.Check.SubmissionURL = telConfig.CirconusCheckSubmissionURL
		cfg.CheckManager.Check.ID = telConfig.CirconusCheckID
		cfg.CheckManager.Check.ForceMetricActivation = telConfig.CirconusCheckForceMetricActivation
		cfg.CheckManager.Check.InstanceID = telConfig.CirconusCheckInstanceID
		cfg.CheckManager.Check.SearchTag = telConfig.CirconusCheckSearchTag
		cfg.CheckManager.Check.Tags = telConfig.CirconusCheckTags
		cfg.CheckManager.Check.DisplayName = telConfig.CirconusCheckDisplayName
		cfg.CheckManager.Broker.ID = telConfig.CirconusBrokerID
		cfg.CheckManager.Broker.SelectTag = telConfig.CirconusBrokerSelectTag

		// Apply Nomad-specific defaults when unset.
		if cfg.CheckManager.API.TokenApp == "" {
			cfg.CheckManager.API.TokenApp = "nomad"
		}

		if cfg.CheckManager.Check.SearchTag == "" {
			cfg.CheckManager.Check.SearchTag = "service:nomad"
		}

		sink, err := circonus.NewCirconusSink(cfg)
		if err != nil {
			return err
		}
		sink.Start()
		fanout = append(fanout, sink)
	}

	// Initialize the global sink. With external sinks configured, the
	// in-memory sink joins the fan-out; otherwise it is the sole sink
	// and hostname tagging is disabled.
	if len(fanout) > 0 {
		fanout = append(fanout, inm)
		metrics.NewGlobal(metricsConf, fanout)
	} else {
		metricsConf.EnableHostname = false
		metrics.NewGlobal(metricsConf, inm)
	}
	return nil
}
   696  
   697  // setupSCADA is used to start a new SCADA provider and listener,
   698  // replacing any existing listeners.
   699  func (c *Command) setupSCADA(config *Config) error {
   700  	// Shut down existing SCADA listeners
   701  	if c.scadaProvider != nil {
   702  		c.scadaProvider.Shutdown()
   703  	}
   704  	if c.scadaHttp != nil {
   705  		c.scadaHttp.Shutdown()
   706  	}
   707  
   708  	// No-op if we don't have an infrastructure
   709  	if config.Atlas == nil || config.Atlas.Infrastructure == "" {
   710  		return nil
   711  	}
   712  
   713  	// Create the new provider and listener
   714  	c.Ui.Output("Connecting to Atlas: " + config.Atlas.Infrastructure)
   715  
   716  	scadaConfig := &scada.Config{
   717  		Service:      "nomad",
   718  		Version:      fmt.Sprintf("%s%s", config.Version, config.VersionPrerelease),
   719  		ResourceType: "nomad-cluster",
   720  		Meta: map[string]string{
   721  			"auto-join":  strconv.FormatBool(config.Atlas.Join),
   722  			"region":     config.Region,
   723  			"datacenter": config.Datacenter,
   724  			"client":     strconv.FormatBool(config.Client != nil && config.Client.Enabled),
   725  			"server":     strconv.FormatBool(config.Server != nil && config.Server.Enabled),
   726  		},
   727  		Atlas: scada.AtlasConfig{
   728  			Endpoint:       config.Atlas.Endpoint,
   729  			Infrastructure: config.Atlas.Infrastructure,
   730  			Token:          config.Atlas.Token,
   731  		},
   732  	}
   733  
   734  	provider, list, err := scada.NewHTTPProvider(scadaConfig, c.logOutput)
   735  	if err != nil {
   736  		return err
   737  	}
   738  	c.scadaProvider = provider
   739  	c.scadaHttp = newScadaHttp(c.agent, list)
   740  	return nil
   741  }
   742  
   743  func (c *Command) startupJoin(config *Config) error {
   744  	if len(config.Server.StartJoin) == 0 || !config.Server.Enabled {
   745  		return nil
   746  	}
   747  
   748  	c.Ui.Output("Joining cluster...")
   749  	n, err := c.agent.server.Join(config.Server.StartJoin)
   750  	if err != nil {
   751  		return err
   752  	}
   753  
   754  	c.Ui.Info(fmt.Sprintf("Join completed. Synced with %d initial agents", n))
   755  	return nil
   756  }
   757  
   758  // retryJoin is used to handle retrying a join until it succeeds or all retries
   759  // are exhausted.
   760  func (c *Command) retryJoin(config *Config) {
   761  	if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
   762  		return
   763  	}
   764  
   765  	logger := c.agent.logger
   766  	logger.Printf("[INFO] agent: Joining cluster...")
   767  
   768  	attempt := 0
   769  	for {
   770  		n, err := c.agent.server.Join(config.Server.RetryJoin)
   771  		if err == nil {
   772  			logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
   773  			return
   774  		}
   775  
   776  		attempt++
   777  		if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
   778  			logger.Printf("[ERR] agent: max join retry exhausted, exiting")
   779  			close(c.retryJoinErrCh)
   780  			return
   781  		}
   782  
   783  		logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
   784  			config.Server.RetryInterval)
   785  		time.Sleep(config.Server.retryInterval)
   786  	}
   787  }
   788  
   789  func (c *Command) Synopsis() string {
   790  	return "Runs a Nomad agent"
   791  }
   792  
   793  func (c *Command) Help() string {
   794  	helpText := `
   795  Usage: nomad agent [options]
   796  
   797    Starts the Nomad agent and runs until an interrupt is received.
   798    The agent may be a client and/or server.
   799  
   800    The Nomad agent's configuration primarily comes from the config
   801    files used, but a subset of the options may also be passed directly
   802    as CLI arguments, listed below.
   803  
   804  General Options (clients and servers):
   805  
   806    -bind=<addr>
   807      The address the agent will bind to for all of its various network
   808      services. The individual services that run bind to individual
   809      ports on this address. Defaults to the loopback 127.0.0.1.
   810  
   811    -config=<path>
   812      The path to either a single config file or a directory of config
   813      files to use for configuring the Nomad agent. This option may be
   814      specified multiple times. If multiple config files are used, the
   815      values from each will be merged together. During merging, values
   816      from files found later in the list are merged over values from
   817      previously parsed files.
   818  
   819    -data-dir=<path>
   820      The data directory used to store state and other persistent data.
   821      On client machines this is used to house allocation data such as
   822      downloaded artifacts used by drivers. On server nodes, the data
   823      dir is also used to store the replicated log.
   824  
   825    -dc=<datacenter>
   826      The name of the datacenter this Nomad agent is a member of. By
   827      default this is set to "dc1".
   828  
   829    -log-level=<level>
   830      Specify the verbosity level of Nomad's logs. Valid values include
   831      DEBUG, INFO, and WARN, in decreasing order of verbosity. The
   832      default is INFO.
   833  
   834    -node=<name>
   835      The name of the local agent. This name is used to identify the node
   836      in the cluster. The name must be unique per region. The default is
   837      the current hostname of the machine.
   838  
   839    -region=<region>
   840      Name of the region the Nomad agent will be a member of. By default
   841      this value is set to "global".
   842  
   843    -dev
   844      Start the agent in development mode. This enables a pre-configured
   845      dual-role agent (client + server) which is useful for developing
   846      or testing Nomad. No other configuration is required to start the
   847      agent in this mode.
   848  
   849  Server Options:
   850  
   851    -server
   852      Enable server mode for the agent. Agents in server mode are
   853      clustered together and handle the additional responsibility of
   854      leader election, data replication, and scheduling work onto
   855      eligible client nodes.
   856  
   857    -bootstrap-expect=<num>
   858      Configures the expected number of servers nodes to wait for before
   859      bootstrapping the cluster. Once <num> servers have joined eachother,
   860      Nomad initiates the bootstrap process.
   861  
   862    -encrypt=<key>
   863      Provides the gossip encryption key
   864  
   865    -join=<address>
   866      Address of an agent to join at start time. Can be specified
   867      multiple times.
   868  
   869    -retry-join=<address>
   870      Address of an agent to join at start time with retries enabled.
   871      Can be specified multiple times.
   872  
   873    -retry-max=<num>
   874      Maximum number of join attempts. Defaults to 0, which will retry
   875      indefinitely.
   876  
   877    -retry-interval=<dur>
   878      Time to wait between join attempts.
   879  
   880    -rejoin
   881      Ignore a previous leave and attempt to rejoin the cluster.
   882  
   883  Client Options:
   884  
   885    -client
   886      Enable client mode for the agent. Client mode enables a given node to be
   887      evaluated for allocations. If client mode is not enabled, no work will be
   888      scheduled to the agent.
   889  
   890    -state-dir
   891      The directory used to store state and other persistent data. If not
   892      specified a subdirectory under the "-data-dir" will be used.
   893  
   894    -alloc-dir
   895      The directory used to store allocation data such as downloaded artifacts as
   896      well as data produced by tasks. If not specified, a subdirectory under the
   897      "-data-dir" will be used.
   898  
   899    -servers
   900      A list of known server addresses to connect to given as "host:port" and
   901      delimited by commas.
   902  
   903    -node-class
   904      Mark this node as a member of a node-class. This can be used to label
   905      similar node types.
   906  
   907    -meta
   908      User-specified metadata to associate with the node. Each instance of -meta
   909      parses a single KEY=VALUE pair. Repeat the meta flag for each key/value pair
   910      to be added.
   911  
   912    -network-interface
   913      Forces the network fingerprinter to use the specified network interface.
   914  
   915    -network-speed
   916      The default speed for network interfaces in MBits if the link speed can not
   917      be determined dynamically.
   918  
   919  Vault Options:
   920  
   921    -vault-enabled
   922      Whether to enable or disable Vault integration.
   923  
   924    -vault-address=<addr>
   925      The address to communicate with Vault. This should be provided with the http://
   926      or https:// prefix.
   927  
   928    -vault-token=<token>
   929      The Vault token used to derive tokens from Vault on behalf of clients.
   930      This only needs to be set on Servers. Overrides the Vault token read from
   931      the VAULT_TOKEN environment variable.
   932  
   933    -vault-allow-unauthenticated
   934      Whether to allow jobs to be submitted that request Vault tokens but do not
   935      authenticate. The flag only applies to Servers.
   936  
   937    -vault-ca-file=<path>
   938      The path to a PEM-encoded CA cert file to use to verify the Vault server SSL
   939      certificate.
   940  
   941    -vault-ca-path=<path>
   942      The path to a directory of PEM-encoded CA cert files to verify the Vault server
   943      certificate.
   944  
   945    -vault-cert-file=<path>
   946      The path to the certificate for Vault communication.
   947  
   948    -vault-key-file=<path>
   949      The path to the private key for Vault communication.
   950  
   951    -vault-tls-skip-verify
   952      Enables or disables SSL certificate verification.
   953  
   954    -vault-tls-server-name=<name>
   955      Used to set the SNI host when connecting over TLS.
   956  
   957  Atlas Options:
   958  
   959    -atlas=<infrastructure>
   960      The Atlas infrastructure name to configure. This enables the SCADA
   961      client and attempts to connect Nomad to the HashiCorp Atlas service
   962      using the provided infrastructure name and token.
   963  
   964    -atlas-token=<token>
   965      The Atlas token to use when connecting to the HashiCorp Atlas
   966      service. This must be provided to successfully connect your Nomad
   967      agent to Atlas.
   968  
   969    -atlas-join
   970      Enable the Atlas join feature. This mode allows agents to discover
   971      each other automatically using the SCADA integration features.
   972   `
   973  	return strings.TrimSpace(helpText)
   974  }