github.com/thomasobenaus/nomad@v0.11.1/command/agent/agent.go (about)

     1  package agent
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	golog "log"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	metrics "github.com/armon/go-metrics"
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/consul/lib"
    20  	log "github.com/hashicorp/go-hclog"
    21  	uuidparse "github.com/hashicorp/go-uuid"
    22  	"github.com/hashicorp/nomad/client"
    23  	clientconfig "github.com/hashicorp/nomad/client/config"
    24  	"github.com/hashicorp/nomad/client/state"
    25  	"github.com/hashicorp/nomad/command/agent/consul"
    26  	"github.com/hashicorp/nomad/command/agent/event"
    27  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    28  	"github.com/hashicorp/nomad/helper/uuid"
    29  	"github.com/hashicorp/nomad/nomad"
    30  	"github.com/hashicorp/nomad/nomad/structs"
    31  	"github.com/hashicorp/nomad/nomad/structs/config"
    32  	"github.com/hashicorp/raft"
    33  )
    34  
    35  const (
    36  	agentHttpCheckInterval  = 10 * time.Second
    37  	agentHttpCheckTimeout   = 5 * time.Second
    38  	serverRpcCheckInterval  = 10 * time.Second
    39  	serverRpcCheckTimeout   = 3 * time.Second
    40  	serverSerfCheckInterval = 10 * time.Second
    41  	serverSerfCheckTimeout  = 3 * time.Second
    42  
    43  	// roles used in identifying Consul entries for Nomad agents
    44  	consulRoleServer = "server"
    45  	consulRoleClient = "client"
    46  )
    47  
// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
	// config is the agent configuration passed to NewAgent; the server and
	// client configurations are derived from it.
	// NOTE(review): configLock presumably guards access to config — confirm
	// against its users.
	config     *Config
	configLock sync.Mutex

	// logger is the logger passed to NewAgent and httpLogger is derived from
	// it via ResetNamed("http"). logOutput is handed to the server and client
	// configurations as their LogOutput.
	// NOTE(review): auditor is not assigned in NewAgent; presumably it is
	// populated by the enterprise agent setup — confirm.
	logger     log.InterceptLogger
	auditor    event.Auditor
	httpLogger log.Logger
	logOutput  io.Writer

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// consulACLs is the subset of Consul's ACL API Nomad uses.
	consulACLs consul.ACLsAPI

	// client is the launched Nomad Client. Can be nil if the agent isn't
	// configured to run a client.
	client *client.Client

	// server is the launched Nomad Server. Can be nil if the agent isn't
	// configured to run a server.
	server *nomad.Server

	// pluginLoader is used to load plugins
	pluginLoader loader.PluginCatalog

	// pluginSingletonLoader is a plugin loader that returns singleton
	// instances of the plugins.
	pluginSingletonLoader loader.PluginCatalog

	// Shutdown bookkeeping. shutdownCh is created in NewAgent.
	// NOTE(review): presumably shutdown records that shutdown has started,
	// shutdownCh is closed to signal it, and shutdownLock guards both —
	// confirm against the shutdown path.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// InmemSink is the in-memory metrics sink passed to NewAgent.
	InmemSink *metrics.InmemSink
}
    93  
    94  // NewAgent is used to create a new agent with the given configuration
    95  func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
    96  	a := &Agent{
    97  		config:     config,
    98  		logOutput:  logOutput,
    99  		shutdownCh: make(chan struct{}),
   100  		InmemSink:  inmem,
   101  	}
   102  
   103  	// Create the loggers
   104  	a.logger = logger
   105  	a.httpLogger = a.logger.ResetNamed("http")
   106  
   107  	// Global logger should match internal logger as much as possible
   108  	golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds)
   109  
   110  	if err := a.setupConsul(config.Consul); err != nil {
   111  		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
   112  	}
   113  
   114  	if err := a.setupPlugins(); err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	if err := a.setupServer(); err != nil {
   119  		return nil, err
   120  	}
   121  	if err := a.setupClient(); err != nil {
   122  		return nil, err
   123  	}
   124  	if err := a.setupEnterpriseAgent(logger); err != nil {
   125  		return nil, err
   126  	}
   127  	if a.client == nil && a.server == nil {
   128  		return nil, fmt.Errorf("must have at least client or server mode enabled")
   129  	}
   130  
   131  	return a, nil
   132  }
   133  
   134  // convertServerConfig takes an agent config and log output and returns a Nomad
   135  // Config. There may be missing fields that must be set by the agent. To do this
   136  // call finalizeServerConfig
   137  func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
   138  	conf := agentConfig.NomadConfig
   139  	if conf == nil {
   140  		conf = nomad.DefaultConfig()
   141  	}
   142  	conf.DevMode = agentConfig.DevMode
   143  	conf.EnableDebug = agentConfig.EnableDebug
   144  
   145  	conf.Build = agentConfig.Version.VersionNumber()
   146  	if agentConfig.Region != "" {
   147  		conf.Region = agentConfig.Region
   148  	}
   149  
   150  	// Set the Authoritative Region if set, otherwise default to
   151  	// the same as the local region.
   152  	if agentConfig.Server.AuthoritativeRegion != "" {
   153  		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
   154  	} else if agentConfig.Region != "" {
   155  		conf.AuthoritativeRegion = agentConfig.Region
   156  	}
   157  
   158  	if agentConfig.Datacenter != "" {
   159  		conf.Datacenter = agentConfig.Datacenter
   160  	}
   161  	if agentConfig.NodeName != "" {
   162  		conf.NodeName = agentConfig.NodeName
   163  	}
   164  	if agentConfig.Server.BootstrapExpect > 0 {
   165  		conf.BootstrapExpect = agentConfig.Server.BootstrapExpect
   166  	}
   167  	if agentConfig.DataDir != "" {
   168  		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
   169  	}
   170  	if agentConfig.Server.DataDir != "" {
   171  		conf.DataDir = agentConfig.Server.DataDir
   172  	}
   173  	if agentConfig.Server.ProtocolVersion != 0 {
   174  		conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion)
   175  	}
   176  	if agentConfig.Server.RaftProtocol != 0 {
   177  		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
   178  	}
   179  	if agentConfig.Server.NumSchedulers != nil {
   180  		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
   181  	}
   182  	if len(agentConfig.Server.EnabledSchedulers) != 0 {
   183  		// Convert to a set and require the core scheduler
   184  		set := make(map[string]struct{}, 4)
   185  		set[structs.JobTypeCore] = struct{}{}
   186  		for _, sched := range agentConfig.Server.EnabledSchedulers {
   187  			set[sched] = struct{}{}
   188  		}
   189  
   190  		schedulers := make([]string, 0, len(set))
   191  		for k := range set {
   192  			schedulers = append(schedulers, k)
   193  		}
   194  
   195  		conf.EnabledSchedulers = schedulers
   196  
   197  	}
   198  	if agentConfig.ACL.Enabled {
   199  		conf.ACLEnabled = true
   200  	}
   201  	if agentConfig.ACL.ReplicationToken != "" {
   202  		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
   203  	}
   204  	if agentConfig.Sentinel != nil {
   205  		conf.SentinelConfig = agentConfig.Sentinel
   206  	}
   207  	if agentConfig.Server.NonVotingServer {
   208  		conf.NonVoter = true
   209  	}
   210  	if agentConfig.Server.RedundancyZone != "" {
   211  		conf.RedundancyZone = agentConfig.Server.RedundancyZone
   212  	}
   213  	if agentConfig.Server.UpgradeVersion != "" {
   214  		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
   215  	}
   216  	if agentConfig.Autopilot != nil {
   217  		if agentConfig.Autopilot.CleanupDeadServers != nil {
   218  			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
   219  		}
   220  		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
   221  			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
   222  		}
   223  		if agentConfig.Autopilot.LastContactThreshold != 0 {
   224  			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
   225  		}
   226  		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
   227  			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
   228  		}
   229  		if agentConfig.Autopilot.MinQuorum != 0 {
   230  			conf.AutopilotConfig.MinQuorum = uint(agentConfig.Autopilot.MinQuorum)
   231  		}
   232  		if agentConfig.Autopilot.EnableRedundancyZones != nil {
   233  			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
   234  		}
   235  		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
   236  			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
   237  		}
   238  		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
   239  			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
   240  		}
   241  	}
   242  
   243  	// Set up the bind addresses
   244  	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
   245  	if err != nil {
   246  		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
   247  	}
   248  	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
   249  	if err != nil {
   250  		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
   251  	}
   252  	conf.RPCAddr.Port = rpcAddr.Port
   253  	conf.RPCAddr.IP = rpcAddr.IP
   254  	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
   255  	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()
   256  
   257  	// Set up the advertise addresses
   258  	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
   259  	if err != nil {
   260  		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
   261  	}
   262  	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
   263  	if err != nil {
   264  		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   265  	}
   266  
   267  	// Server address is the serf advertise address and rpc port. This is the
   268  	// address that all servers should be able to communicate over RPC with.
   269  	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
   270  	if err != nil {
   271  		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   272  	}
   273  
   274  	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
   275  	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
   276  	conf.ClientRPCAdvertise = rpcAddr
   277  	conf.ServerRPCAdvertise = serverAddr
   278  
   279  	// Set up gc threshold and heartbeat grace period
   280  	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
   281  		dur, err := time.ParseDuration(gcThreshold)
   282  		if err != nil {
   283  			return nil, err
   284  		}
   285  		conf.NodeGCThreshold = dur
   286  	}
   287  	if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" {
   288  		dur, err := time.ParseDuration(gcInterval)
   289  		if err != nil {
   290  			return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err)
   291  		} else if dur <= time.Duration(0) {
   292  			return nil, fmt.Errorf("job_gc_interval should be greater than 0s")
   293  		}
   294  		conf.JobGCInterval = dur
   295  	}
   296  	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
   297  		dur, err := time.ParseDuration(gcThreshold)
   298  		if err != nil {
   299  			return nil, err
   300  		}
   301  		conf.JobGCThreshold = dur
   302  	}
   303  	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
   304  		dur, err := time.ParseDuration(gcThreshold)
   305  		if err != nil {
   306  			return nil, err
   307  		}
   308  		conf.EvalGCThreshold = dur
   309  	}
   310  	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
   311  		dur, err := time.ParseDuration(gcThreshold)
   312  		if err != nil {
   313  			return nil, err
   314  		}
   315  		conf.DeploymentGCThreshold = dur
   316  	}
   317  
   318  	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
   319  		conf.HeartbeatGrace = heartbeatGrace
   320  	}
   321  	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
   322  		conf.MinHeartbeatTTL = min
   323  	}
   324  	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
   325  		conf.MaxHeartbeatsPerSecond = maxHPS
   326  	}
   327  
   328  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
   329  		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
   330  	}
   331  
   332  	// handle system scheduler preemption default
   333  	if agentConfig.Server.DefaultSchedulerConfig != nil {
   334  		conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig
   335  	}
   336  
   337  	// Add the Consul and Vault configs
   338  	conf.ConsulConfig = agentConfig.Consul
   339  	conf.VaultConfig = agentConfig.Vault
   340  
   341  	// Set the TLS config
   342  	conf.TLSConfig = agentConfig.TLSConfig
   343  
   344  	// Setup telemetry related config
   345  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   346  	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
   347  	conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics
   348  	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
   349  
   350  	// Parse Limits timeout from a string into durations
   351  	if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil {
   352  		return nil, fmt.Errorf("error parsing rpc_handshake_timeout: %v", err)
   353  	} else if d < 0 {
   354  		return nil, fmt.Errorf("rpc_handshake_timeout must be >= 0")
   355  	} else {
   356  		conf.RPCHandshakeTimeout = d
   357  	}
   358  
   359  	// Set max rpc conns; nil/0 == unlimited
   360  	// Leave a little room for streaming RPCs
   361  	minLimit := config.LimitsNonStreamingConnsPerClient + 5
   362  	if agentConfig.Limits.RPCMaxConnsPerClient == nil || *agentConfig.Limits.RPCMaxConnsPerClient == 0 {
   363  		conf.RPCMaxConnsPerClient = 0
   364  	} else if limit := *agentConfig.Limits.RPCMaxConnsPerClient; limit <= minLimit {
   365  		return nil, fmt.Errorf("rpc_max_conns_per_client must be > %d; found: %d", minLimit, limit)
   366  	} else {
   367  		conf.RPCMaxConnsPerClient = limit
   368  	}
   369  
   370  	return conf, nil
   371  }
   372  
   373  // serverConfig is used to generate a new server configuration struct
   374  // for initializing a nomad server.
   375  func (a *Agent) serverConfig() (*nomad.Config, error) {
   376  	c, err := convertServerConfig(a.config)
   377  	if err != nil {
   378  		return nil, err
   379  	}
   380  
   381  	a.finalizeServerConfig(c)
   382  	return c, nil
   383  }
   384  
   385  // finalizeServerConfig sets configuration fields on the server config that are
   386  // not staticly convertable and are from the agent.
   387  func (a *Agent) finalizeServerConfig(c *nomad.Config) {
   388  	// Setup the logging
   389  	c.Logger = a.logger
   390  	c.LogOutput = a.logOutput
   391  
   392  	// Setup the plugin loaders
   393  	c.PluginLoader = a.pluginLoader
   394  	c.PluginSingletonLoader = a.pluginSingletonLoader
   395  }
   396  
   397  // clientConfig is used to generate a new client configuration struct for
   398  // initializing a Nomad client.
   399  func (a *Agent) clientConfig() (*clientconfig.Config, error) {
   400  	c, err := convertClientConfig(a.config)
   401  	if err != nil {
   402  		return nil, err
   403  	}
   404  
   405  	if err := a.finalizeClientConfig(c); err != nil {
   406  		return nil, err
   407  	}
   408  
   409  	return c, nil
   410  }
   411  
   412  // finalizeClientConfig sets configuration fields on the client config that are
   413  // not staticly convertable and are from the agent.
   414  func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error {
   415  	// Setup the logging
   416  	c.Logger = a.logger
   417  	c.LogOutput = a.logOutput
   418  
   419  	// If we are running a server, append both its bind and advertise address so
   420  	// we are able to at least talk to the local server even if that isn't
   421  	// configured explicitly. This handles both running server and client on one
   422  	// host and -dev mode.
   423  	if a.server != nil {
   424  		advertised := a.config.AdvertiseAddrs
   425  		normalized := a.config.normalizedAddrs
   426  
   427  		if advertised == nil || advertised.RPC == "" {
   428  			return fmt.Errorf("AdvertiseAddrs is nil or empty")
   429  		} else if normalized == nil || normalized.RPC == "" {
   430  			return fmt.Errorf("normalizedAddrs is nil or empty")
   431  		}
   432  
   433  		if normalized.RPC == advertised.RPC {
   434  			c.Servers = append(c.Servers, normalized.RPC)
   435  		} else {
   436  			c.Servers = append(c.Servers, normalized.RPC, advertised.RPC)
   437  		}
   438  	}
   439  
   440  	// Setup the plugin loaders
   441  	c.PluginLoader = a.pluginLoader
   442  	c.PluginSingletonLoader = a.pluginSingletonLoader
   443  
   444  	// Log deprecation messages about Consul related configuration in client
   445  	// options
   446  	var invalidConsulKeys []string
   447  	for key := range c.Options {
   448  		if strings.HasPrefix(key, "consul") {
   449  			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
   450  		}
   451  	}
   452  	if len(invalidConsulKeys) > 0 {
   453  		a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ","))
   454  		a.logger.Warn(`Nomad client ignores consul related configuration in client options.
   455  		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
   456  		to configure Nomad to work with Consul.`)
   457  	}
   458  
   459  	return nil
   460  }
   461  
// convertClientConfig takes an agent config and returns a client Config.
// There may be missing fields that must be set by the agent. To do this call
// finalizeClientConfig.
//
// The conversion starts from agentConfig.ClientConfig when it is set (and
// modifies that struct in place), otherwise from clientconfig.DefaultConfig().
// An error is returned when max_kill_timeout cannot be parsed, or when Consul
// auto-advertise is enabled without a client service name.
func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
	// Setup the configuration
	conf := agentConfig.ClientConfig
	if conf == nil {
		conf = clientconfig.DefaultConfig()
	}

	conf.Servers = agentConfig.Client.Servers
	conf.LogLevel = agentConfig.LogLevel
	conf.DevMode = agentConfig.DevMode
	conf.EnableDebug = agentConfig.EnableDebug

	if agentConfig.Region != "" {
		conf.Region = agentConfig.Region
	}
	// State and alloc dirs default to subdirectories of the agent data dir;
	// explicit client settings below take precedence over those defaults.
	if agentConfig.DataDir != "" {
		conf.StateDir = filepath.Join(agentConfig.DataDir, "client")
		conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc")
	}
	if agentConfig.Client.StateDir != "" {
		conf.StateDir = agentConfig.Client.StateDir
	}
	if agentConfig.Client.AllocDir != "" {
		conf.AllocDir = agentConfig.Client.AllocDir
	}
	if agentConfig.Client.NetworkInterface != "" {
		conf.NetworkInterface = agentConfig.Client.NetworkInterface
	}
	conf.ChrootEnv = agentConfig.Client.ChrootEnv
	conf.Options = agentConfig.Client.Options
	// Zero values for the resource overrides below leave the starting
	// config's values in place.
	if agentConfig.Client.NetworkSpeed != 0 {
		conf.NetworkSpeed = agentConfig.Client.NetworkSpeed
	}
	if agentConfig.Client.CpuCompute != 0 {
		conf.CpuCompute = agentConfig.Client.CpuCompute
	}
	if agentConfig.Client.MemoryMB != 0 {
		conf.MemoryMB = agentConfig.Client.MemoryMB
	}
	if agentConfig.Client.MaxKillTimeout != "" {
		dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout)
		if err != nil {
			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
		}
		conf.MaxKillTimeout = dur
	}
	conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort)
	conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort)
	conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec
	conf.TemplateConfig.FunctionBlacklist = agentConfig.Client.TemplateConfig.FunctionBlacklist
	conf.TemplateConfig.DisableSandbox = agentConfig.Client.TemplateConfig.DisableSandbox

	// Index the configured host volumes by name.
	hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes))
	for _, v := range agentConfig.Client.HostVolumes {
		hvMap[v.Name] = v
	}
	conf.HostVolumes = hvMap

	// Setup the node
	conf.Node = new(structs.Node)
	conf.Node.Datacenter = agentConfig.Datacenter
	conf.Node.Name = agentConfig.NodeName
	conf.Node.Meta = agentConfig.Client.Meta
	conf.Node.NodeClass = agentConfig.Client.NodeClass

	// Set up the HTTP advertise address
	conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP

	// Canonicalize Node struct
	conf.Node.Canonicalize()

	// Reserve resources on the node.
	// COMPAT(0.10): Remove in 0.10
	r := conf.Node.Reserved
	if r == nil {
		r = new(structs.Resources)
		conf.Node.Reserved = r
	}
	r.CPU = agentConfig.Client.Reserved.CPU
	r.MemoryMB = agentConfig.Client.Reserved.MemoryMB
	r.DiskMB = agentConfig.Client.Reserved.DiskMB

	// The same reservations are also written to ReservedResources, together
	// with the reserved host ports.
	res := conf.Node.ReservedResources
	if res == nil {
		res = new(structs.NodeReservedResources)
		conf.Node.ReservedResources = res
	}
	res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU)
	res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB)
	res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB)
	res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts

	conf.Version = agentConfig.Version

	// Auto-advertising the client requires a service name to register under.
	// NOTE(review): AutoAdvertise is dereferenced without a nil check;
	// presumably it is always defaulted before this point — confirm.
	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" {
		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
	}

	conf.ConsulConfig = agentConfig.Consul
	conf.VaultConfig = agentConfig.Vault

	// Set up Telemetry configuration
	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
	conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
	conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics

	// Set the TLS related configs
	conf.TLSConfig = agentConfig.TLSConfig
	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP

	// Set the GC related configs
	conf.GCInterval = agentConfig.Client.GCInterval
	conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys
	conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold
	conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold
	conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs
	if agentConfig.Client.NoHostUUID != nil {
		conf.NoHostUUID = *agentConfig.Client.NoHostUUID
	} else {
		// Default no_host_uuid to true
		conf.NoHostUUID = true
	}

	// Setup the ACLs
	conf.ACLEnabled = agentConfig.ACL.Enabled
	conf.ACLTokenTTL = agentConfig.ACL.TokenTTL
	conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL

	// Setup networking configuration
	conf.CNIPath = agentConfig.Client.CNIPath
	conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName
	conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet

	return conf, nil
}
   602  
   603  // setupServer is used to setup the server if enabled
   604  func (a *Agent) setupServer() error {
   605  	if !a.config.Server.Enabled {
   606  		return nil
   607  	}
   608  
   609  	// Setup the configuration
   610  	conf, err := a.serverConfig()
   611  	if err != nil {
   612  		return fmt.Errorf("server config setup failed: %s", err)
   613  	}
   614  
   615  	// Generate a node ID and persist it if it is the first instance, otherwise
   616  	// read the persisted node ID.
   617  	if err := a.setupNodeID(conf); err != nil {
   618  		return fmt.Errorf("setting up server node ID failed: %s", err)
   619  	}
   620  
   621  	// Sets up the keyring for gossip encryption
   622  	if err := a.setupKeyrings(conf); err != nil {
   623  		return fmt.Errorf("failed to configure keyring: %v", err)
   624  	}
   625  
   626  	// Create the server
   627  	server, err := nomad.NewServer(conf, a.consulCatalog, a.consulACLs)
   628  	if err != nil {
   629  		return fmt.Errorf("server setup failed: %v", err)
   630  	}
   631  	a.server = server
   632  
   633  	// Consul check addresses default to bind but can be toggled to use advertise
   634  	rpcCheckAddr := a.config.normalizedAddrs.RPC
   635  	serfCheckAddr := a.config.normalizedAddrs.Serf
   636  	if *a.config.Consul.ChecksUseAdvertise {
   637  		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
   638  		serfCheckAddr = a.config.AdvertiseAddrs.Serf
   639  	}
   640  
   641  	// Create the Nomad Server services for Consul
   642  	if *a.config.Consul.AutoAdvertise {
   643  		httpServ := &structs.Service{
   644  			Name:      a.config.Consul.ServerServiceName,
   645  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   646  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
   647  		}
   648  		const isServer = true
   649  		if check := a.agentHTTPCheck(isServer); check != nil {
   650  			httpServ.Checks = []*structs.ServiceCheck{check}
   651  		}
   652  		rpcServ := &structs.Service{
   653  			Name:      a.config.Consul.ServerServiceName,
   654  			PortLabel: a.config.AdvertiseAddrs.RPC,
   655  			Tags:      append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...),
   656  			Checks: []*structs.ServiceCheck{
   657  				{
   658  					Name:      a.config.Consul.ServerRPCCheckName,
   659  					Type:      "tcp",
   660  					Interval:  serverRpcCheckInterval,
   661  					Timeout:   serverRpcCheckTimeout,
   662  					PortLabel: rpcCheckAddr,
   663  				},
   664  			},
   665  		}
   666  		serfServ := &structs.Service{
   667  			Name:      a.config.Consul.ServerServiceName,
   668  			PortLabel: a.config.AdvertiseAddrs.Serf,
   669  			Tags:      append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...),
   670  			Checks: []*structs.ServiceCheck{
   671  				{
   672  					Name:      a.config.Consul.ServerSerfCheckName,
   673  					Type:      "tcp",
   674  					Interval:  serverSerfCheckInterval,
   675  					Timeout:   serverSerfCheckTimeout,
   676  					PortLabel: serfCheckAddr,
   677  				},
   678  			},
   679  		}
   680  
   681  		// Add the http port check if TLS isn't enabled
   682  		consulServices := []*structs.Service{
   683  			rpcServ,
   684  			serfServ,
   685  			httpServ,
   686  		}
   687  		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
   688  			return err
   689  		}
   690  	}
   691  
   692  	return nil
   693  }
   694  
   695  // setupNodeID will pull the persisted node ID, if any, or create a random one
   696  // and persist it.
   697  func (a *Agent) setupNodeID(config *nomad.Config) error {
   698  	// For dev mode we have no filesystem access so just make a node ID.
   699  	if a.config.DevMode {
   700  		config.NodeID = uuid.Generate()
   701  		return nil
   702  	}
   703  
   704  	// Load saved state, if any. Since a user could edit this, we also
   705  	// validate it. Saved state overwrites any configured node id
   706  	fileID := filepath.Join(config.DataDir, "node-id")
   707  	if _, err := os.Stat(fileID); err == nil {
   708  		rawID, err := ioutil.ReadFile(fileID)
   709  		if err != nil {
   710  			return err
   711  		}
   712  
   713  		nodeID := strings.TrimSpace(string(rawID))
   714  		nodeID = strings.ToLower(nodeID)
   715  		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
   716  			return err
   717  		}
   718  		config.NodeID = nodeID
   719  		return nil
   720  	}
   721  
   722  	// If they've configured a node ID manually then just use that, as
   723  	// long as it's valid.
   724  	if config.NodeID != "" {
   725  		config.NodeID = strings.ToLower(config.NodeID)
   726  		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
   727  			return err
   728  		}
   729  		// Persist this configured nodeID to our data directory
   730  		if err := lib.EnsurePath(fileID, false); err != nil {
   731  			return err
   732  		}
   733  		if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
   734  			return err
   735  		}
   736  		return nil
   737  	}
   738  
   739  	// If we still don't have a valid node ID, make one.
   740  	if config.NodeID == "" {
   741  		id := uuid.Generate()
   742  		if err := lib.EnsurePath(fileID, false); err != nil {
   743  			return err
   744  		}
   745  		if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil {
   746  			return err
   747  		}
   748  
   749  		config.NodeID = id
   750  	}
   751  	return nil
   752  }
   753  
   754  // setupKeyrings is used to initialize and load keyrings during agent startup
   755  func (a *Agent) setupKeyrings(config *nomad.Config) error {
   756  	file := filepath.Join(a.config.DataDir, serfKeyring)
   757  
   758  	if a.config.Server.EncryptKey == "" {
   759  		goto LOAD
   760  	}
   761  	if _, err := os.Stat(file); err != nil {
   762  		if err := initKeyring(file, a.config.Server.EncryptKey); err != nil {
   763  			return err
   764  		}
   765  	}
   766  
   767  LOAD:
   768  	if _, err := os.Stat(file); err == nil {
   769  		config.SerfConfig.KeyringFile = file
   770  	}
   771  	if err := loadKeyringFile(config.SerfConfig); err != nil {
   772  		return err
   773  	}
   774  	// Success!
   775  	return nil
   776  }
   777  
   778  // setupClient is used to setup the client if enabled
   779  func (a *Agent) setupClient() error {
   780  	if !a.config.Client.Enabled {
   781  		return nil
   782  	}
   783  
   784  	// Setup the configuration
   785  	conf, err := a.clientConfig()
   786  	if err != nil {
   787  		return fmt.Errorf("client setup failed: %v", err)
   788  	}
   789  
   790  	// Reserve some ports for the plugins if we are on Windows
   791  	if runtime.GOOS == "windows" {
   792  		if err := a.reservePortsForClient(conf); err != nil {
   793  			return err
   794  		}
   795  	}
   796  	if conf.StateDBFactory == nil {
   797  		conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode)
   798  	}
   799  
   800  	client, err := client.NewClient(conf, a.consulCatalog, a.consulService)
   801  	if err != nil {
   802  		return fmt.Errorf("client setup failed: %v", err)
   803  	}
   804  	a.client = client
   805  
   806  	// Create the Nomad Client  services for Consul
   807  	if *a.config.Consul.AutoAdvertise {
   808  		httpServ := &structs.Service{
   809  			Name:      a.config.Consul.ClientServiceName,
   810  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   811  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
   812  		}
   813  		const isServer = false
   814  		if check := a.agentHTTPCheck(isServer); check != nil {
   815  			httpServ.Checks = []*structs.ServiceCheck{check}
   816  		}
   817  		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
   818  			return err
   819  		}
   820  	}
   821  
   822  	return nil
   823  }
   824  
   825  // agentHTTPCheck returns a health check for the agent's HTTP API if possible.
   826  // If no HTTP health check can be supported nil is returned.
   827  func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
   828  	// Resolve the http check address
   829  	httpCheckAddr := a.config.normalizedAddrs.HTTP
   830  	if *a.config.Consul.ChecksUseAdvertise {
   831  		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
   832  	}
   833  	check := structs.ServiceCheck{
   834  		Name:      a.config.Consul.ClientHTTPCheckName,
   835  		Type:      "http",
   836  		Path:      "/v1/agent/health?type=client",
   837  		Protocol:  "http",
   838  		Interval:  agentHttpCheckInterval,
   839  		Timeout:   agentHttpCheckTimeout,
   840  		PortLabel: httpCheckAddr,
   841  	}
   842  	// Switch to endpoint that doesn't require a leader for servers
   843  	if server {
   844  		check.Name = a.config.Consul.ServerHTTPCheckName
   845  		check.Path = "/v1/agent/health?type=server"
   846  	}
   847  	if !a.config.TLSConfig.EnableHTTP {
   848  		// No HTTPS, return a plain http check
   849  		return &check
   850  	}
   851  	if a.config.TLSConfig.VerifyHTTPSClient {
   852  		a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled")
   853  		return nil
   854  	}
   855  
   856  	// HTTPS enabled; skip verification
   857  	check.Protocol = "https"
   858  	check.TLSSkipVerify = true
   859  	return &check
   860  }
   861  
   862  // reservePortsForClient reserves a range of ports for the client to use when
   863  // it creates various plugins for log collection, executors, drivers, etc
   864  func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
   865  	if conf.Node.ReservedResources == nil {
   866  		conf.Node.ReservedResources = &structs.NodeReservedResources{}
   867  	}
   868  
   869  	res := conf.Node.ReservedResources.Networks.ReservedHostPorts
   870  	if res == "" {
   871  		res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
   872  	} else {
   873  		res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
   874  	}
   875  	conf.Node.ReservedResources.Networks.ReservedHostPorts = res
   876  	return nil
   877  }
   878  
   879  // Leave is used gracefully exit. Clients will inform servers
   880  // of their departure so that allocations can be rescheduled.
   881  func (a *Agent) Leave() error {
   882  	if a.client != nil {
   883  		if err := a.client.Leave(); err != nil {
   884  			a.logger.Error("client leave failed", "error", err)
   885  		}
   886  	}
   887  	if a.server != nil {
   888  		if err := a.server.Leave(); err != nil {
   889  			a.logger.Error("server leave failed", "error", err)
   890  		}
   891  	}
   892  	return nil
   893  }
   894  
   895  // Shutdown is used to terminate the agent.
   896  func (a *Agent) Shutdown() error {
   897  	a.shutdownLock.Lock()
   898  	defer a.shutdownLock.Unlock()
   899  
   900  	if a.shutdown {
   901  		return nil
   902  	}
   903  
   904  	a.logger.Info("requesting shutdown")
   905  	if a.client != nil {
   906  		if err := a.client.Shutdown(); err != nil {
   907  			a.logger.Error("client shutdown failed", "error", err)
   908  		}
   909  	}
   910  	if a.server != nil {
   911  		if err := a.server.Shutdown(); err != nil {
   912  			a.logger.Error("server shutdown failed", "error", err)
   913  		}
   914  	}
   915  
   916  	if err := a.consulService.Shutdown(); err != nil {
   917  		a.logger.Error("shutting down Consul client failed", "error", err)
   918  	}
   919  
   920  	a.logger.Info("shutdown complete")
   921  	a.shutdown = true
   922  	close(a.shutdownCh)
   923  	return nil
   924  }
   925  
   926  // RPC is used to make an RPC call to the Nomad servers
   927  func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
   928  	if a.server != nil {
   929  		return a.server.RPC(method, args, reply)
   930  	}
   931  	return a.client.RPC(method, args, reply)
   932  }
   933  
   934  // Client returns the configured client or nil
   935  func (a *Agent) Client() *client.Client {
   936  	return a.client
   937  }
   938  
   939  // Server returns the configured server or nil
   940  func (a *Agent) Server() *nomad.Server {
   941  	return a.server
   942  }
   943  
   944  // Stats is used to return statistics for debugging and insight
   945  // for various sub-systems
   946  func (a *Agent) Stats() map[string]map[string]string {
   947  	stats := make(map[string]map[string]string)
   948  	if a.server != nil {
   949  		subStat := a.server.Stats()
   950  		for k, v := range subStat {
   951  			stats[k] = v
   952  		}
   953  	}
   954  	if a.client != nil {
   955  		subStat := a.client.Stats()
   956  		for k, v := range subStat {
   957  			stats[k] = v
   958  		}
   959  	}
   960  	return stats
   961  }
   962  
   963  // ShouldReload determines if we should reload the configuration and agent
   964  // connections. If the TLS Configuration has not changed, we shouldn't reload.
   965  func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) {
   966  	a.configLock.Lock()
   967  	defer a.configLock.Unlock()
   968  
   969  	if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel {
   970  		agent = true
   971  	}
   972  
   973  	isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig)
   974  	if err != nil {
   975  		a.logger.Error("parsing TLS certificate", "error", err)
   976  		return agent, false
   977  	} else if !isEqual {
   978  		return true, true
   979  	}
   980  
   981  	// Allow the ability to only reload HTTP connections
   982  	if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP {
   983  		http = true
   984  		agent = true
   985  	}
   986  
   987  	// Allow the ability to only reload HTTP connections
   988  	if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC {
   989  		agent = true
   990  	}
   991  
   992  	return agent, http
   993  }
   994  
   995  // Reload handles configuration changes for the agent. Provides a method that
   996  // is easier to unit test, as this action is invoked via SIGHUP.
   997  func (a *Agent) Reload(newConfig *Config) error {
   998  	a.configLock.Lock()
   999  	defer a.configLock.Unlock()
  1000  
  1001  	updatedLogging := newConfig != nil && (newConfig.LogLevel != a.config.LogLevel)
  1002  
  1003  	if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging {
  1004  		return fmt.Errorf("cannot reload agent with nil configuration")
  1005  	}
  1006  
  1007  	if updatedLogging {
  1008  		a.config.LogLevel = newConfig.LogLevel
  1009  		a.logger.SetLevel(log.LevelFromString(newConfig.LogLevel))
  1010  	}
  1011  
  1012  	// Update eventer config
  1013  	if newConfig.Audit != nil {
  1014  		if err := a.entReloadEventer(newConfig.Audit); err != nil {
  1015  			return err
  1016  		}
  1017  	}
  1018  	// Allow auditor to call reopen regardless of config changes
  1019  	// This is primarily for enterprise audit logging to allow the underlying
  1020  	// file to be reopened if necessary
  1021  	if err := a.auditor.Reopen(); err != nil {
  1022  		return err
  1023  	}
  1024  
  1025  	fullUpdateTLSConfig := func() {
  1026  		// Completely reload the agent's TLS configuration (moving from non-TLS to
  1027  		// TLS, or vice versa)
  1028  		// This does not handle errors in loading the new TLS configuration
  1029  		a.config.TLSConfig = newConfig.TLSConfig.Copy()
  1030  	}
  1031  
  1032  	if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
  1033  		// This is just a TLS configuration reload, we don't need to refresh
  1034  		// existing network connections
  1035  
  1036  		// Reload the certificates on the keyloader and on success store the
  1037  		// updated TLS config. It is important to reuse the same keyloader
  1038  		// as this allows us to dynamically reload configurations not only
  1039  		// on the Agent but on the Server and Client too (they are
  1040  		// referencing the same keyloader).
  1041  		keyloader := a.config.TLSConfig.GetKeyLoader()
  1042  		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
  1043  		if err != nil {
  1044  			return err
  1045  		}
  1046  		a.config.TLSConfig = newConfig.TLSConfig
  1047  		a.config.TLSConfig.KeyLoader = keyloader
  1048  		return nil
  1049  	} else if newConfig.TLSConfig.IsEmpty() && !a.config.TLSConfig.IsEmpty() {
  1050  		a.logger.Warn("downgrading agent's existing TLS configuration to plaintext")
  1051  		fullUpdateTLSConfig()
  1052  	} else if !newConfig.TLSConfig.IsEmpty() && a.config.TLSConfig.IsEmpty() {
  1053  		a.logger.Info("upgrading from plaintext configuration to TLS")
  1054  		fullUpdateTLSConfig()
  1055  	}
  1056  
  1057  	return nil
  1058  }
  1059  
  1060  // GetConfig creates a locked reference to the agent's config
  1061  func (a *Agent) GetConfig() *Config {
  1062  	a.configLock.Lock()
  1063  	defer a.configLock.Unlock()
  1064  
  1065  	return a.config
  1066  }
  1067  
  1068  // setupConsul creates the Consul client and starts its main Run loop.
  1069  func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
  1070  	apiConf, err := consulConfig.ApiConfig()
  1071  	if err != nil {
  1072  		return err
  1073  	}
  1074  	client, err := api.NewClient(apiConf)
  1075  	if err != nil {
  1076  		return err
  1077  	}
  1078  
  1079  	// Create Consul Catalog client for service discovery.
  1080  	a.consulCatalog = client.Catalog()
  1081  
  1082  	// Create Consul ACL client for managing tokens.
  1083  	a.consulACLs = client.ACL()
  1084  
  1085  	// Create Consul Service client for service advertisement and checks.
  1086  	isClient := false
  1087  	if a.config.Client != nil && a.config.Client.Enabled {
  1088  		isClient = true
  1089  	}
  1090  	a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient)
  1091  
  1092  	// Run the Consul service client's sync'ing main loop
  1093  	go a.consulService.Run()
  1094  	return nil
  1095  }
  1096  
  1097  // noOpAuditor is a no-op Auditor that fulfills the
  1098  // event.Auditor interface.
  1099  type noOpAuditor struct{}
  1100  
  1101  // Ensure noOpAuditor is an Auditor
  1102  var _ event.Auditor = &noOpAuditor{}
  1103  
  1104  func (e *noOpAuditor) Event(ctx context.Context, eventType string, payload interface{}) error {
  1105  	return nil
  1106  }
  1107  
  1108  func (e *noOpAuditor) Enabled() bool {
  1109  	return false
  1110  }
  1111  
  1112  func (e *noOpAuditor) Reopen() error {
  1113  	return nil
  1114  }
  1115  
  1116  func (e *noOpAuditor) SetEnabled(enabled bool) {}
  1117  
  1118  func (e *noOpAuditor) DeliveryEnforced() bool { return false }