github.com/karlem/nomad@v0.10.2-rc1/command/agent/agent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	golog "log"
     8  	"net"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	metrics "github.com/armon/go-metrics"
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/consul/lib"
    20  	log "github.com/hashicorp/go-hclog"
    21  	uuidparse "github.com/hashicorp/go-uuid"
    22  	"github.com/hashicorp/nomad/client"
    23  	clientconfig "github.com/hashicorp/nomad/client/config"
    24  	"github.com/hashicorp/nomad/client/state"
    25  	"github.com/hashicorp/nomad/command/agent/consul"
    26  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    27  	"github.com/hashicorp/nomad/helper/uuid"
    28  	"github.com/hashicorp/nomad/nomad"
    29  	"github.com/hashicorp/nomad/nomad/structs"
    30  	"github.com/hashicorp/nomad/nomad/structs/config"
    31  	"github.com/hashicorp/raft"
    32  )
    33  
// Intervals and timeouts used for the Consul health checks that the agent
// registers for its HTTP, RPC, and Serf endpoints, plus the role names passed
// to the Consul service client when registering agent services.
const (
	agentHttpCheckInterval  = 10 * time.Second
	agentHttpCheckTimeout   = 5 * time.Second
	serverRpcCheckInterval  = 10 * time.Second
	serverRpcCheckTimeout   = 3 * time.Second
	serverSerfCheckInterval = 10 * time.Second
	serverSerfCheckTimeout  = 3 * time.Second

	// roles used in identifying Consul entries for Nomad agents
	consulRoleServer = "server"
	consulRoleClient = "client"
)
    46  
// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
	// config is the agent configuration handed to NewAgent.
	// NOTE(review): configLock presumably guards config; its use is not
	// visible in this part of the file.
	config     *Config
	configLock sync.Mutex

	// logger is the logger handed to NewAgent; httpLogger is derived from it
	// with the name "http". logOutput is forwarded to the server and client
	// configurations.
	logger     log.InterceptLogger
	httpLogger log.Logger
	logOutput  io.Writer

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// client is the launched Nomad Client. Can be nil if the agent isn't
	// configured to run a client.
	client *client.Client

	// server is the launched Nomad Server. Can be nil if the agent isn't
	// configured to run a server.
	server *nomad.Server

	// pluginLoader is used to load plugins
	pluginLoader loader.PluginCatalog

	// pluginSingletonLoader is a plugin loader that returns singleton
	// instances of the plugins.
	pluginSingletonLoader loader.PluginCatalog

	// shutdownCh is created in NewAgent.
	// NOTE(review): shutdown, shutdownCh, and shutdownLock presumably
	// coordinate agent shutdown; their use is not visible in this part of the
	// file.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// InmemSink is the in-memory metrics sink handed to NewAgent.
	InmemSink *metrics.InmemSink
}
    88  
    89  // NewAgent is used to create a new agent with the given configuration
    90  func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
    91  	a := &Agent{
    92  		config:     config,
    93  		logOutput:  logOutput,
    94  		shutdownCh: make(chan struct{}),
    95  		InmemSink:  inmem,
    96  	}
    97  
    98  	// Create the loggers
    99  	a.logger = logger
   100  	a.httpLogger = a.logger.ResetNamed("http")
   101  
   102  	// Global logger should match internal logger as much as possible
   103  	golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds)
   104  
   105  	if err := a.setupConsul(config.Consul); err != nil {
   106  		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
   107  	}
   108  
   109  	if err := a.setupPlugins(); err != nil {
   110  		return nil, err
   111  	}
   112  
   113  	if err := a.setupServer(); err != nil {
   114  		return nil, err
   115  	}
   116  	if err := a.setupClient(); err != nil {
   117  		return nil, err
   118  	}
   119  	if a.client == nil && a.server == nil {
   120  		return nil, fmt.Errorf("must have at least client or server mode enabled")
   121  	}
   122  
   123  	return a, nil
   124  }
   125  
   126  // convertServerConfig takes an agent config and log output and returns a Nomad
   127  // Config. There may be missing fields that must be set by the agent. To do this
   128  // call finalizeServerConfig
   129  func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
   130  	conf := agentConfig.NomadConfig
   131  	if conf == nil {
   132  		conf = nomad.DefaultConfig()
   133  	}
   134  	conf.DevMode = agentConfig.DevMode
   135  	conf.Build = agentConfig.Version.VersionNumber()
   136  	if agentConfig.Region != "" {
   137  		conf.Region = agentConfig.Region
   138  	}
   139  
   140  	// Set the Authoritative Region if set, otherwise default to
   141  	// the same as the local region.
   142  	if agentConfig.Server.AuthoritativeRegion != "" {
   143  		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
   144  	} else if agentConfig.Region != "" {
   145  		conf.AuthoritativeRegion = agentConfig.Region
   146  	}
   147  
   148  	if agentConfig.Datacenter != "" {
   149  		conf.Datacenter = agentConfig.Datacenter
   150  	}
   151  	if agentConfig.NodeName != "" {
   152  		conf.NodeName = agentConfig.NodeName
   153  	}
   154  	if agentConfig.Server.BootstrapExpect > 0 {
   155  		if agentConfig.Server.BootstrapExpect == 1 {
   156  			conf.Bootstrap = true
   157  		} else {
   158  			atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect))
   159  		}
   160  	}
   161  	if agentConfig.DataDir != "" {
   162  		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
   163  	}
   164  	if agentConfig.Server.DataDir != "" {
   165  		conf.DataDir = agentConfig.Server.DataDir
   166  	}
   167  	if agentConfig.Server.ProtocolVersion != 0 {
   168  		conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion)
   169  	}
   170  	if agentConfig.Server.RaftProtocol != 0 {
   171  		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
   172  	}
   173  	if agentConfig.Server.NumSchedulers != nil {
   174  		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
   175  	}
   176  	if len(agentConfig.Server.EnabledSchedulers) != 0 {
   177  		// Convert to a set and require the core scheduler
   178  		set := make(map[string]struct{}, 4)
   179  		set[structs.JobTypeCore] = struct{}{}
   180  		for _, sched := range agentConfig.Server.EnabledSchedulers {
   181  			set[sched] = struct{}{}
   182  		}
   183  
   184  		schedulers := make([]string, 0, len(set))
   185  		for k := range set {
   186  			schedulers = append(schedulers, k)
   187  		}
   188  
   189  		conf.EnabledSchedulers = schedulers
   190  
   191  	}
   192  	if agentConfig.ACL.Enabled {
   193  		conf.ACLEnabled = true
   194  	}
   195  	if agentConfig.ACL.ReplicationToken != "" {
   196  		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
   197  	}
   198  	if agentConfig.Sentinel != nil {
   199  		conf.SentinelConfig = agentConfig.Sentinel
   200  	}
   201  	if agentConfig.Server.NonVotingServer {
   202  		conf.NonVoter = true
   203  	}
   204  	if agentConfig.Server.RedundancyZone != "" {
   205  		conf.RedundancyZone = agentConfig.Server.RedundancyZone
   206  	}
   207  	if agentConfig.Server.UpgradeVersion != "" {
   208  		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
   209  	}
   210  	if agentConfig.Autopilot != nil {
   211  		if agentConfig.Autopilot.CleanupDeadServers != nil {
   212  			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
   213  		}
   214  		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
   215  			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
   216  		}
   217  		if agentConfig.Autopilot.LastContactThreshold != 0 {
   218  			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
   219  		}
   220  		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
   221  			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
   222  		}
   223  		if agentConfig.Autopilot.EnableRedundancyZones != nil {
   224  			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
   225  		}
   226  		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
   227  			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
   228  		}
   229  		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
   230  			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
   231  		}
   232  	}
   233  
   234  	// Set up the bind addresses
   235  	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
   236  	if err != nil {
   237  		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
   238  	}
   239  	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
   240  	if err != nil {
   241  		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
   242  	}
   243  	conf.RPCAddr.Port = rpcAddr.Port
   244  	conf.RPCAddr.IP = rpcAddr.IP
   245  	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
   246  	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()
   247  
   248  	// Set up the advertise addresses
   249  	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
   250  	if err != nil {
   251  		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
   252  	}
   253  	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
   254  	if err != nil {
   255  		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   256  	}
   257  
   258  	// Server address is the serf advertise address and rpc port. This is the
   259  	// address that all servers should be able to communicate over RPC with.
   260  	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
   261  	if err != nil {
   262  		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   263  	}
   264  
   265  	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
   266  	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
   267  	conf.ClientRPCAdvertise = rpcAddr
   268  	conf.ServerRPCAdvertise = serverAddr
   269  
   270  	// Set up gc threshold and heartbeat grace period
   271  	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
   272  		dur, err := time.ParseDuration(gcThreshold)
   273  		if err != nil {
   274  			return nil, err
   275  		}
   276  		conf.NodeGCThreshold = dur
   277  	}
   278  	if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" {
   279  		dur, err := time.ParseDuration(gcInterval)
   280  		if err != nil {
   281  			return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err)
   282  		} else if dur <= time.Duration(0) {
   283  			return nil, fmt.Errorf("job_gc_interval should be greater than 0s")
   284  		}
   285  		conf.JobGCInterval = dur
   286  	}
   287  	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
   288  		dur, err := time.ParseDuration(gcThreshold)
   289  		if err != nil {
   290  			return nil, err
   291  		}
   292  		conf.JobGCThreshold = dur
   293  	}
   294  	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
   295  		dur, err := time.ParseDuration(gcThreshold)
   296  		if err != nil {
   297  			return nil, err
   298  		}
   299  		conf.EvalGCThreshold = dur
   300  	}
   301  	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
   302  		dur, err := time.ParseDuration(gcThreshold)
   303  		if err != nil {
   304  			return nil, err
   305  		}
   306  		conf.DeploymentGCThreshold = dur
   307  	}
   308  
   309  	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
   310  		conf.HeartbeatGrace = heartbeatGrace
   311  	}
   312  	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
   313  		conf.MinHeartbeatTTL = min
   314  	}
   315  	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
   316  		conf.MaxHeartbeatsPerSecond = maxHPS
   317  	}
   318  
   319  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
   320  		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
   321  	}
   322  
   323  	// Add the Consul and Vault configs
   324  	conf.ConsulConfig = agentConfig.Consul
   325  	conf.VaultConfig = agentConfig.Vault
   326  
   327  	// Set the TLS config
   328  	conf.TLSConfig = agentConfig.TLSConfig
   329  
   330  	// Setup telemetry related config
   331  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   332  	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
   333  	conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics
   334  	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
   335  
   336  	return conf, nil
   337  }
   338  
   339  // serverConfig is used to generate a new server configuration struct
   340  // for initializing a nomad server.
   341  func (a *Agent) serverConfig() (*nomad.Config, error) {
   342  	c, err := convertServerConfig(a.config)
   343  	if err != nil {
   344  		return nil, err
   345  	}
   346  
   347  	a.finalizeServerConfig(c)
   348  	return c, nil
   349  }
   350  
   351  // finalizeServerConfig sets configuration fields on the server config that are
   352  // not staticly convertable and are from the agent.
   353  func (a *Agent) finalizeServerConfig(c *nomad.Config) {
   354  	// Setup the logging
   355  	c.Logger = a.logger
   356  	c.LogOutput = a.logOutput
   357  
   358  	// Setup the plugin loaders
   359  	c.PluginLoader = a.pluginLoader
   360  	c.PluginSingletonLoader = a.pluginSingletonLoader
   361  }
   362  
   363  // clientConfig is used to generate a new client configuration struct for
   364  // initializing a Nomad client.
   365  func (a *Agent) clientConfig() (*clientconfig.Config, error) {
   366  	c, err := convertClientConfig(a.config)
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  
   371  	if err := a.finalizeClientConfig(c); err != nil {
   372  		return nil, err
   373  	}
   374  
   375  	return c, nil
   376  }
   377  
   378  // finalizeClientConfig sets configuration fields on the client config that are
   379  // not staticly convertable and are from the agent.
   380  func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error {
   381  	// Setup the logging
   382  	c.Logger = a.logger
   383  	c.LogOutput = a.logOutput
   384  
   385  	// If we are running a server, append both its bind and advertise address so
   386  	// we are able to at least talk to the local server even if that isn't
   387  	// configured explicitly. This handles both running server and client on one
   388  	// host and -dev mode.
   389  	if a.server != nil {
   390  		if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" {
   391  			return fmt.Errorf("AdvertiseAddrs is nil or empty")
   392  		} else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" {
   393  			return fmt.Errorf("normalizedAddrs is nil or empty")
   394  		}
   395  
   396  		c.Servers = append(c.Servers,
   397  			a.config.normalizedAddrs.RPC,
   398  			a.config.AdvertiseAddrs.RPC)
   399  	}
   400  
   401  	// Setup the plugin loaders
   402  	c.PluginLoader = a.pluginLoader
   403  	c.PluginSingletonLoader = a.pluginSingletonLoader
   404  
   405  	// Log deprecation messages about Consul related configuration in client
   406  	// options
   407  	var invalidConsulKeys []string
   408  	for key := range c.Options {
   409  		if strings.HasPrefix(key, "consul") {
   410  			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
   411  		}
   412  	}
   413  	if len(invalidConsulKeys) > 0 {
   414  		a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ","))
   415  		a.logger.Warn(`Nomad client ignores consul related configuration in client options.
   416  		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
   417  		to configure Nomad to work with Consul.`)
   418  	}
   419  
   420  	return nil
   421  }
   422  
   423  // convertClientConfig takes an agent config and log output and returns a client
   424  // Config. There may be missing fields that must be set by the agent. To do this
   425  // call finalizeServerConfig
   426  func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
   427  	// Setup the configuration
   428  	conf := agentConfig.ClientConfig
   429  	if conf == nil {
   430  		conf = clientconfig.DefaultConfig()
   431  	}
   432  
   433  	conf.Servers = agentConfig.Client.Servers
   434  	conf.LogLevel = agentConfig.LogLevel
   435  	conf.DevMode = agentConfig.DevMode
   436  	if agentConfig.Region != "" {
   437  		conf.Region = agentConfig.Region
   438  	}
   439  	if agentConfig.DataDir != "" {
   440  		conf.StateDir = filepath.Join(agentConfig.DataDir, "client")
   441  		conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc")
   442  	}
   443  	if agentConfig.Client.StateDir != "" {
   444  		conf.StateDir = agentConfig.Client.StateDir
   445  	}
   446  	if agentConfig.Client.AllocDir != "" {
   447  		conf.AllocDir = agentConfig.Client.AllocDir
   448  	}
   449  	if agentConfig.Client.NetworkInterface != "" {
   450  		conf.NetworkInterface = agentConfig.Client.NetworkInterface
   451  	}
   452  	conf.ChrootEnv = agentConfig.Client.ChrootEnv
   453  	conf.Options = agentConfig.Client.Options
   454  	if agentConfig.Client.NetworkSpeed != 0 {
   455  		conf.NetworkSpeed = agentConfig.Client.NetworkSpeed
   456  	}
   457  	if agentConfig.Client.CpuCompute != 0 {
   458  		conf.CpuCompute = agentConfig.Client.CpuCompute
   459  	}
   460  	if agentConfig.Client.MemoryMB != 0 {
   461  		conf.MemoryMB = agentConfig.Client.MemoryMB
   462  	}
   463  	if agentConfig.Client.MaxKillTimeout != "" {
   464  		dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout)
   465  		if err != nil {
   466  			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
   467  		}
   468  		conf.MaxKillTimeout = dur
   469  	}
   470  	conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort)
   471  	conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort)
   472  	conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec
   473  	conf.TemplateConfig.FunctionBlacklist = agentConfig.Client.TemplateConfig.FunctionBlacklist
   474  	conf.TemplateConfig.DisableSandbox = agentConfig.Client.TemplateConfig.DisableSandbox
   475  
   476  	hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes))
   477  	for _, v := range agentConfig.Client.HostVolumes {
   478  		hvMap[v.Name] = v
   479  	}
   480  	conf.HostVolumes = hvMap
   481  
   482  	// Setup the node
   483  	conf.Node = new(structs.Node)
   484  	conf.Node.Datacenter = agentConfig.Datacenter
   485  	conf.Node.Name = agentConfig.NodeName
   486  	conf.Node.Meta = agentConfig.Client.Meta
   487  	conf.Node.NodeClass = agentConfig.Client.NodeClass
   488  
   489  	// Set up the HTTP advertise address
   490  	conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP
   491  
   492  	// Reserve resources on the node.
   493  	// COMPAT(0.10): Remove in 0.10
   494  	r := conf.Node.Reserved
   495  	if r == nil {
   496  		r = new(structs.Resources)
   497  		conf.Node.Reserved = r
   498  	}
   499  	r.CPU = agentConfig.Client.Reserved.CPU
   500  	r.MemoryMB = agentConfig.Client.Reserved.MemoryMB
   501  	r.DiskMB = agentConfig.Client.Reserved.DiskMB
   502  
   503  	res := conf.Node.ReservedResources
   504  	if res == nil {
   505  		res = new(structs.NodeReservedResources)
   506  		conf.Node.ReservedResources = res
   507  	}
   508  	res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU)
   509  	res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB)
   510  	res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB)
   511  	res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts
   512  
   513  	conf.Version = agentConfig.Version
   514  
   515  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" {
   516  		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
   517  	}
   518  
   519  	conf.ConsulConfig = agentConfig.Consul
   520  	conf.VaultConfig = agentConfig.Vault
   521  
   522  	// Set up Telemetry configuration
   523  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   524  	conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
   525  	conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
   526  	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
   527  	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
   528  
   529  	// Set the TLS related configs
   530  	conf.TLSConfig = agentConfig.TLSConfig
   531  	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
   532  
   533  	// Set the GC related configs
   534  	conf.GCInterval = agentConfig.Client.GCInterval
   535  	conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys
   536  	conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold
   537  	conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold
   538  	conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs
   539  	if agentConfig.Client.NoHostUUID != nil {
   540  		conf.NoHostUUID = *agentConfig.Client.NoHostUUID
   541  	} else {
   542  		// Default no_host_uuid to true
   543  		conf.NoHostUUID = true
   544  	}
   545  
   546  	// Setup the ACLs
   547  	conf.ACLEnabled = agentConfig.ACL.Enabled
   548  	conf.ACLTokenTTL = agentConfig.ACL.TokenTTL
   549  	conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL
   550  
   551  	// Setup networking configration
   552  	conf.CNIPath = agentConfig.Client.CNIPath
   553  	conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName
   554  	conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet
   555  
   556  	return conf, nil
   557  }
   558  
   559  // setupServer is used to setup the server if enabled
   560  func (a *Agent) setupServer() error {
   561  	if !a.config.Server.Enabled {
   562  		return nil
   563  	}
   564  
   565  	// Setup the configuration
   566  	conf, err := a.serverConfig()
   567  	if err != nil {
   568  		return fmt.Errorf("server config setup failed: %s", err)
   569  	}
   570  
   571  	// Generate a node ID and persist it if it is the first instance, otherwise
   572  	// read the persisted node ID.
   573  	if err := a.setupNodeID(conf); err != nil {
   574  		return fmt.Errorf("setting up server node ID failed: %s", err)
   575  	}
   576  
   577  	// Sets up the keyring for gossip encryption
   578  	if err := a.setupKeyrings(conf); err != nil {
   579  		return fmt.Errorf("failed to configure keyring: %v", err)
   580  	}
   581  
   582  	// Create the server
   583  	server, err := nomad.NewServer(conf, a.consulCatalog)
   584  	if err != nil {
   585  		return fmt.Errorf("server setup failed: %v", err)
   586  	}
   587  	a.server = server
   588  
   589  	// Consul check addresses default to bind but can be toggled to use advertise
   590  	rpcCheckAddr := a.config.normalizedAddrs.RPC
   591  	serfCheckAddr := a.config.normalizedAddrs.Serf
   592  	if *a.config.Consul.ChecksUseAdvertise {
   593  		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
   594  		serfCheckAddr = a.config.AdvertiseAddrs.Serf
   595  	}
   596  
   597  	// Create the Nomad Server services for Consul
   598  	if *a.config.Consul.AutoAdvertise {
   599  		httpServ := &structs.Service{
   600  			Name:      a.config.Consul.ServerServiceName,
   601  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   602  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
   603  		}
   604  		const isServer = true
   605  		if check := a.agentHTTPCheck(isServer); check != nil {
   606  			httpServ.Checks = []*structs.ServiceCheck{check}
   607  		}
   608  		rpcServ := &structs.Service{
   609  			Name:      a.config.Consul.ServerServiceName,
   610  			PortLabel: a.config.AdvertiseAddrs.RPC,
   611  			Tags:      append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...),
   612  			Checks: []*structs.ServiceCheck{
   613  				{
   614  					Name:      a.config.Consul.ServerRPCCheckName,
   615  					Type:      "tcp",
   616  					Interval:  serverRpcCheckInterval,
   617  					Timeout:   serverRpcCheckTimeout,
   618  					PortLabel: rpcCheckAddr,
   619  				},
   620  			},
   621  		}
   622  		serfServ := &structs.Service{
   623  			Name:      a.config.Consul.ServerServiceName,
   624  			PortLabel: a.config.AdvertiseAddrs.Serf,
   625  			Tags:      append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...),
   626  			Checks: []*structs.ServiceCheck{
   627  				{
   628  					Name:      a.config.Consul.ServerSerfCheckName,
   629  					Type:      "tcp",
   630  					Interval:  serverSerfCheckInterval,
   631  					Timeout:   serverSerfCheckTimeout,
   632  					PortLabel: serfCheckAddr,
   633  				},
   634  			},
   635  		}
   636  
   637  		// Add the http port check if TLS isn't enabled
   638  		consulServices := []*structs.Service{
   639  			rpcServ,
   640  			serfServ,
   641  			httpServ,
   642  		}
   643  		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
   644  			return err
   645  		}
   646  	}
   647  
   648  	return nil
   649  }
   650  
   651  // setupNodeID will pull the persisted node ID, if any, or create a random one
   652  // and persist it.
   653  func (a *Agent) setupNodeID(config *nomad.Config) error {
   654  	// For dev mode we have no filesystem access so just make a node ID.
   655  	if a.config.DevMode {
   656  		config.NodeID = uuid.Generate()
   657  		return nil
   658  	}
   659  
   660  	// Load saved state, if any. Since a user could edit this, we also
   661  	// validate it. Saved state overwrites any configured node id
   662  	fileID := filepath.Join(config.DataDir, "node-id")
   663  	if _, err := os.Stat(fileID); err == nil {
   664  		rawID, err := ioutil.ReadFile(fileID)
   665  		if err != nil {
   666  			return err
   667  		}
   668  
   669  		nodeID := strings.TrimSpace(string(rawID))
   670  		nodeID = strings.ToLower(nodeID)
   671  		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
   672  			return err
   673  		}
   674  		config.NodeID = nodeID
   675  		return nil
   676  	}
   677  
   678  	// If they've configured a node ID manually then just use that, as
   679  	// long as it's valid.
   680  	if config.NodeID != "" {
   681  		config.NodeID = strings.ToLower(config.NodeID)
   682  		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
   683  			return err
   684  		}
   685  		// Persist this configured nodeID to our data directory
   686  		if err := lib.EnsurePath(fileID, false); err != nil {
   687  			return err
   688  		}
   689  		if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
   690  			return err
   691  		}
   692  		return nil
   693  	}
   694  
   695  	// If we still don't have a valid node ID, make one.
   696  	if config.NodeID == "" {
   697  		id := uuid.Generate()
   698  		if err := lib.EnsurePath(fileID, false); err != nil {
   699  			return err
   700  		}
   701  		if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil {
   702  			return err
   703  		}
   704  
   705  		config.NodeID = id
   706  	}
   707  	return nil
   708  }
   709  
   710  // setupKeyrings is used to initialize and load keyrings during agent startup
   711  func (a *Agent) setupKeyrings(config *nomad.Config) error {
   712  	file := filepath.Join(a.config.DataDir, serfKeyring)
   713  
   714  	if a.config.Server.EncryptKey == "" {
   715  		goto LOAD
   716  	}
   717  	if _, err := os.Stat(file); err != nil {
   718  		if err := initKeyring(file, a.config.Server.EncryptKey); err != nil {
   719  			return err
   720  		}
   721  	}
   722  
   723  LOAD:
   724  	if _, err := os.Stat(file); err == nil {
   725  		config.SerfConfig.KeyringFile = file
   726  	}
   727  	if err := loadKeyringFile(config.SerfConfig); err != nil {
   728  		return err
   729  	}
   730  	// Success!
   731  	return nil
   732  }
   733  
   734  // setupClient is used to setup the client if enabled
   735  func (a *Agent) setupClient() error {
   736  	if !a.config.Client.Enabled {
   737  		return nil
   738  	}
   739  
   740  	// Setup the configuration
   741  	conf, err := a.clientConfig()
   742  	if err != nil {
   743  		return fmt.Errorf("client setup failed: %v", err)
   744  	}
   745  
   746  	// Reserve some ports for the plugins if we are on Windows
   747  	if runtime.GOOS == "windows" {
   748  		if err := a.reservePortsForClient(conf); err != nil {
   749  			return err
   750  		}
   751  	}
   752  	if conf.StateDBFactory == nil {
   753  		conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode)
   754  	}
   755  
   756  	client, err := client.NewClient(conf, a.consulCatalog, a.consulService)
   757  	if err != nil {
   758  		return fmt.Errorf("client setup failed: %v", err)
   759  	}
   760  	a.client = client
   761  
   762  	// Create the Nomad Client  services for Consul
   763  	if *a.config.Consul.AutoAdvertise {
   764  		httpServ := &structs.Service{
   765  			Name:      a.config.Consul.ClientServiceName,
   766  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   767  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
   768  		}
   769  		const isServer = false
   770  		if check := a.agentHTTPCheck(isServer); check != nil {
   771  			httpServ.Checks = []*structs.ServiceCheck{check}
   772  		}
   773  		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
   774  			return err
   775  		}
   776  	}
   777  
   778  	return nil
   779  }
   780  
   781  // agentHTTPCheck returns a health check for the agent's HTTP API if possible.
   782  // If no HTTP health check can be supported nil is returned.
   783  func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
   784  	// Resolve the http check address
   785  	httpCheckAddr := a.config.normalizedAddrs.HTTP
   786  	if *a.config.Consul.ChecksUseAdvertise {
   787  		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
   788  	}
   789  	check := structs.ServiceCheck{
   790  		Name:      a.config.Consul.ClientHTTPCheckName,
   791  		Type:      "http",
   792  		Path:      "/v1/agent/health?type=client",
   793  		Protocol:  "http",
   794  		Interval:  agentHttpCheckInterval,
   795  		Timeout:   agentHttpCheckTimeout,
   796  		PortLabel: httpCheckAddr,
   797  	}
   798  	// Switch to endpoint that doesn't require a leader for servers
   799  	if server {
   800  		check.Name = a.config.Consul.ServerHTTPCheckName
   801  		check.Path = "/v1/agent/health?type=server"
   802  	}
   803  	if !a.config.TLSConfig.EnableHTTP {
   804  		// No HTTPS, return a plain http check
   805  		return &check
   806  	}
   807  	if a.config.TLSConfig.VerifyHTTPSClient {
   808  		a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled")
   809  		return nil
   810  	}
   811  
   812  	// HTTPS enabled; skip verification
   813  	check.Protocol = "https"
   814  	check.TLSSkipVerify = true
   815  	return &check
   816  }
   817  
   818  // reservePortsForClient reserves a range of ports for the client to use when
   819  // it creates various plugins for log collection, executors, drivers, etc
   820  func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
   821  	if conf.Node.ReservedResources == nil {
   822  		conf.Node.ReservedResources = &structs.NodeReservedResources{}
   823  	}
   824  
   825  	res := conf.Node.ReservedResources.Networks.ReservedHostPorts
   826  	if res == "" {
   827  		res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
   828  	} else {
   829  		res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
   830  	}
   831  	conf.Node.ReservedResources.Networks.ReservedHostPorts = res
   832  	return nil
   833  }
   834  
   835  // Leave is used gracefully exit. Clients will inform servers
   836  // of their departure so that allocations can be rescheduled.
   837  func (a *Agent) Leave() error {
   838  	if a.client != nil {
   839  		if err := a.client.Leave(); err != nil {
   840  			a.logger.Error("client leave failed", "error", err)
   841  		}
   842  	}
   843  	if a.server != nil {
   844  		if err := a.server.Leave(); err != nil {
   845  			a.logger.Error("server leave failed", "error", err)
   846  		}
   847  	}
   848  	return nil
   849  }
   850  
   851  // Shutdown is used to terminate the agent.
   852  func (a *Agent) Shutdown() error {
   853  	a.shutdownLock.Lock()
   854  	defer a.shutdownLock.Unlock()
   855  
   856  	if a.shutdown {
   857  		return nil
   858  	}
   859  
   860  	a.logger.Info("requesting shutdown")
   861  	if a.client != nil {
   862  		if err := a.client.Shutdown(); err != nil {
   863  			a.logger.Error("client shutdown failed", "error", err)
   864  		}
   865  	}
   866  	if a.server != nil {
   867  		if err := a.server.Shutdown(); err != nil {
   868  			a.logger.Error("server shutdown failed", "error", err)
   869  		}
   870  	}
   871  
   872  	if err := a.consulService.Shutdown(); err != nil {
   873  		a.logger.Error("shutting down Consul client failed", "error", err)
   874  	}
   875  
   876  	a.logger.Info("shutdown complete")
   877  	a.shutdown = true
   878  	close(a.shutdownCh)
   879  	return nil
   880  }
   881  
   882  // RPC is used to make an RPC call to the Nomad servers
   883  func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
   884  	if a.server != nil {
   885  		return a.server.RPC(method, args, reply)
   886  	}
   887  	return a.client.RPC(method, args, reply)
   888  }
   889  
// Client returns the agent's Nomad client, or nil if the agent has no client
// set. The stored pointer is returned as-is.
func (a *Agent) Client() *client.Client {
	return a.client
}
   894  
// Server returns the agent's Nomad server, or nil if the agent has no server
// set. The stored pointer is returned as-is.
func (a *Agent) Server() *nomad.Server {
	return a.server
}
   899  
   900  // Stats is used to return statistics for debugging and insight
   901  // for various sub-systems
   902  func (a *Agent) Stats() map[string]map[string]string {
   903  	stats := make(map[string]map[string]string)
   904  	if a.server != nil {
   905  		subStat := a.server.Stats()
   906  		for k, v := range subStat {
   907  			stats[k] = v
   908  		}
   909  	}
   910  	if a.client != nil {
   911  		subStat := a.client.Stats()
   912  		for k, v := range subStat {
   913  			stats[k] = v
   914  		}
   915  	}
   916  	return stats
   917  }
   918  
   919  // ShouldReload determines if we should reload the configuration and agent
   920  // connections. If the TLS Configuration has not changed, we shouldn't reload.
   921  func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) {
   922  	a.configLock.Lock()
   923  	defer a.configLock.Unlock()
   924  
   925  	if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel {
   926  		agent = true
   927  	}
   928  
   929  	isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig)
   930  	if err != nil {
   931  		a.logger.Error("parsing TLS certificate", "error", err)
   932  		return agent, false
   933  	} else if !isEqual {
   934  		return true, true
   935  	}
   936  
   937  	// Allow the ability to only reload HTTP connections
   938  	if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP {
   939  		http = true
   940  		agent = true
   941  	}
   942  
   943  	// Allow the ability to only reload HTTP connections
   944  	if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC {
   945  		agent = true
   946  	}
   947  
   948  	return agent, http
   949  }
   950  
   951  // Reload handles configuration changes for the agent. Provides a method that
   952  // is easier to unit test, as this action is invoked via SIGHUP.
   953  func (a *Agent) Reload(newConfig *Config) error {
   954  	a.configLock.Lock()
   955  	defer a.configLock.Unlock()
   956  
   957  	updatedLogging := newConfig != nil && (newConfig.LogLevel != a.config.LogLevel)
   958  
   959  	if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging {
   960  		return fmt.Errorf("cannot reload agent with nil configuration")
   961  	}
   962  
   963  	if updatedLogging {
   964  		a.config.LogLevel = newConfig.LogLevel
   965  		a.logger.SetLevel(log.LevelFromString(newConfig.LogLevel))
   966  	}
   967  
   968  	fullUpdateTLSConfig := func() {
   969  		// Completely reload the agent's TLS configuration (moving from non-TLS to
   970  		// TLS, or vice versa)
   971  		// This does not handle errors in loading the new TLS configuration
   972  		a.config.TLSConfig = newConfig.TLSConfig.Copy()
   973  	}
   974  
   975  	if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
   976  		// This is just a TLS configuration reload, we don't need to refresh
   977  		// existing network connections
   978  
   979  		// Reload the certificates on the keyloader and on success store the
   980  		// updated TLS config. It is important to reuse the same keyloader
   981  		// as this allows us to dynamically reload configurations not only
   982  		// on the Agent but on the Server and Client too (they are
   983  		// referencing the same keyloader).
   984  		keyloader := a.config.TLSConfig.GetKeyLoader()
   985  		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
   986  		if err != nil {
   987  			return err
   988  		}
   989  		a.config.TLSConfig = newConfig.TLSConfig
   990  		a.config.TLSConfig.KeyLoader = keyloader
   991  		return nil
   992  	} else if newConfig.TLSConfig.IsEmpty() && !a.config.TLSConfig.IsEmpty() {
   993  		a.logger.Warn("downgrading agent's existing TLS configuration to plaintext")
   994  		fullUpdateTLSConfig()
   995  	} else if !newConfig.TLSConfig.IsEmpty() && a.config.TLSConfig.IsEmpty() {
   996  		a.logger.Info("upgrading from plaintext configuration to TLS")
   997  		fullUpdateTLSConfig()
   998  	}
   999  
  1000  	return nil
  1001  }
  1002  
  1003  // GetConfig creates a locked reference to the agent's config
  1004  func (a *Agent) GetConfig() *Config {
  1005  	a.configLock.Lock()
  1006  	defer a.configLock.Unlock()
  1007  
  1008  	return a.config
  1009  }
  1010  
  1011  // setupConsul creates the Consul client and starts its main Run loop.
  1012  func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
  1013  	apiConf, err := consulConfig.ApiConfig()
  1014  	if err != nil {
  1015  		return err
  1016  	}
  1017  	client, err := api.NewClient(apiConf)
  1018  	if err != nil {
  1019  		return err
  1020  	}
  1021  
  1022  	// Determine version for TLSSkipVerify
  1023  
  1024  	// Create Consul Catalog client for service discovery.
  1025  	a.consulCatalog = client.Catalog()
  1026  
  1027  	// Create Consul Service client for service advertisement and checks.
  1028  	isClient := false
  1029  	if a.config.Client != nil && a.config.Client.Enabled {
  1030  		isClient = true
  1031  	}
  1032  	a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient)
  1033  
  1034  	// Run the Consul service client's sync'ing main loop
  1035  	go a.consulService.Run()
  1036  	return nil
  1037  }