github.com/superfly/nomad@v0.10.5-fly/command/agent/agent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	golog "log"
     8  	"net"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	metrics "github.com/armon/go-metrics"
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/consul/lib"
    20  	log "github.com/hashicorp/go-hclog"
    21  	uuidparse "github.com/hashicorp/go-uuid"
    22  	"github.com/hashicorp/nomad/client"
    23  	clientconfig "github.com/hashicorp/nomad/client/config"
    24  	"github.com/hashicorp/nomad/client/state"
    25  	"github.com/hashicorp/nomad/command/agent/consul"
    26  	"github.com/hashicorp/nomad/helper/pluginutils/loader"
    27  	"github.com/hashicorp/nomad/helper/uuid"
    28  	"github.com/hashicorp/nomad/nomad"
    29  	"github.com/hashicorp/nomad/nomad/structs"
    30  	"github.com/hashicorp/nomad/nomad/structs/config"
    31  	"github.com/hashicorp/raft"
    32  )
    33  
    34  const (
    35  	agentHttpCheckInterval  = 10 * time.Second
    36  	agentHttpCheckTimeout   = 5 * time.Second
    37  	serverRpcCheckInterval  = 10 * time.Second
    38  	serverRpcCheckTimeout   = 3 * time.Second
    39  	serverSerfCheckInterval = 10 * time.Second
    40  	serverSerfCheckTimeout  = 3 * time.Second
    41  
    42  	// roles used in identifying Consul entries for Nomad agents
    43  	consulRoleServer = "server"
    44  	consulRoleClient = "client"
    45  )
    46  
    47  // Agent is a long running daemon that is used to run both
    48  // clients and servers. Servers are responsible for managing
    49  // state and making scheduling decisions. Clients can be
    50  // scheduled to, and are responsible for interfacing with
    51  // servers to run allocations.
    52  type Agent struct {
    53  	config     *Config
    54  	configLock sync.Mutex
    55  
    56  	logger     log.InterceptLogger
    57  	httpLogger log.Logger
    58  	logOutput  io.Writer
    59  
    60  	// consulService is Nomad's custom Consul client for managing services
    61  	// and checks.
    62  	consulService *consul.ServiceClient
    63  
    64  	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
    65  	consulCatalog consul.CatalogAPI
    66  
    67  	// consulACLs is Nomad's subset of Consul's ACL API Nomad uses.
    68  	consulACLs consul.ACLsAPI
    69  
    70  	// client is the launched Nomad Client. Can be nil if the agent isn't
    71  	// configured to run a client.
    72  	client *client.Client
    73  
    74  	// server is the launched Nomad Server. Can be nil if the agent isn't
    75  	// configured to run a server.
    76  	server *nomad.Server
    77  
    78  	// pluginLoader is used to load plugins
    79  	pluginLoader loader.PluginCatalog
    80  
    81  	// pluginSingletonLoader is a plugin loader that will returns singleton
    82  	// instances of the plugins.
    83  	pluginSingletonLoader loader.PluginCatalog
    84  
    85  	shutdown     bool
    86  	shutdownCh   chan struct{}
    87  	shutdownLock sync.Mutex
    88  
    89  	InmemSink *metrics.InmemSink
    90  }
    91  
    92  // NewAgent is used to create a new agent with the given configuration
    93  func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
    94  	a := &Agent{
    95  		config:     config,
    96  		logOutput:  logOutput,
    97  		shutdownCh: make(chan struct{}),
    98  		InmemSink:  inmem,
    99  	}
   100  
   101  	// Create the loggers
   102  	a.logger = logger
   103  	a.httpLogger = a.logger.ResetNamed("http")
   104  
   105  	// Global logger should match internal logger as much as possible
   106  	golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds)
   107  
   108  	if err := a.setupConsul(config.Consul); err != nil {
   109  		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
   110  	}
   111  
   112  	if err := a.setupPlugins(); err != nil {
   113  		return nil, err
   114  	}
   115  
   116  	if err := a.setupServer(); err != nil {
   117  		return nil, err
   118  	}
   119  	if err := a.setupClient(); err != nil {
   120  		return nil, err
   121  	}
   122  	if a.client == nil && a.server == nil {
   123  		return nil, fmt.Errorf("must have at least client or server mode enabled")
   124  	}
   125  
   126  	return a, nil
   127  }
   128  
   129  // convertServerConfig takes an agent config and log output and returns a Nomad
   130  // Config. There may be missing fields that must be set by the agent. To do this
   131  // call finalizeServerConfig
   132  func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
   133  	conf := agentConfig.NomadConfig
   134  	if conf == nil {
   135  		conf = nomad.DefaultConfig()
   136  	}
   137  	conf.DevMode = agentConfig.DevMode
   138  	conf.EnableDebug = agentConfig.EnableDebug
   139  
   140  	conf.Build = agentConfig.Version.VersionNumber()
   141  	if agentConfig.Region != "" {
   142  		conf.Region = agentConfig.Region
   143  	}
   144  
   145  	// Set the Authoritative Region if set, otherwise default to
   146  	// the same as the local region.
   147  	if agentConfig.Server.AuthoritativeRegion != "" {
   148  		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
   149  	} else if agentConfig.Region != "" {
   150  		conf.AuthoritativeRegion = agentConfig.Region
   151  	}
   152  
   153  	if agentConfig.Datacenter != "" {
   154  		conf.Datacenter = agentConfig.Datacenter
   155  	}
   156  	if agentConfig.NodeName != "" {
   157  		conf.NodeName = agentConfig.NodeName
   158  	}
   159  	if agentConfig.Server.BootstrapExpect > 0 {
   160  		if agentConfig.Server.BootstrapExpect == 1 {
   161  			conf.Bootstrap = true
   162  		} else {
   163  			atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect))
   164  		}
   165  	}
   166  	if agentConfig.DataDir != "" {
   167  		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
   168  	}
   169  	if agentConfig.Server.DataDir != "" {
   170  		conf.DataDir = agentConfig.Server.DataDir
   171  	}
   172  	if agentConfig.Server.ProtocolVersion != 0 {
   173  		conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion)
   174  	}
   175  	if agentConfig.Server.RaftProtocol != 0 {
   176  		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
   177  	}
   178  	if agentConfig.Server.NumSchedulers != nil {
   179  		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
   180  	}
   181  	if len(agentConfig.Server.EnabledSchedulers) != 0 {
   182  		// Convert to a set and require the core scheduler
   183  		set := make(map[string]struct{}, 4)
   184  		set[structs.JobTypeCore] = struct{}{}
   185  		for _, sched := range agentConfig.Server.EnabledSchedulers {
   186  			set[sched] = struct{}{}
   187  		}
   188  
   189  		schedulers := make([]string, 0, len(set))
   190  		for k := range set {
   191  			schedulers = append(schedulers, k)
   192  		}
   193  
   194  		conf.EnabledSchedulers = schedulers
   195  
   196  	}
   197  	if agentConfig.ACL.Enabled {
   198  		conf.ACLEnabled = true
   199  	}
   200  	if agentConfig.ACL.ReplicationToken != "" {
   201  		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
   202  	}
   203  	if agentConfig.Sentinel != nil {
   204  		conf.SentinelConfig = agentConfig.Sentinel
   205  	}
   206  	if agentConfig.Server.NonVotingServer {
   207  		conf.NonVoter = true
   208  	}
   209  	if agentConfig.Server.RedundancyZone != "" {
   210  		conf.RedundancyZone = agentConfig.Server.RedundancyZone
   211  	}
   212  	if agentConfig.Server.UpgradeVersion != "" {
   213  		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
   214  	}
   215  	if agentConfig.Autopilot != nil {
   216  		if agentConfig.Autopilot.CleanupDeadServers != nil {
   217  			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
   218  		}
   219  		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
   220  			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
   221  		}
   222  		if agentConfig.Autopilot.LastContactThreshold != 0 {
   223  			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
   224  		}
   225  		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
   226  			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
   227  		}
   228  		if agentConfig.Autopilot.EnableRedundancyZones != nil {
   229  			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
   230  		}
   231  		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
   232  			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
   233  		}
   234  		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
   235  			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
   236  		}
   237  	}
   238  
   239  	// Set up the bind addresses
   240  	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
   241  	if err != nil {
   242  		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
   243  	}
   244  	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
   245  	if err != nil {
   246  		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
   247  	}
   248  	conf.RPCAddr.Port = rpcAddr.Port
   249  	conf.RPCAddr.IP = rpcAddr.IP
   250  	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
   251  	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()
   252  
   253  	// Set up the advertise addresses
   254  	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
   255  	if err != nil {
   256  		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
   257  	}
   258  	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
   259  	if err != nil {
   260  		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   261  	}
   262  
   263  	// Server address is the serf advertise address and rpc port. This is the
   264  	// address that all servers should be able to communicate over RPC with.
   265  	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
   266  	if err != nil {
   267  		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   268  	}
   269  
   270  	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
   271  	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
   272  	conf.ClientRPCAdvertise = rpcAddr
   273  	conf.ServerRPCAdvertise = serverAddr
   274  
   275  	// Set up gc threshold and heartbeat grace period
   276  	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
   277  		dur, err := time.ParseDuration(gcThreshold)
   278  		if err != nil {
   279  			return nil, err
   280  		}
   281  		conf.NodeGCThreshold = dur
   282  	}
   283  	if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" {
   284  		dur, err := time.ParseDuration(gcInterval)
   285  		if err != nil {
   286  			return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err)
   287  		} else if dur <= time.Duration(0) {
   288  			return nil, fmt.Errorf("job_gc_interval should be greater than 0s")
   289  		}
   290  		conf.JobGCInterval = dur
   291  	}
   292  	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
   293  		dur, err := time.ParseDuration(gcThreshold)
   294  		if err != nil {
   295  			return nil, err
   296  		}
   297  		conf.JobGCThreshold = dur
   298  	}
   299  	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
   300  		dur, err := time.ParseDuration(gcThreshold)
   301  		if err != nil {
   302  			return nil, err
   303  		}
   304  		conf.EvalGCThreshold = dur
   305  	}
   306  	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
   307  		dur, err := time.ParseDuration(gcThreshold)
   308  		if err != nil {
   309  			return nil, err
   310  		}
   311  		conf.DeploymentGCThreshold = dur
   312  	}
   313  
   314  	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
   315  		conf.HeartbeatGrace = heartbeatGrace
   316  	}
   317  	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
   318  		conf.MinHeartbeatTTL = min
   319  	}
   320  	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
   321  		conf.MaxHeartbeatsPerSecond = maxHPS
   322  	}
   323  
   324  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
   325  		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
   326  	}
   327  
   328  	// handle system scheduler preemption default
   329  	if agentConfig.Server.DefaultSchedulerConfig != nil {
   330  		conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig
   331  	}
   332  
   333  	// Add the Consul and Vault configs
   334  	conf.ConsulConfig = agentConfig.Consul
   335  	conf.VaultConfig = agentConfig.Vault
   336  
   337  	// Set the TLS config
   338  	conf.TLSConfig = agentConfig.TLSConfig
   339  
   340  	// Setup telemetry related config
   341  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   342  	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
   343  	conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics
   344  	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
   345  
   346  	// Parse Limits timeout from a string into durations
   347  	if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil {
   348  		return nil, fmt.Errorf("error parsing rpc_handshake_timeout: %v", err)
   349  	} else if d < 0 {
   350  		return nil, fmt.Errorf("rpc_handshake_timeout must be >= 0")
   351  	} else {
   352  		conf.RPCHandshakeTimeout = d
   353  	}
   354  
   355  	// Set max rpc conns; nil/0 == unlimited
   356  	// Leave a little room for streaming RPCs
   357  	minLimit := config.LimitsNonStreamingConnsPerClient + 5
   358  	if agentConfig.Limits.RPCMaxConnsPerClient == nil || *agentConfig.Limits.RPCMaxConnsPerClient == 0 {
   359  		conf.RPCMaxConnsPerClient = 0
   360  	} else if limit := *agentConfig.Limits.RPCMaxConnsPerClient; limit <= minLimit {
   361  		return nil, fmt.Errorf("rpc_max_conns_per_client must be > %d; found: %d", minLimit, limit)
   362  	} else {
   363  		conf.RPCMaxConnsPerClient = limit
   364  	}
   365  
   366  	return conf, nil
   367  }
   368  
   369  // serverConfig is used to generate a new server configuration struct
   370  // for initializing a nomad server.
   371  func (a *Agent) serverConfig() (*nomad.Config, error) {
   372  	c, err := convertServerConfig(a.config)
   373  	if err != nil {
   374  		return nil, err
   375  	}
   376  
   377  	a.finalizeServerConfig(c)
   378  	return c, nil
   379  }
   380  
   381  // finalizeServerConfig sets configuration fields on the server config that are
   382  // not staticly convertable and are from the agent.
   383  func (a *Agent) finalizeServerConfig(c *nomad.Config) {
   384  	// Setup the logging
   385  	c.Logger = a.logger
   386  	c.LogOutput = a.logOutput
   387  
   388  	// Setup the plugin loaders
   389  	c.PluginLoader = a.pluginLoader
   390  	c.PluginSingletonLoader = a.pluginSingletonLoader
   391  }
   392  
   393  // clientConfig is used to generate a new client configuration struct for
   394  // initializing a Nomad client.
   395  func (a *Agent) clientConfig() (*clientconfig.Config, error) {
   396  	c, err := convertClientConfig(a.config)
   397  	if err != nil {
   398  		return nil, err
   399  	}
   400  
   401  	if err := a.finalizeClientConfig(c); err != nil {
   402  		return nil, err
   403  	}
   404  
   405  	return c, nil
   406  }
   407  
   408  // finalizeClientConfig sets configuration fields on the client config that are
   409  // not staticly convertable and are from the agent.
   410  func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error {
   411  	// Setup the logging
   412  	c.Logger = a.logger
   413  	c.LogOutput = a.logOutput
   414  
   415  	// If we are running a server, append both its bind and advertise address so
   416  	// we are able to at least talk to the local server even if that isn't
   417  	// configured explicitly. This handles both running server and client on one
   418  	// host and -dev mode.
   419  	if a.server != nil {
   420  		if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" {
   421  			return fmt.Errorf("AdvertiseAddrs is nil or empty")
   422  		} else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" {
   423  			return fmt.Errorf("normalizedAddrs is nil or empty")
   424  		}
   425  
   426  		c.Servers = append(c.Servers,
   427  			a.config.normalizedAddrs.RPC,
   428  			a.config.AdvertiseAddrs.RPC)
   429  	}
   430  
   431  	// Setup the plugin loaders
   432  	c.PluginLoader = a.pluginLoader
   433  	c.PluginSingletonLoader = a.pluginSingletonLoader
   434  
   435  	// Log deprecation messages about Consul related configuration in client
   436  	// options
   437  	var invalidConsulKeys []string
   438  	for key := range c.Options {
   439  		if strings.HasPrefix(key, "consul") {
   440  			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
   441  		}
   442  	}
   443  	if len(invalidConsulKeys) > 0 {
   444  		a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ","))
   445  		a.logger.Warn(`Nomad client ignores consul related configuration in client options.
   446  		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
   447  		to configure Nomad to work with Consul.`)
   448  	}
   449  
   450  	return nil
   451  }
   452  
   453  // convertClientConfig takes an agent config and log output and returns a client
   454  // Config. There may be missing fields that must be set by the agent. To do this
   455  // call finalizeServerConfig
   456  func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
   457  	// Setup the configuration
   458  	conf := agentConfig.ClientConfig
   459  	if conf == nil {
   460  		conf = clientconfig.DefaultConfig()
   461  	}
   462  
   463  	conf.Servers = agentConfig.Client.Servers
   464  	conf.LogLevel = agentConfig.LogLevel
   465  	conf.DevMode = agentConfig.DevMode
   466  	conf.EnableDebug = agentConfig.EnableDebug
   467  
   468  	if agentConfig.Region != "" {
   469  		conf.Region = agentConfig.Region
   470  	}
   471  	if agentConfig.DataDir != "" {
   472  		conf.StateDir = filepath.Join(agentConfig.DataDir, "client")
   473  		conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc")
   474  	}
   475  	if agentConfig.Client.StateDir != "" {
   476  		conf.StateDir = agentConfig.Client.StateDir
   477  	}
   478  	if agentConfig.Client.AllocDir != "" {
   479  		conf.AllocDir = agentConfig.Client.AllocDir
   480  	}
   481  	if agentConfig.Client.NetworkInterface != "" {
   482  		conf.NetworkInterface = agentConfig.Client.NetworkInterface
   483  	}
   484  	conf.ChrootEnv = agentConfig.Client.ChrootEnv
   485  	conf.Options = agentConfig.Client.Options
   486  	if agentConfig.Client.NetworkSpeed != 0 {
   487  		conf.NetworkSpeed = agentConfig.Client.NetworkSpeed
   488  	}
   489  	if agentConfig.Client.CpuCompute != 0 {
   490  		conf.CpuCompute = agentConfig.Client.CpuCompute
   491  	}
   492  	if agentConfig.Client.MemoryMB != 0 {
   493  		conf.MemoryMB = agentConfig.Client.MemoryMB
   494  	}
   495  	if agentConfig.Client.MaxKillTimeout != "" {
   496  		dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout)
   497  		if err != nil {
   498  			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
   499  		}
   500  		conf.MaxKillTimeout = dur
   501  	}
   502  	conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort)
   503  	conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort)
   504  	conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec
   505  	conf.TemplateConfig.FunctionBlacklist = agentConfig.Client.TemplateConfig.FunctionBlacklist
   506  	conf.TemplateConfig.DisableSandbox = agentConfig.Client.TemplateConfig.DisableSandbox
   507  
   508  	hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes))
   509  	for _, v := range agentConfig.Client.HostVolumes {
   510  		hvMap[v.Name] = v
   511  	}
   512  	conf.HostVolumes = hvMap
   513  
   514  	// Setup the node
   515  	conf.Node = new(structs.Node)
   516  	conf.Node.Datacenter = agentConfig.Datacenter
   517  	conf.Node.Name = agentConfig.NodeName
   518  	conf.Node.Meta = agentConfig.Client.Meta
   519  	conf.Node.NodeClass = agentConfig.Client.NodeClass
   520  
   521  	// Set up the HTTP advertise address
   522  	conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP
   523  
   524  	// Reserve resources on the node.
   525  	// COMPAT(0.10): Remove in 0.10
   526  	r := conf.Node.Reserved
   527  	if r == nil {
   528  		r = new(structs.Resources)
   529  		conf.Node.Reserved = r
   530  	}
   531  	r.CPU = agentConfig.Client.Reserved.CPU
   532  	r.MemoryMB = agentConfig.Client.Reserved.MemoryMB
   533  	r.DiskMB = agentConfig.Client.Reserved.DiskMB
   534  
   535  	res := conf.Node.ReservedResources
   536  	if res == nil {
   537  		res = new(structs.NodeReservedResources)
   538  		conf.Node.ReservedResources = res
   539  	}
   540  	res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU)
   541  	res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB)
   542  	res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB)
   543  	res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts
   544  
   545  	conf.Version = agentConfig.Version
   546  
   547  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" {
   548  		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
   549  	}
   550  
   551  	conf.ConsulConfig = agentConfig.Consul
   552  	conf.VaultConfig = agentConfig.Vault
   553  
   554  	// Set up Telemetry configuration
   555  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   556  	conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
   557  	conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
   558  	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
   559  	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
   560  
   561  	// Set the TLS related configs
   562  	conf.TLSConfig = agentConfig.TLSConfig
   563  	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
   564  
   565  	// Set the GC related configs
   566  	conf.GCInterval = agentConfig.Client.GCInterval
   567  	conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys
   568  	conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold
   569  	conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold
   570  	conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs
   571  	if agentConfig.Client.NoHostUUID != nil {
   572  		conf.NoHostUUID = *agentConfig.Client.NoHostUUID
   573  	} else {
   574  		// Default no_host_uuid to true
   575  		conf.NoHostUUID = true
   576  	}
   577  
   578  	// Setup the ACLs
   579  	conf.ACLEnabled = agentConfig.ACL.Enabled
   580  	conf.ACLTokenTTL = agentConfig.ACL.TokenTTL
   581  	conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL
   582  
   583  	// Setup networking configration
   584  	conf.CNIPath = agentConfig.Client.CNIPath
   585  	conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName
   586  	conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet
   587  
   588  	return conf, nil
   589  }
   590  
   591  // setupServer is used to setup the server if enabled
   592  func (a *Agent) setupServer() error {
   593  	if !a.config.Server.Enabled {
   594  		return nil
   595  	}
   596  
   597  	// Setup the configuration
   598  	conf, err := a.serverConfig()
   599  	if err != nil {
   600  		return fmt.Errorf("server config setup failed: %s", err)
   601  	}
   602  
   603  	// Generate a node ID and persist it if it is the first instance, otherwise
   604  	// read the persisted node ID.
   605  	if err := a.setupNodeID(conf); err != nil {
   606  		return fmt.Errorf("setting up server node ID failed: %s", err)
   607  	}
   608  
   609  	// Sets up the keyring for gossip encryption
   610  	if err := a.setupKeyrings(conf); err != nil {
   611  		return fmt.Errorf("failed to configure keyring: %v", err)
   612  	}
   613  
   614  	// Create the server
   615  	server, err := nomad.NewServer(conf, a.consulCatalog, a.consulACLs)
   616  	if err != nil {
   617  		return fmt.Errorf("server setup failed: %v", err)
   618  	}
   619  	a.server = server
   620  
   621  	// Consul check addresses default to bind but can be toggled to use advertise
   622  	rpcCheckAddr := a.config.normalizedAddrs.RPC
   623  	serfCheckAddr := a.config.normalizedAddrs.Serf
   624  	if *a.config.Consul.ChecksUseAdvertise {
   625  		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
   626  		serfCheckAddr = a.config.AdvertiseAddrs.Serf
   627  	}
   628  
   629  	// Create the Nomad Server services for Consul
   630  	if *a.config.Consul.AutoAdvertise {
   631  		httpServ := &structs.Service{
   632  			Name:      a.config.Consul.ServerServiceName,
   633  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   634  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
   635  		}
   636  		const isServer = true
   637  		if check := a.agentHTTPCheck(isServer); check != nil {
   638  			httpServ.Checks = []*structs.ServiceCheck{check}
   639  		}
   640  		rpcServ := &structs.Service{
   641  			Name:      a.config.Consul.ServerServiceName,
   642  			PortLabel: a.config.AdvertiseAddrs.RPC,
   643  			Tags:      append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...),
   644  			Checks: []*structs.ServiceCheck{
   645  				{
   646  					Name:      a.config.Consul.ServerRPCCheckName,
   647  					Type:      "tcp",
   648  					Interval:  serverRpcCheckInterval,
   649  					Timeout:   serverRpcCheckTimeout,
   650  					PortLabel: rpcCheckAddr,
   651  				},
   652  			},
   653  		}
   654  		serfServ := &structs.Service{
   655  			Name:      a.config.Consul.ServerServiceName,
   656  			PortLabel: a.config.AdvertiseAddrs.Serf,
   657  			Tags:      append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...),
   658  			Checks: []*structs.ServiceCheck{
   659  				{
   660  					Name:      a.config.Consul.ServerSerfCheckName,
   661  					Type:      "tcp",
   662  					Interval:  serverSerfCheckInterval,
   663  					Timeout:   serverSerfCheckTimeout,
   664  					PortLabel: serfCheckAddr,
   665  				},
   666  			},
   667  		}
   668  
   669  		// Add the http port check if TLS isn't enabled
   670  		consulServices := []*structs.Service{
   671  			rpcServ,
   672  			serfServ,
   673  			httpServ,
   674  		}
   675  		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
   676  			return err
   677  		}
   678  	}
   679  
   680  	return nil
   681  }
   682  
   683  // setupNodeID will pull the persisted node ID, if any, or create a random one
   684  // and persist it.
   685  func (a *Agent) setupNodeID(config *nomad.Config) error {
   686  	// For dev mode we have no filesystem access so just make a node ID.
   687  	if a.config.DevMode {
   688  		config.NodeID = uuid.Generate()
   689  		return nil
   690  	}
   691  
   692  	// Load saved state, if any. Since a user could edit this, we also
   693  	// validate it. Saved state overwrites any configured node id
   694  	fileID := filepath.Join(config.DataDir, "node-id")
   695  	if _, err := os.Stat(fileID); err == nil {
   696  		rawID, err := ioutil.ReadFile(fileID)
   697  		if err != nil {
   698  			return err
   699  		}
   700  
   701  		nodeID := strings.TrimSpace(string(rawID))
   702  		nodeID = strings.ToLower(nodeID)
   703  		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
   704  			return err
   705  		}
   706  		config.NodeID = nodeID
   707  		return nil
   708  	}
   709  
   710  	// If they've configured a node ID manually then just use that, as
   711  	// long as it's valid.
   712  	if config.NodeID != "" {
   713  		config.NodeID = strings.ToLower(config.NodeID)
   714  		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
   715  			return err
   716  		}
   717  		// Persist this configured nodeID to our data directory
   718  		if err := lib.EnsurePath(fileID, false); err != nil {
   719  			return err
   720  		}
   721  		if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
   722  			return err
   723  		}
   724  		return nil
   725  	}
   726  
   727  	// If we still don't have a valid node ID, make one.
   728  	if config.NodeID == "" {
   729  		id := uuid.Generate()
   730  		if err := lib.EnsurePath(fileID, false); err != nil {
   731  			return err
   732  		}
   733  		if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil {
   734  			return err
   735  		}
   736  
   737  		config.NodeID = id
   738  	}
   739  	return nil
   740  }
   741  
   742  // setupKeyrings is used to initialize and load keyrings during agent startup
   743  func (a *Agent) setupKeyrings(config *nomad.Config) error {
   744  	file := filepath.Join(a.config.DataDir, serfKeyring)
   745  
   746  	if a.config.Server.EncryptKey == "" {
   747  		goto LOAD
   748  	}
   749  	if _, err := os.Stat(file); err != nil {
   750  		if err := initKeyring(file, a.config.Server.EncryptKey); err != nil {
   751  			return err
   752  		}
   753  	}
   754  
   755  LOAD:
   756  	if _, err := os.Stat(file); err == nil {
   757  		config.SerfConfig.KeyringFile = file
   758  	}
   759  	if err := loadKeyringFile(config.SerfConfig); err != nil {
   760  		return err
   761  	}
   762  	// Success!
   763  	return nil
   764  }
   765  
   766  // setupClient is used to setup the client if enabled
   767  func (a *Agent) setupClient() error {
   768  	if !a.config.Client.Enabled {
   769  		return nil
   770  	}
   771  
   772  	// Setup the configuration
   773  	conf, err := a.clientConfig()
   774  	if err != nil {
   775  		return fmt.Errorf("client setup failed: %v", err)
   776  	}
   777  
   778  	// Reserve some ports for the plugins if we are on Windows
   779  	if runtime.GOOS == "windows" {
   780  		if err := a.reservePortsForClient(conf); err != nil {
   781  			return err
   782  		}
   783  	}
   784  	if conf.StateDBFactory == nil {
   785  		conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode)
   786  	}
   787  
   788  	client, err := client.NewClient(conf, a.consulCatalog, a.consulService)
   789  	if err != nil {
   790  		return fmt.Errorf("client setup failed: %v", err)
   791  	}
   792  	a.client = client
   793  
   794  	// Create the Nomad Client  services for Consul
   795  	if *a.config.Consul.AutoAdvertise {
   796  		httpServ := &structs.Service{
   797  			Name:      a.config.Consul.ClientServiceName,
   798  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   799  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
   800  		}
   801  		const isServer = false
   802  		if check := a.agentHTTPCheck(isServer); check != nil {
   803  			httpServ.Checks = []*structs.ServiceCheck{check}
   804  		}
   805  		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
   806  			return err
   807  		}
   808  	}
   809  
   810  	return nil
   811  }
   812  
   813  // agentHTTPCheck returns a health check for the agent's HTTP API if possible.
   814  // If no HTTP health check can be supported nil is returned.
   815  func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
   816  	// Resolve the http check address
   817  	httpCheckAddr := a.config.normalizedAddrs.HTTP
   818  	if *a.config.Consul.ChecksUseAdvertise {
   819  		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
   820  	}
   821  	check := structs.ServiceCheck{
   822  		Name:      a.config.Consul.ClientHTTPCheckName,
   823  		Type:      "http",
   824  		Path:      "/v1/agent/health?type=client",
   825  		Protocol:  "http",
   826  		Interval:  agentHttpCheckInterval,
   827  		Timeout:   agentHttpCheckTimeout,
   828  		PortLabel: httpCheckAddr,
   829  	}
   830  	// Switch to endpoint that doesn't require a leader for servers
   831  	if server {
   832  		check.Name = a.config.Consul.ServerHTTPCheckName
   833  		check.Path = "/v1/agent/health?type=server"
   834  	}
   835  	if !a.config.TLSConfig.EnableHTTP {
   836  		// No HTTPS, return a plain http check
   837  		return &check
   838  	}
   839  	if a.config.TLSConfig.VerifyHTTPSClient {
   840  		a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled")
   841  		return nil
   842  	}
   843  
   844  	// HTTPS enabled; skip verification
   845  	check.Protocol = "https"
   846  	check.TLSSkipVerify = true
   847  	return &check
   848  }
   849  
   850  // reservePortsForClient reserves a range of ports for the client to use when
   851  // it creates various plugins for log collection, executors, drivers, etc
   852  func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
   853  	if conf.Node.ReservedResources == nil {
   854  		conf.Node.ReservedResources = &structs.NodeReservedResources{}
   855  	}
   856  
   857  	res := conf.Node.ReservedResources.Networks.ReservedHostPorts
   858  	if res == "" {
   859  		res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
   860  	} else {
   861  		res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
   862  	}
   863  	conf.Node.ReservedResources.Networks.ReservedHostPorts = res
   864  	return nil
   865  }
   866  
   867  // Leave is used gracefully exit. Clients will inform servers
   868  // of their departure so that allocations can be rescheduled.
   869  func (a *Agent) Leave() error {
   870  	if a.client != nil {
   871  		if err := a.client.Leave(); err != nil {
   872  			a.logger.Error("client leave failed", "error", err)
   873  		}
   874  	}
   875  	if a.server != nil {
   876  		if err := a.server.Leave(); err != nil {
   877  			a.logger.Error("server leave failed", "error", err)
   878  		}
   879  	}
   880  	return nil
   881  }
   882  
   883  // Shutdown is used to terminate the agent.
   884  func (a *Agent) Shutdown() error {
   885  	a.shutdownLock.Lock()
   886  	defer a.shutdownLock.Unlock()
   887  
   888  	if a.shutdown {
   889  		return nil
   890  	}
   891  
   892  	a.logger.Info("requesting shutdown")
   893  	if a.client != nil {
   894  		if err := a.client.Shutdown(); err != nil {
   895  			a.logger.Error("client shutdown failed", "error", err)
   896  		}
   897  	}
   898  	if a.server != nil {
   899  		if err := a.server.Shutdown(); err != nil {
   900  			a.logger.Error("server shutdown failed", "error", err)
   901  		}
   902  	}
   903  
   904  	if err := a.consulService.Shutdown(); err != nil {
   905  		a.logger.Error("shutting down Consul client failed", "error", err)
   906  	}
   907  
   908  	a.logger.Info("shutdown complete")
   909  	a.shutdown = true
   910  	close(a.shutdownCh)
   911  	return nil
   912  }
   913  
   914  // RPC is used to make an RPC call to the Nomad servers
   915  func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
   916  	if a.server != nil {
   917  		return a.server.RPC(method, args, reply)
   918  	}
   919  	return a.client.RPC(method, args, reply)
   920  }
   921  
   922  // Client returns the configured client or nil
   923  func (a *Agent) Client() *client.Client {
   924  	return a.client
   925  }
   926  
   927  // Server returns the configured server or nil
   928  func (a *Agent) Server() *nomad.Server {
   929  	return a.server
   930  }
   931  
   932  // Stats is used to return statistics for debugging and insight
   933  // for various sub-systems
   934  func (a *Agent) Stats() map[string]map[string]string {
   935  	stats := make(map[string]map[string]string)
   936  	if a.server != nil {
   937  		subStat := a.server.Stats()
   938  		for k, v := range subStat {
   939  			stats[k] = v
   940  		}
   941  	}
   942  	if a.client != nil {
   943  		subStat := a.client.Stats()
   944  		for k, v := range subStat {
   945  			stats[k] = v
   946  		}
   947  	}
   948  	return stats
   949  }
   950  
   951  // ShouldReload determines if we should reload the configuration and agent
   952  // connections. If the TLS Configuration has not changed, we shouldn't reload.
   953  func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) {
   954  	a.configLock.Lock()
   955  	defer a.configLock.Unlock()
   956  
   957  	if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel {
   958  		agent = true
   959  	}
   960  
   961  	isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig)
   962  	if err != nil {
   963  		a.logger.Error("parsing TLS certificate", "error", err)
   964  		return agent, false
   965  	} else if !isEqual {
   966  		return true, true
   967  	}
   968  
   969  	// Allow the ability to only reload HTTP connections
   970  	if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP {
   971  		http = true
   972  		agent = true
   973  	}
   974  
   975  	// Allow the ability to only reload HTTP connections
   976  	if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC {
   977  		agent = true
   978  	}
   979  
   980  	return agent, http
   981  }
   982  
   983  // Reload handles configuration changes for the agent. Provides a method that
   984  // is easier to unit test, as this action is invoked via SIGHUP.
   985  func (a *Agent) Reload(newConfig *Config) error {
   986  	a.configLock.Lock()
   987  	defer a.configLock.Unlock()
   988  
   989  	updatedLogging := newConfig != nil && (newConfig.LogLevel != a.config.LogLevel)
   990  
   991  	if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging {
   992  		return fmt.Errorf("cannot reload agent with nil configuration")
   993  	}
   994  
   995  	if updatedLogging {
   996  		a.config.LogLevel = newConfig.LogLevel
   997  		a.logger.SetLevel(log.LevelFromString(newConfig.LogLevel))
   998  	}
   999  
  1000  	fullUpdateTLSConfig := func() {
  1001  		// Completely reload the agent's TLS configuration (moving from non-TLS to
  1002  		// TLS, or vice versa)
  1003  		// This does not handle errors in loading the new TLS configuration
  1004  		a.config.TLSConfig = newConfig.TLSConfig.Copy()
  1005  	}
  1006  
  1007  	if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
  1008  		// This is just a TLS configuration reload, we don't need to refresh
  1009  		// existing network connections
  1010  
  1011  		// Reload the certificates on the keyloader and on success store the
  1012  		// updated TLS config. It is important to reuse the same keyloader
  1013  		// as this allows us to dynamically reload configurations not only
  1014  		// on the Agent but on the Server and Client too (they are
  1015  		// referencing the same keyloader).
  1016  		keyloader := a.config.TLSConfig.GetKeyLoader()
  1017  		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
  1018  		if err != nil {
  1019  			return err
  1020  		}
  1021  		a.config.TLSConfig = newConfig.TLSConfig
  1022  		a.config.TLSConfig.KeyLoader = keyloader
  1023  		return nil
  1024  	} else if newConfig.TLSConfig.IsEmpty() && !a.config.TLSConfig.IsEmpty() {
  1025  		a.logger.Warn("downgrading agent's existing TLS configuration to plaintext")
  1026  		fullUpdateTLSConfig()
  1027  	} else if !newConfig.TLSConfig.IsEmpty() && a.config.TLSConfig.IsEmpty() {
  1028  		a.logger.Info("upgrading from plaintext configuration to TLS")
  1029  		fullUpdateTLSConfig()
  1030  	}
  1031  
  1032  	return nil
  1033  }
  1034  
  1035  // GetConfig creates a locked reference to the agent's config
  1036  func (a *Agent) GetConfig() *Config {
  1037  	a.configLock.Lock()
  1038  	defer a.configLock.Unlock()
  1039  
  1040  	return a.config
  1041  }
  1042  
  1043  // setupConsul creates the Consul client and starts its main Run loop.
  1044  func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
  1045  	apiConf, err := consulConfig.ApiConfig()
  1046  	if err != nil {
  1047  		return err
  1048  	}
  1049  	client, err := api.NewClient(apiConf)
  1050  	if err != nil {
  1051  		return err
  1052  	}
  1053  
  1054  	// Create Consul Catalog client for service discovery.
  1055  	a.consulCatalog = client.Catalog()
  1056  
  1057  	// Create Consul ACL client for managing tokens.
  1058  	a.consulACLs = client.ACL()
  1059  
  1060  	// Create Consul Service client for service advertisement and checks.
  1061  	isClient := false
  1062  	if a.config.Client != nil && a.config.Client.Enabled {
  1063  		isClient = true
  1064  	}
  1065  	a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient)
  1066  
  1067  	// Run the Consul service client's sync'ing main loop
  1068  	go a.consulService.Run()
  1069  	return nil
  1070  }