github.com/smintz/nomad@v0.8.3/command/agent/agent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	"log"
     8  	"net"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	metrics "github.com/armon/go-metrics"
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/consul/lib"
    20  	uuidparse "github.com/hashicorp/go-uuid"
    21  	"github.com/hashicorp/nomad/client"
    22  	clientconfig "github.com/hashicorp/nomad/client/config"
    23  	"github.com/hashicorp/nomad/command/agent/consul"
    24  	"github.com/hashicorp/nomad/helper/uuid"
    25  	"github.com/hashicorp/nomad/nomad"
    26  	"github.com/hashicorp/nomad/nomad/structs"
    27  	"github.com/hashicorp/nomad/nomad/structs/config"
    28  	"github.com/hashicorp/raft"
    29  )
    30  
// Health-check timing for the services the agent registers in Consul, and
// the role names passed to the Consul service client when registering them.
// The agentHttp* values are used by agentHTTPCheck; the serverRpc* and
// serverSerf* values are used by the TCP checks built in setupServer.
const (
	agentHttpCheckInterval  = 10 * time.Second
	agentHttpCheckTimeout   = 5 * time.Second
	serverRpcCheckInterval  = 10 * time.Second
	serverRpcCheckTimeout   = 3 * time.Second
	serverSerfCheckInterval = 10 * time.Second
	serverSerfCheckTimeout  = 3 * time.Second

	// roles used in identifying Consul entries for Nomad agents
	consulRoleServer = "server"
	consulRoleClient = "client"
)
    43  
// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
	// config is the agent configuration supplied to NewAgent. ShouldReload
	// and Reload take configLock before touching it.
	config     *Config
	configLock sync.Mutex

	// logger writes to logOutput; it is created in NewAgent. logOutput is
	// also handed to the server and client configurations.
	logger    *log.Logger
	logOutput io.Writer

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// client is nil unless client mode is enabled (see setupClient).
	client *client.Client

	// server is nil unless server mode is enabled (see setupServer).
	server *nomad.Server

	// shutdown records that Shutdown has completed; it is read and written
	// under shutdownLock. shutdownCh is closed at the end of Shutdown.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// InmemSink is the in-memory metrics sink passed to NewAgent.
	InmemSink *metrics.InmemSink
}
    73  
    74  // NewAgent is used to create a new agent with the given configuration
    75  func NewAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
    76  	a := &Agent{
    77  		config:     config,
    78  		logger:     log.New(logOutput, "", log.LstdFlags|log.Lmicroseconds),
    79  		logOutput:  logOutput,
    80  		shutdownCh: make(chan struct{}),
    81  		InmemSink:  inmem,
    82  	}
    83  
    84  	if err := a.setupConsul(config.Consul); err != nil {
    85  		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
    86  	}
    87  	if err := a.setupServer(); err != nil {
    88  		return nil, err
    89  	}
    90  	if err := a.setupClient(); err != nil {
    91  		return nil, err
    92  	}
    93  	if a.client == nil && a.server == nil {
    94  		return nil, fmt.Errorf("must have at least client or server mode enabled")
    95  	}
    96  
    97  	return a, nil
    98  }
    99  
// convertServerConfig takes an agent config and log output and returns a Nomad
// Config.
//
// The result starts from agentConfig.NomadConfig when that is non-nil (it is
// then modified in place) and from nomad.DefaultConfig() otherwise. Most agent
// settings are only copied over when they are non-zero, so base values survive
// unless explicitly overridden. logOutput becomes the server's log output.
//
// An error is returned when a bind or advertise address cannot be resolved,
// when a GC threshold is not a valid duration string, or when Consul
// auto-advertise is enabled without a server service name.
func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Config, error) {
	conf := agentConfig.NomadConfig
	if conf == nil {
		conf = nomad.DefaultConfig()
	}
	conf.LogOutput = logOutput
	conf.DevMode = agentConfig.DevMode
	conf.Build = agentConfig.Version.VersionNumber()
	if agentConfig.Region != "" {
		conf.Region = agentConfig.Region
	}

	// Set the Authoritative Region if set, otherwise default to
	// the same as the local region.
	if agentConfig.Server.AuthoritativeRegion != "" {
		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
	} else if agentConfig.Region != "" {
		conf.AuthoritativeRegion = agentConfig.Region
	}

	if agentConfig.Datacenter != "" {
		conf.Datacenter = agentConfig.Datacenter
	}
	if agentConfig.NodeName != "" {
		conf.NodeName = agentConfig.NodeName
	}
	// An expected count of exactly 1 is expressed as Bootstrap=true; larger
	// counts are stored in BootstrapExpect instead.
	if agentConfig.Server.BootstrapExpect > 0 {
		if agentConfig.Server.BootstrapExpect == 1 {
			conf.Bootstrap = true
		} else {
			atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect))
		}
	}
	// Server state defaults to <data_dir>/server; an explicit server data
	// directory takes precedence.
	if agentConfig.DataDir != "" {
		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
	}
	if agentConfig.Server.DataDir != "" {
		conf.DataDir = agentConfig.Server.DataDir
	}
	if agentConfig.Server.ProtocolVersion != 0 {
		conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion)
	}
	if agentConfig.Server.RaftProtocol != 0 {
		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
	}
	if agentConfig.Server.NumSchedulers != 0 {
		conf.NumSchedulers = agentConfig.Server.NumSchedulers
	}
	if len(agentConfig.Server.EnabledSchedulers) != 0 {
		// Convert to a set and require the core scheduler
		set := make(map[string]struct{}, 4)
		set[structs.JobTypeCore] = struct{}{}
		for _, sched := range agentConfig.Server.EnabledSchedulers {
			set[sched] = struct{}{}
		}

		// The slice is filled by ranging over the map, so its order is
		// unspecified.
		schedulers := make([]string, 0, len(set))
		for k := range set {
			schedulers = append(schedulers, k)
		}

		conf.EnabledSchedulers = schedulers

	}
	if agentConfig.ACL.Enabled {
		conf.ACLEnabled = true
	}
	if agentConfig.ACL.ReplicationToken != "" {
		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
	}
	if agentConfig.Sentinel != nil {
		conf.SentinelConfig = agentConfig.Sentinel
	}
	if agentConfig.Server.NonVotingServer {
		conf.NonVoter = true
	}
	if agentConfig.Server.RedundancyZone != "" {
		conf.RedundancyZone = agentConfig.Server.RedundancyZone
	}
	if agentConfig.Server.UpgradeVersion != "" {
		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
	}
	// Autopilot overrides: pointer-typed fields are applied only when
	// non-nil, the others only when non-zero.
	if agentConfig.Autopilot != nil {
		if agentConfig.Autopilot.CleanupDeadServers != nil {
			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
		}
		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
		}
		if agentConfig.Autopilot.LastContactThreshold != 0 {
			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
		}
		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
		}
		if agentConfig.Autopilot.EnableRedundancyZones != nil {
			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
		}
		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
		}
		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
		}
	}

	// Set up the bind addresses
	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
	}
	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
	}
	conf.RPCAddr.Port = rpcAddr.Port
	conf.RPCAddr.IP = rpcAddr.IP
	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()

	// Set up the advertise addresses. rpcAddr and serfAddr are reused and
	// from here on hold the advertise (not bind) addresses.
	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
	}
	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
	}

	// Server address is the serf advertise address and rpc port. This is the
	// address that all servers should be able to communicate over RPC with.
	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
	if err != nil {
		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
	}

	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
	conf.ClientRPCAdvertise = rpcAddr
	conf.ServerRPCAdvertise = serverAddr

	// Set up gc threshold and heartbeat grace period. Thresholds are given
	// as duration strings; a parse failure is returned to the caller as-is.
	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.NodeGCThreshold = dur
	}
	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.JobGCThreshold = dur
	}
	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.EvalGCThreshold = dur
	}
	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
		dur, err := time.ParseDuration(gcThreshold)
		if err != nil {
			return nil, err
		}
		conf.DeploymentGCThreshold = dur
	}

	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
		conf.HeartbeatGrace = heartbeatGrace
	}
	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
		conf.MinHeartbeatTTL = min
	}
	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
		conf.MaxHeartbeatsPerSecond = maxHPS
	}

	// NOTE(review): AutoAdvertise is dereferenced without a nil check;
	// presumably it is always populated by the config defaults - confirm.
	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
	}

	// Add the Consul and Vault configs
	conf.ConsulConfig = agentConfig.Consul
	conf.VaultConfig = agentConfig.Vault

	// Set the TLS config
	conf.TLSConfig = agentConfig.TLSConfig

	// Setup telemetry related config
	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics

	return conf, nil
}
   302  
// serverConfig is used to generate a new server configuration struct
// for initializing a nomad server. It delegates to convertServerConfig
// using the agent's own configuration and log output.
func (a *Agent) serverConfig() (*nomad.Config, error) {
	return convertServerConfig(a.config, a.logOutput)
}
   308  
// clientConfig is used to generate a new client configuration struct
// for initializing a Nomad client.
//
// The result starts from a.config.ClientConfig when that is non-nil (it is
// then modified in place) and from clientconfig.DefaultConfig() otherwise.
// An error is returned when max_kill_timeout is not a valid duration or when
// Consul auto-advertise is enabled without a client service name.
func (a *Agent) clientConfig() (*clientconfig.Config, error) {
	// Setup the configuration
	conf := a.config.ClientConfig
	if conf == nil {
		conf = clientconfig.DefaultConfig()
	}
	// When a server runs in the same agent it is used as the client's RPC
	// handler.
	if a.server != nil {
		conf.RPCHandler = a.server
	}
	conf.LogOutput = a.logOutput
	conf.LogLevel = a.config.LogLevel
	conf.DevMode = a.config.DevMode
	if a.config.Region != "" {
		conf.Region = a.config.Region
	}
	// State and alloc directories default to subdirectories of data_dir;
	// explicit client settings take precedence.
	if a.config.DataDir != "" {
		conf.StateDir = filepath.Join(a.config.DataDir, "client")
		conf.AllocDir = filepath.Join(a.config.DataDir, "alloc")
	}
	if a.config.Client.StateDir != "" {
		conf.StateDir = a.config.Client.StateDir
	}
	if a.config.Client.AllocDir != "" {
		conf.AllocDir = a.config.Client.AllocDir
	}
	conf.Servers = a.config.Client.Servers
	if a.config.Client.NetworkInterface != "" {
		conf.NetworkInterface = a.config.Client.NetworkInterface
	}
	conf.ChrootEnv = a.config.Client.ChrootEnv
	conf.Options = a.config.Client.Options
	// Logging deprecation messages about consul related configuration in client
	// options
	var invalidConsulKeys []string
	for key := range conf.Options {
		if strings.HasPrefix(key, "consul") {
			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
		}
	}
	if len(invalidConsulKeys) > 0 {
		a.logger.Printf("[WARN] agent: Invalid keys: %v", strings.Join(invalidConsulKeys, ","))
		a.logger.Printf(`Nomad client ignores consul related configuration in client options.
		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
		to configure Nomad to work with Consul.`)
	}

	// Resource overrides are applied only when non-zero.
	if a.config.Client.NetworkSpeed != 0 {
		conf.NetworkSpeed = a.config.Client.NetworkSpeed
	}
	if a.config.Client.CpuCompute != 0 {
		conf.CpuCompute = a.config.Client.CpuCompute
	}
	if a.config.Client.MemoryMB != 0 {
		conf.MemoryMB = a.config.Client.MemoryMB
	}
	if a.config.Client.MaxKillTimeout != "" {
		dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout)
		if err != nil {
			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
		}
		conf.MaxKillTimeout = dur
	}
	conf.ClientMaxPort = uint(a.config.Client.ClientMaxPort)
	conf.ClientMinPort = uint(a.config.Client.ClientMinPort)

	// Setup the node. A fresh Node replaces whatever conf.Node held before.
	conf.Node = new(structs.Node)
	conf.Node.Datacenter = a.config.Datacenter
	conf.Node.Name = a.config.NodeName
	conf.Node.Meta = a.config.Client.Meta
	conf.Node.NodeClass = a.config.Client.NodeClass

	// Set up the HTTP advertise address
	conf.Node.HTTPAddr = a.config.AdvertiseAddrs.HTTP

	// Reserve resources on the node.
	// NOTE(review): a.config.Client.Reserved is dereferenced without a nil
	// check; presumably it is always populated by the config defaults - confirm.
	r := conf.Node.Reserved
	if r == nil {
		r = new(structs.Resources)
		conf.Node.Reserved = r
	}
	r.CPU = a.config.Client.Reserved.CPU
	r.MemoryMB = a.config.Client.Reserved.MemoryMB
	r.DiskMB = a.config.Client.Reserved.DiskMB
	r.IOPS = a.config.Client.Reserved.IOPS
	conf.GloballyReservedPorts = a.config.Client.Reserved.ParsedReservedPorts

	conf.Version = a.config.Version

	// NOTE(review): AutoAdvertise is dereferenced without a nil check;
	// presumably it is always populated by the config defaults - confirm.
	if *a.config.Consul.AutoAdvertise && a.config.Consul.ClientServiceName == "" {
		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
	}

	conf.ConsulConfig = a.config.Consul
	conf.VaultConfig = a.config.Vault

	// Set up Telemetry configuration
	conf.StatsCollectionInterval = a.config.Telemetry.collectionInterval
	conf.PublishNodeMetrics = a.config.Telemetry.PublishNodeMetrics
	conf.PublishAllocationMetrics = a.config.Telemetry.PublishAllocationMetrics
	conf.DisableTaggedMetrics = a.config.Telemetry.DisableTaggedMetrics
	conf.BackwardsCompatibleMetrics = a.config.Telemetry.BackwardsCompatibleMetrics

	// Set the TLS related configs
	conf.TLSConfig = a.config.TLSConfig
	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP

	// Set the GC related configs
	conf.GCInterval = a.config.Client.GCInterval
	conf.GCParallelDestroys = a.config.Client.GCParallelDestroys
	conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
	conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold
	conf.GCMaxAllocs = a.config.Client.GCMaxAllocs
	if a.config.Client.NoHostUUID != nil {
		conf.NoHostUUID = *a.config.Client.NoHostUUID
	} else {
		// Default no_host_uuid to true
		conf.NoHostUUID = true
	}

	// Setup the ACLs
	conf.ACLEnabled = a.config.ACL.Enabled
	conf.ACLTokenTTL = a.config.ACL.TokenTTL
	conf.ACLPolicyTTL = a.config.ACL.PolicyTTL

	return conf, nil
}
   438  
   439  // setupServer is used to setup the server if enabled
   440  func (a *Agent) setupServer() error {
   441  	if !a.config.Server.Enabled {
   442  		return nil
   443  	}
   444  
   445  	// Setup the configuration
   446  	conf, err := a.serverConfig()
   447  	if err != nil {
   448  		return fmt.Errorf("server config setup failed: %s", err)
   449  	}
   450  
   451  	// Generate a node ID and persist it if it is the first instance, otherwise
   452  	// read the persisted node ID.
   453  	if err := a.setupNodeID(conf); err != nil {
   454  		return fmt.Errorf("setting up server node ID failed: %s", err)
   455  	}
   456  
   457  	// Sets up the keyring for gossip encryption
   458  	if err := a.setupKeyrings(conf); err != nil {
   459  		return fmt.Errorf("failed to configure keyring: %v", err)
   460  	}
   461  
   462  	// Create the server
   463  	server, err := nomad.NewServer(conf, a.consulCatalog, a.logger)
   464  	if err != nil {
   465  		return fmt.Errorf("server setup failed: %v", err)
   466  	}
   467  	a.server = server
   468  
   469  	// Consul check addresses default to bind but can be toggled to use advertise
   470  	rpcCheckAddr := a.config.normalizedAddrs.RPC
   471  	serfCheckAddr := a.config.normalizedAddrs.Serf
   472  	if *a.config.Consul.ChecksUseAdvertise {
   473  		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
   474  		serfCheckAddr = a.config.AdvertiseAddrs.Serf
   475  	}
   476  
   477  	// Create the Nomad Server services for Consul
   478  	if *a.config.Consul.AutoAdvertise {
   479  		httpServ := &structs.Service{
   480  			Name:      a.config.Consul.ServerServiceName,
   481  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   482  			Tags:      []string{consul.ServiceTagHTTP},
   483  		}
   484  		const isServer = true
   485  		if check := a.agentHTTPCheck(isServer); check != nil {
   486  			httpServ.Checks = []*structs.ServiceCheck{check}
   487  		}
   488  		rpcServ := &structs.Service{
   489  			Name:      a.config.Consul.ServerServiceName,
   490  			PortLabel: a.config.AdvertiseAddrs.RPC,
   491  			Tags:      []string{consul.ServiceTagRPC},
   492  			Checks: []*structs.ServiceCheck{
   493  				{
   494  					Name:      a.config.Consul.ServerRPCCheckName,
   495  					Type:      "tcp",
   496  					Interval:  serverRpcCheckInterval,
   497  					Timeout:   serverRpcCheckTimeout,
   498  					PortLabel: rpcCheckAddr,
   499  				},
   500  			},
   501  		}
   502  		serfServ := &structs.Service{
   503  			Name:      a.config.Consul.ServerServiceName,
   504  			PortLabel: a.config.AdvertiseAddrs.Serf,
   505  			Tags:      []string{consul.ServiceTagSerf},
   506  			Checks: []*structs.ServiceCheck{
   507  				{
   508  					Name:      a.config.Consul.ServerSerfCheckName,
   509  					Type:      "tcp",
   510  					Interval:  serverSerfCheckInterval,
   511  					Timeout:   serverSerfCheckTimeout,
   512  					PortLabel: serfCheckAddr,
   513  				},
   514  			},
   515  		}
   516  
   517  		// Add the http port check if TLS isn't enabled
   518  		consulServices := []*structs.Service{
   519  			rpcServ,
   520  			serfServ,
   521  			httpServ,
   522  		}
   523  		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
   524  			return err
   525  		}
   526  	}
   527  
   528  	return nil
   529  }
   530  
   531  // setupNodeID will pull the persisted node ID, if any, or create a random one
   532  // and persist it.
   533  func (a *Agent) setupNodeID(config *nomad.Config) error {
   534  	// For dev mode we have no filesystem access so just make a node ID.
   535  	if a.config.DevMode {
   536  		config.NodeID = uuid.Generate()
   537  		return nil
   538  	}
   539  
   540  	// Load saved state, if any. Since a user could edit this, we also
   541  	// validate it. Saved state overwrites any configured node id
   542  	fileID := filepath.Join(config.DataDir, "node-id")
   543  	if _, err := os.Stat(fileID); err == nil {
   544  		rawID, err := ioutil.ReadFile(fileID)
   545  		if err != nil {
   546  			return err
   547  		}
   548  
   549  		nodeID := strings.TrimSpace(string(rawID))
   550  		nodeID = strings.ToLower(nodeID)
   551  		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
   552  			return err
   553  		}
   554  		config.NodeID = nodeID
   555  		return nil
   556  	}
   557  
   558  	// If they've configured a node ID manually then just use that, as
   559  	// long as it's valid.
   560  	if config.NodeID != "" {
   561  		config.NodeID = strings.ToLower(config.NodeID)
   562  		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
   563  			return err
   564  		}
   565  		// Persist this configured nodeID to our data directory
   566  		if err := lib.EnsurePath(fileID, false); err != nil {
   567  			return err
   568  		}
   569  		if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
   570  			return err
   571  		}
   572  		return nil
   573  	}
   574  
   575  	// If we still don't have a valid node ID, make one.
   576  	if config.NodeID == "" {
   577  		id := uuid.Generate()
   578  		if err := lib.EnsurePath(fileID, false); err != nil {
   579  			return err
   580  		}
   581  		if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil {
   582  			return err
   583  		}
   584  
   585  		config.NodeID = id
   586  	}
   587  	return nil
   588  }
   589  
   590  // setupKeyrings is used to initialize and load keyrings during agent startup
   591  func (a *Agent) setupKeyrings(config *nomad.Config) error {
   592  	file := filepath.Join(a.config.DataDir, serfKeyring)
   593  
   594  	if a.config.Server.EncryptKey == "" {
   595  		goto LOAD
   596  	}
   597  	if _, err := os.Stat(file); err != nil {
   598  		if err := initKeyring(file, a.config.Server.EncryptKey); err != nil {
   599  			return err
   600  		}
   601  	}
   602  
   603  LOAD:
   604  	if _, err := os.Stat(file); err == nil {
   605  		config.SerfConfig.KeyringFile = file
   606  	}
   607  	if err := loadKeyringFile(config.SerfConfig); err != nil {
   608  		return err
   609  	}
   610  	// Success!
   611  	return nil
   612  }
   613  
   614  // setupClient is used to setup the client if enabled
   615  func (a *Agent) setupClient() error {
   616  	if !a.config.Client.Enabled {
   617  		return nil
   618  	}
   619  
   620  	// Setup the configuration
   621  	conf, err := a.clientConfig()
   622  	if err != nil {
   623  		return fmt.Errorf("client setup failed: %v", err)
   624  	}
   625  
   626  	// Reserve some ports for the plugins if we are on Windows
   627  	if runtime.GOOS == "windows" {
   628  		if err := a.reservePortsForClient(conf); err != nil {
   629  			return err
   630  		}
   631  	}
   632  
   633  	client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger)
   634  	if err != nil {
   635  		return fmt.Errorf("client setup failed: %v", err)
   636  	}
   637  	a.client = client
   638  
   639  	// Create the Nomad Client  services for Consul
   640  	if *a.config.Consul.AutoAdvertise {
   641  		httpServ := &structs.Service{
   642  			Name:      a.config.Consul.ClientServiceName,
   643  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   644  			Tags:      []string{consul.ServiceTagHTTP},
   645  		}
   646  		const isServer = false
   647  		if check := a.agentHTTPCheck(isServer); check != nil {
   648  			httpServ.Checks = []*structs.ServiceCheck{check}
   649  		}
   650  		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
   651  			return err
   652  		}
   653  	}
   654  
   655  	return nil
   656  }
   657  
   658  // agentHTTPCheck returns a health check for the agent's HTTP API if possible.
   659  // If no HTTP health check can be supported nil is returned.
   660  func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
   661  	// Resolve the http check address
   662  	httpCheckAddr := a.config.normalizedAddrs.HTTP
   663  	if *a.config.Consul.ChecksUseAdvertise {
   664  		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
   665  	}
   666  	check := structs.ServiceCheck{
   667  		Name:      a.config.Consul.ClientHTTPCheckName,
   668  		Type:      "http",
   669  		Path:      "/v1/agent/health?type=client",
   670  		Protocol:  "http",
   671  		Interval:  agentHttpCheckInterval,
   672  		Timeout:   agentHttpCheckTimeout,
   673  		PortLabel: httpCheckAddr,
   674  	}
   675  	// Switch to endpoint that doesn't require a leader for servers
   676  	if server {
   677  		check.Name = a.config.Consul.ServerHTTPCheckName
   678  		check.Path = "/v1/agent/health?type=server"
   679  	}
   680  	if !a.config.TLSConfig.EnableHTTP {
   681  		// No HTTPS, return a plain http check
   682  		return &check
   683  	}
   684  	if a.config.TLSConfig.VerifyHTTPSClient {
   685  		a.logger.Printf("[WARN] agent: not registering Nomad HTTPS Health Check because verify_https_client enabled")
   686  		return nil
   687  	}
   688  
   689  	// HTTPS enabled; skip verification
   690  	check.Protocol = "https"
   691  	check.TLSSkipVerify = true
   692  	return &check
   693  }
   694  
   695  // reservePortsForClient reserves a range of ports for the client to use when
   696  // it creates various plugins for log collection, executors, drivers, etc
   697  func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
   698  	// finding the device name for loopback
   699  	deviceName, addr, mask, err := a.findLoopbackDevice()
   700  	if err != nil {
   701  		return fmt.Errorf("error finding the device name for loopback: %v", err)
   702  	}
   703  
   704  	// seeing if the user has already reserved some resources on this device
   705  	var nr *structs.NetworkResource
   706  	if conf.Node.Reserved == nil {
   707  		conf.Node.Reserved = &structs.Resources{}
   708  	}
   709  	for _, n := range conf.Node.Reserved.Networks {
   710  		if n.Device == deviceName {
   711  			nr = n
   712  		}
   713  	}
   714  	// If the user hasn't already created the device, we create it
   715  	if nr == nil {
   716  		nr = &structs.NetworkResource{
   717  			Device:        deviceName,
   718  			IP:            addr,
   719  			CIDR:          mask,
   720  			ReservedPorts: make([]structs.Port, 0),
   721  		}
   722  	}
   723  	// appending the port ranges we want to use for the client to the list of
   724  	// reserved ports for this device
   725  	for i := conf.ClientMinPort; i <= conf.ClientMaxPort; i++ {
   726  		nr.ReservedPorts = append(nr.ReservedPorts, structs.Port{Label: fmt.Sprintf("plugin-%d", i), Value: int(i)})
   727  	}
   728  	conf.Node.Reserved.Networks = append(conf.Node.Reserved.Networks, nr)
   729  	return nil
   730  }
   731  
   732  // findLoopbackDevice iterates through all the interfaces on a machine and
   733  // returns the ip addr, mask of the loopback device
   734  func (a *Agent) findLoopbackDevice() (string, string, string, error) {
   735  	var ifcs []net.Interface
   736  	var err error
   737  	ifcs, err = net.Interfaces()
   738  	if err != nil {
   739  		return "", "", "", err
   740  	}
   741  	for _, ifc := range ifcs {
   742  		addrs, err := ifc.Addrs()
   743  		if err != nil {
   744  			return "", "", "", err
   745  		}
   746  		for _, addr := range addrs {
   747  			var ip net.IP
   748  			switch v := addr.(type) {
   749  			case *net.IPNet:
   750  				ip = v.IP
   751  			case *net.IPAddr:
   752  				ip = v.IP
   753  			}
   754  			if ip.IsLoopback() {
   755  				if ip.To4() == nil {
   756  					continue
   757  				}
   758  				return ifc.Name, ip.String(), addr.String(), nil
   759  			}
   760  		}
   761  	}
   762  
   763  	return "", "", "", fmt.Errorf("no loopback devices with IPV4 addr found")
   764  }
   765  
   766  // Leave is used gracefully exit. Clients will inform servers
   767  // of their departure so that allocations can be rescheduled.
   768  func (a *Agent) Leave() error {
   769  	if a.client != nil {
   770  		if err := a.client.Leave(); err != nil {
   771  			a.logger.Printf("[ERR] agent: client leave failed: %v", err)
   772  		}
   773  	}
   774  	if a.server != nil {
   775  		if err := a.server.Leave(); err != nil {
   776  			a.logger.Printf("[ERR] agent: server leave failed: %v", err)
   777  		}
   778  	}
   779  	return nil
   780  }
   781  
   782  // Shutdown is used to terminate the agent.
   783  func (a *Agent) Shutdown() error {
   784  	a.shutdownLock.Lock()
   785  	defer a.shutdownLock.Unlock()
   786  
   787  	if a.shutdown {
   788  		return nil
   789  	}
   790  
   791  	a.logger.Println("[INFO] agent: requesting shutdown")
   792  	if a.client != nil {
   793  		if err := a.client.Shutdown(); err != nil {
   794  			a.logger.Printf("[ERR] agent: client shutdown failed: %v", err)
   795  		}
   796  	}
   797  	if a.server != nil {
   798  		if err := a.server.Shutdown(); err != nil {
   799  			a.logger.Printf("[ERR] agent: server shutdown failed: %v", err)
   800  		}
   801  	}
   802  
   803  	if err := a.consulService.Shutdown(); err != nil {
   804  		a.logger.Printf("[ERR] agent: shutting down Consul client failed: %v", err)
   805  	}
   806  
   807  	a.logger.Println("[INFO] agent: shutdown complete")
   808  	a.shutdown = true
   809  	close(a.shutdownCh)
   810  	return nil
   811  }
   812  
   813  // RPC is used to make an RPC call to the Nomad servers
   814  func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
   815  	if a.server != nil {
   816  		return a.server.RPC(method, args, reply)
   817  	}
   818  	return a.client.RPC(method, args, reply)
   819  }
   820  
// Client returns the configured client or nil. The client is only set when
// client mode is enabled.
func (a *Agent) Client() *client.Client {
	return a.client
}
   825  
// Server returns the configured server or nil. The server is only set when
// server mode is enabled.
func (a *Agent) Server() *nomad.Server {
	return a.server
}
   830  
   831  // Stats is used to return statistics for debugging and insight
   832  // for various sub-systems
   833  func (a *Agent) Stats() map[string]map[string]string {
   834  	stats := make(map[string]map[string]string)
   835  	if a.server != nil {
   836  		subStat := a.server.Stats()
   837  		for k, v := range subStat {
   838  			stats[k] = v
   839  		}
   840  	}
   841  	if a.client != nil {
   842  		subStat := a.client.Stats()
   843  		for k, v := range subStat {
   844  			stats[k] = v
   845  		}
   846  	}
   847  	return stats
   848  }
   849  
   850  // ShouldReload determines if we should reload the configuration and agent
   851  // connections. If the TLS Configuration has not changed, we shouldn't reload.
   852  func (a *Agent) ShouldReload(newConfig *Config) (agent, http, rpc bool) {
   853  	a.configLock.Lock()
   854  	defer a.configLock.Unlock()
   855  
   856  	isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig)
   857  	if err != nil {
   858  		a.logger.Printf("[INFO] agent: error when parsing TLS certificate %v", err)
   859  		return false, false, false
   860  	} else if !isEqual {
   861  		return true, true, true
   862  	}
   863  
   864  	// Allow the ability to only reload HTTP connections
   865  	if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP {
   866  		http = true
   867  		agent = true
   868  	}
   869  
   870  	// Allow the ability to only reload HTTP connections
   871  	if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC {
   872  		rpc = true
   873  		agent = true
   874  	}
   875  
   876  	return agent, http, rpc
   877  }
   878  
   879  // Reload handles configuration changes for the agent. Provides a method that
   880  // is easier to unit test, as this action is invoked via SIGHUP.
   881  func (a *Agent) Reload(newConfig *Config) error {
   882  	a.configLock.Lock()
   883  	defer a.configLock.Unlock()
   884  
   885  	if newConfig == nil || newConfig.TLSConfig == nil {
   886  		return fmt.Errorf("cannot reload agent with nil configuration")
   887  	}
   888  
   889  	// This is just a TLS configuration reload, we don't need to refresh
   890  	// existing network connections
   891  	if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
   892  
   893  		// Reload the certificates on the keyloader and on success store the
   894  		// updated TLS config. It is important to reuse the same keyloader
   895  		// as this allows us to dynamically reload configurations not only
   896  		// on the Agent but on the Server and Client too (they are
   897  		// referencing the same keyloader).
   898  		keyloader := a.config.TLSConfig.GetKeyLoader()
   899  		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
   900  		if err != nil {
   901  			return err
   902  		}
   903  		a.config.TLSConfig = newConfig.TLSConfig
   904  		a.config.TLSConfig.KeyLoader = keyloader
   905  		return nil
   906  	}
   907  
   908  	// Completely reload the agent's TLS configuration (moving from non-TLS to
   909  	// TLS, or vice versa)
   910  	// This does not handle errors in loading the new TLS configuration
   911  	a.config.TLSConfig = newConfig.TLSConfig.Copy()
   912  
   913  	if newConfig.TLSConfig.IsEmpty() {
   914  		a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext")
   915  	} else {
   916  		a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS")
   917  	}
   918  
   919  	return nil
   920  }
   921  
   922  // GetConfig creates a locked reference to the agent's config
   923  func (a *Agent) GetConfig() *Config {
   924  	a.configLock.Lock()
   925  	defer a.configLock.Unlock()
   926  
   927  	return a.config
   928  }
   929  
   930  // setupConsul creates the Consul client and starts its main Run loop.
   931  func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
   932  	apiConf, err := consulConfig.ApiConfig()
   933  	if err != nil {
   934  		return err
   935  	}
   936  	client, err := api.NewClient(apiConf)
   937  	if err != nil {
   938  		return err
   939  	}
   940  
   941  	// Determine version for TLSSkipVerify
   942  
   943  	// Create Consul Catalog client for service discovery.
   944  	a.consulCatalog = client.Catalog()
   945  
   946  	// Create Consul Service client for service advertisement and checks.
   947  	a.consulService = consul.NewServiceClient(client.Agent(), a.logger)
   948  
   949  	// Run the Consul service client's sync'ing main loop
   950  	go a.consulService.Run()
   951  	return nil
   952  }