github.com/quite/nomad@v0.8.6/command/agent/agent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	"log"
     8  	"net"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	metrics "github.com/armon/go-metrics"
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/consul/lib"
    20  	uuidparse "github.com/hashicorp/go-uuid"
    21  	"github.com/hashicorp/nomad/client"
    22  	clientconfig "github.com/hashicorp/nomad/client/config"
    23  	"github.com/hashicorp/nomad/command/agent/consul"
    24  	"github.com/hashicorp/nomad/helper/uuid"
    25  	"github.com/hashicorp/nomad/nomad"
    26  	"github.com/hashicorp/nomad/nomad/structs"
    27  	"github.com/hashicorp/nomad/nomad/structs/config"
    28  	"github.com/hashicorp/raft"
    29  )
    30  
    31  const (
    32  	agentHttpCheckInterval  = 10 * time.Second
    33  	agentHttpCheckTimeout   = 5 * time.Second
    34  	serverRpcCheckInterval  = 10 * time.Second
    35  	serverRpcCheckTimeout   = 3 * time.Second
    36  	serverSerfCheckInterval = 10 * time.Second
    37  	serverSerfCheckTimeout  = 3 * time.Second
    38  
    39  	// roles used in identifying Consul entries for Nomad agents
    40  	consulRoleServer = "server"
    41  	consulRoleClient = "client"
    42  )
    43  
// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
	// config is the agent configuration handed to NewAgent. ShouldReload
	// holds configLock while it reads config.
	config     *Config
	configLock sync.Mutex

	// logger writes to logOutput; both are set up in NewAgent. logOutput is
	// also passed on to the server and client configurations.
	logger    *log.Logger
	logOutput io.Writer

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// client is set by setupClient and stays nil when client mode is
	// disabled.
	client *client.Client

	// server is set by setupServer and stays nil when server mode is
	// disabled.
	server *nomad.Server

	// shutdown records that Shutdown has completed; shutdownCh is closed at
	// that point. Shutdown holds shutdownLock while touching either.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// InmemSink is the in-memory metrics sink supplied to NewAgent.
	InmemSink *metrics.InmemSink
}
    73  
    74  // NewAgent is used to create a new agent with the given configuration
    75  func NewAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
    76  	a := &Agent{
    77  		config:     config,
    78  		logger:     log.New(logOutput, "", log.LstdFlags|log.Lmicroseconds),
    79  		logOutput:  logOutput,
    80  		shutdownCh: make(chan struct{}),
    81  		InmemSink:  inmem,
    82  	}
    83  
    84  	// Global logger should match internal logger as much as possible
    85  	log.SetFlags(log.LstdFlags | log.Lmicroseconds)
    86  
    87  	if err := a.setupConsul(config.Consul); err != nil {
    88  		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
    89  	}
    90  	if err := a.setupServer(); err != nil {
    91  		return nil, err
    92  	}
    93  	if err := a.setupClient(); err != nil {
    94  		return nil, err
    95  	}
    96  	if a.client == nil && a.server == nil {
    97  		return nil, fmt.Errorf("must have at least client or server mode enabled")
    98  	}
    99  
   100  	return a, nil
   101  }
   102  
   103  // convertServerConfig takes an agent config and log output and returns a Nomad
   104  // Config.
   105  func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Config, error) {
   106  	conf := agentConfig.NomadConfig
   107  	if conf == nil {
   108  		conf = nomad.DefaultConfig()
   109  	}
   110  	conf.LogOutput = logOutput
   111  	conf.DevMode = agentConfig.DevMode
   112  	conf.Build = agentConfig.Version.VersionNumber()
   113  	if agentConfig.Region != "" {
   114  		conf.Region = agentConfig.Region
   115  	}
   116  
   117  	// Set the Authoritative Region if set, otherwise default to
   118  	// the same as the local region.
   119  	if agentConfig.Server.AuthoritativeRegion != "" {
   120  		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
   121  	} else if agentConfig.Region != "" {
   122  		conf.AuthoritativeRegion = agentConfig.Region
   123  	}
   124  
   125  	if agentConfig.Datacenter != "" {
   126  		conf.Datacenter = agentConfig.Datacenter
   127  	}
   128  	if agentConfig.NodeName != "" {
   129  		conf.NodeName = agentConfig.NodeName
   130  	}
   131  	if agentConfig.Server.BootstrapExpect > 0 {
   132  		if agentConfig.Server.BootstrapExpect == 1 {
   133  			conf.Bootstrap = true
   134  		} else {
   135  			atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect))
   136  		}
   137  	}
   138  	if agentConfig.DataDir != "" {
   139  		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
   140  	}
   141  	if agentConfig.Server.DataDir != "" {
   142  		conf.DataDir = agentConfig.Server.DataDir
   143  	}
   144  	if agentConfig.Server.ProtocolVersion != 0 {
   145  		conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion)
   146  	}
   147  	if agentConfig.Server.RaftProtocol != 0 {
   148  		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
   149  	}
   150  	if agentConfig.Server.NumSchedulers != nil {
   151  		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
   152  	}
   153  	if len(agentConfig.Server.EnabledSchedulers) != 0 {
   154  		// Convert to a set and require the core scheduler
   155  		set := make(map[string]struct{}, 4)
   156  		set[structs.JobTypeCore] = struct{}{}
   157  		for _, sched := range agentConfig.Server.EnabledSchedulers {
   158  			set[sched] = struct{}{}
   159  		}
   160  
   161  		schedulers := make([]string, 0, len(set))
   162  		for k := range set {
   163  			schedulers = append(schedulers, k)
   164  		}
   165  
   166  		conf.EnabledSchedulers = schedulers
   167  
   168  	}
   169  	if agentConfig.ACL.Enabled {
   170  		conf.ACLEnabled = true
   171  	}
   172  	if agentConfig.ACL.ReplicationToken != "" {
   173  		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
   174  	}
   175  	if agentConfig.Sentinel != nil {
   176  		conf.SentinelConfig = agentConfig.Sentinel
   177  	}
   178  	if agentConfig.Server.NonVotingServer {
   179  		conf.NonVoter = true
   180  	}
   181  	if agentConfig.Server.RedundancyZone != "" {
   182  		conf.RedundancyZone = agentConfig.Server.RedundancyZone
   183  	}
   184  	if agentConfig.Server.UpgradeVersion != "" {
   185  		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
   186  	}
   187  	if agentConfig.Autopilot != nil {
   188  		if agentConfig.Autopilot.CleanupDeadServers != nil {
   189  			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
   190  		}
   191  		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
   192  			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
   193  		}
   194  		if agentConfig.Autopilot.LastContactThreshold != 0 {
   195  			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
   196  		}
   197  		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
   198  			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
   199  		}
   200  		if agentConfig.Autopilot.EnableRedundancyZones != nil {
   201  			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
   202  		}
   203  		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
   204  			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
   205  		}
   206  		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
   207  			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
   208  		}
   209  	}
   210  
   211  	// Set up the bind addresses
   212  	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
   213  	if err != nil {
   214  		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
   215  	}
   216  	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
   217  	if err != nil {
   218  		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
   219  	}
   220  	conf.RPCAddr.Port = rpcAddr.Port
   221  	conf.RPCAddr.IP = rpcAddr.IP
   222  	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
   223  	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()
   224  
   225  	// Set up the advertise addresses
   226  	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
   227  	if err != nil {
   228  		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
   229  	}
   230  	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
   231  	if err != nil {
   232  		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   233  	}
   234  
   235  	// Server address is the serf advertise address and rpc port. This is the
   236  	// address that all servers should be able to communicate over RPC with.
   237  	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
   238  	if err != nil {
   239  		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   240  	}
   241  
   242  	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
   243  	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
   244  	conf.ClientRPCAdvertise = rpcAddr
   245  	conf.ServerRPCAdvertise = serverAddr
   246  
   247  	// Set up gc threshold and heartbeat grace period
   248  	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
   249  		dur, err := time.ParseDuration(gcThreshold)
   250  		if err != nil {
   251  			return nil, err
   252  		}
   253  		conf.NodeGCThreshold = dur
   254  	}
   255  	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
   256  		dur, err := time.ParseDuration(gcThreshold)
   257  		if err != nil {
   258  			return nil, err
   259  		}
   260  		conf.JobGCThreshold = dur
   261  	}
   262  	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
   263  		dur, err := time.ParseDuration(gcThreshold)
   264  		if err != nil {
   265  			return nil, err
   266  		}
   267  		conf.EvalGCThreshold = dur
   268  	}
   269  	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
   270  		dur, err := time.ParseDuration(gcThreshold)
   271  		if err != nil {
   272  			return nil, err
   273  		}
   274  		conf.DeploymentGCThreshold = dur
   275  	}
   276  
   277  	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
   278  		conf.HeartbeatGrace = heartbeatGrace
   279  	}
   280  	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
   281  		conf.MinHeartbeatTTL = min
   282  	}
   283  	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
   284  		conf.MaxHeartbeatsPerSecond = maxHPS
   285  	}
   286  
   287  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
   288  		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
   289  	}
   290  
   291  	// Add the Consul and Vault configs
   292  	conf.ConsulConfig = agentConfig.Consul
   293  	conf.VaultConfig = agentConfig.Vault
   294  
   295  	// Set the TLS config
   296  	conf.TLSConfig = agentConfig.TLSConfig
   297  
   298  	// Setup telemetry related config
   299  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   300  	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
   301  	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
   302  
   303  	return conf, nil
   304  }
   305  
// serverConfig is used to generate a new server configuration struct
// for initializing a nomad server. It is a thin wrapper that feeds the agent's
// own config and log output to convertServerConfig.
func (a *Agent) serverConfig() (*nomad.Config, error) {
	return convertServerConfig(a.config, a.logOutput)
}
   311  
   312  // clientConfig is used to generate a new client configuration struct
   313  // for initializing a Nomad client.
   314  func (a *Agent) clientConfig() (*clientconfig.Config, error) {
   315  	// Setup the configuration
   316  	conf := a.config.ClientConfig
   317  	if conf == nil {
   318  		conf = clientconfig.DefaultConfig()
   319  	}
   320  
   321  	// If we are running a server, append both its bind and advertise address so
   322  	// we are able to at least talk to the local server even if that isn't
   323  	// configured explicitly. This handles both running server and client on one
   324  	// host and -dev mode.
   325  	conf.Servers = a.config.Client.Servers
   326  	if a.server != nil {
   327  		if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" {
   328  			return nil, fmt.Errorf("AdvertiseAddrs is nil or empty")
   329  		} else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" {
   330  			return nil, fmt.Errorf("normalizedAddrs is nil or empty")
   331  		}
   332  
   333  		conf.Servers = append(conf.Servers,
   334  			a.config.normalizedAddrs.RPC,
   335  			a.config.AdvertiseAddrs.RPC)
   336  	}
   337  
   338  	conf.LogOutput = a.logOutput
   339  	conf.LogLevel = a.config.LogLevel
   340  	conf.DevMode = a.config.DevMode
   341  	if a.config.Region != "" {
   342  		conf.Region = a.config.Region
   343  	}
   344  	if a.config.DataDir != "" {
   345  		conf.StateDir = filepath.Join(a.config.DataDir, "client")
   346  		conf.AllocDir = filepath.Join(a.config.DataDir, "alloc")
   347  	}
   348  	if a.config.Client.StateDir != "" {
   349  		conf.StateDir = a.config.Client.StateDir
   350  	}
   351  	if a.config.Client.AllocDir != "" {
   352  		conf.AllocDir = a.config.Client.AllocDir
   353  	}
   354  	if a.config.Client.NetworkInterface != "" {
   355  		conf.NetworkInterface = a.config.Client.NetworkInterface
   356  	}
   357  	conf.ChrootEnv = a.config.Client.ChrootEnv
   358  	conf.Options = a.config.Client.Options
   359  	// Logging deprecation messages about consul related configuration in client
   360  	// options
   361  	var invalidConsulKeys []string
   362  	for key := range conf.Options {
   363  		if strings.HasPrefix(key, "consul") {
   364  			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
   365  		}
   366  	}
   367  	if len(invalidConsulKeys) > 0 {
   368  		a.logger.Printf("[WARN] agent: Invalid keys: %v", strings.Join(invalidConsulKeys, ","))
   369  		a.logger.Printf(`Nomad client ignores consul related configuration in client options.
   370  		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
   371  		to configure Nomad to work with Consul.`)
   372  	}
   373  
   374  	if a.config.Client.NetworkSpeed != 0 {
   375  		conf.NetworkSpeed = a.config.Client.NetworkSpeed
   376  	}
   377  	if a.config.Client.CpuCompute != 0 {
   378  		conf.CpuCompute = a.config.Client.CpuCompute
   379  	}
   380  	if a.config.Client.MemoryMB != 0 {
   381  		conf.MemoryMB = a.config.Client.MemoryMB
   382  	}
   383  	if a.config.Client.MaxKillTimeout != "" {
   384  		dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout)
   385  		if err != nil {
   386  			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
   387  		}
   388  		conf.MaxKillTimeout = dur
   389  	}
   390  	conf.ClientMaxPort = uint(a.config.Client.ClientMaxPort)
   391  	conf.ClientMinPort = uint(a.config.Client.ClientMinPort)
   392  
   393  	// Setup the node
   394  	conf.Node = new(structs.Node)
   395  	conf.Node.Datacenter = a.config.Datacenter
   396  	conf.Node.Name = a.config.NodeName
   397  	conf.Node.Meta = a.config.Client.Meta
   398  	conf.Node.NodeClass = a.config.Client.NodeClass
   399  
   400  	// Set up the HTTP advertise address
   401  	conf.Node.HTTPAddr = a.config.AdvertiseAddrs.HTTP
   402  
   403  	// Reserve resources on the node.
   404  	r := conf.Node.Reserved
   405  	if r == nil {
   406  		r = new(structs.Resources)
   407  		conf.Node.Reserved = r
   408  	}
   409  	r.CPU = a.config.Client.Reserved.CPU
   410  	r.MemoryMB = a.config.Client.Reserved.MemoryMB
   411  	r.DiskMB = a.config.Client.Reserved.DiskMB
   412  	r.IOPS = a.config.Client.Reserved.IOPS
   413  	conf.GloballyReservedPorts = a.config.Client.Reserved.ParsedReservedPorts
   414  
   415  	conf.Version = a.config.Version
   416  
   417  	if *a.config.Consul.AutoAdvertise && a.config.Consul.ClientServiceName == "" {
   418  		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
   419  	}
   420  
   421  	conf.ConsulConfig = a.config.Consul
   422  	conf.VaultConfig = a.config.Vault
   423  
   424  	// Set up Telemetry configuration
   425  	conf.StatsCollectionInterval = a.config.Telemetry.collectionInterval
   426  	conf.PublishNodeMetrics = a.config.Telemetry.PublishNodeMetrics
   427  	conf.PublishAllocationMetrics = a.config.Telemetry.PublishAllocationMetrics
   428  	conf.DisableTaggedMetrics = a.config.Telemetry.DisableTaggedMetrics
   429  	conf.BackwardsCompatibleMetrics = a.config.Telemetry.BackwardsCompatibleMetrics
   430  
   431  	// Set the TLS related configs
   432  	conf.TLSConfig = a.config.TLSConfig
   433  	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
   434  
   435  	// Set the GC related configs
   436  	conf.GCInterval = a.config.Client.GCInterval
   437  	conf.GCParallelDestroys = a.config.Client.GCParallelDestroys
   438  	conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
   439  	conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold
   440  	conf.GCMaxAllocs = a.config.Client.GCMaxAllocs
   441  	if a.config.Client.NoHostUUID != nil {
   442  		conf.NoHostUUID = *a.config.Client.NoHostUUID
   443  	} else {
   444  		// Default no_host_uuid to true
   445  		conf.NoHostUUID = true
   446  	}
   447  
   448  	// Setup the ACLs
   449  	conf.ACLEnabled = a.config.ACL.Enabled
   450  	conf.ACLTokenTTL = a.config.ACL.TokenTTL
   451  	conf.ACLPolicyTTL = a.config.ACL.PolicyTTL
   452  
   453  	return conf, nil
   454  }
   455  
   456  // setupServer is used to setup the server if enabled
   457  func (a *Agent) setupServer() error {
   458  	if !a.config.Server.Enabled {
   459  		return nil
   460  	}
   461  
   462  	// Setup the configuration
   463  	conf, err := a.serverConfig()
   464  	if err != nil {
   465  		return fmt.Errorf("server config setup failed: %s", err)
   466  	}
   467  
   468  	// Generate a node ID and persist it if it is the first instance, otherwise
   469  	// read the persisted node ID.
   470  	if err := a.setupNodeID(conf); err != nil {
   471  		return fmt.Errorf("setting up server node ID failed: %s", err)
   472  	}
   473  
   474  	// Sets up the keyring for gossip encryption
   475  	if err := a.setupKeyrings(conf); err != nil {
   476  		return fmt.Errorf("failed to configure keyring: %v", err)
   477  	}
   478  
   479  	// Create the server
   480  	server, err := nomad.NewServer(conf, a.consulCatalog, a.logger)
   481  	if err != nil {
   482  		return fmt.Errorf("server setup failed: %v", err)
   483  	}
   484  	a.server = server
   485  
   486  	// Consul check addresses default to bind but can be toggled to use advertise
   487  	rpcCheckAddr := a.config.normalizedAddrs.RPC
   488  	serfCheckAddr := a.config.normalizedAddrs.Serf
   489  	if *a.config.Consul.ChecksUseAdvertise {
   490  		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
   491  		serfCheckAddr = a.config.AdvertiseAddrs.Serf
   492  	}
   493  
   494  	// Create the Nomad Server services for Consul
   495  	if *a.config.Consul.AutoAdvertise {
   496  		httpServ := &structs.Service{
   497  			Name:      a.config.Consul.ServerServiceName,
   498  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   499  			Tags:      []string{consul.ServiceTagHTTP},
   500  		}
   501  		const isServer = true
   502  		if check := a.agentHTTPCheck(isServer); check != nil {
   503  			httpServ.Checks = []*structs.ServiceCheck{check}
   504  		}
   505  		rpcServ := &structs.Service{
   506  			Name:      a.config.Consul.ServerServiceName,
   507  			PortLabel: a.config.AdvertiseAddrs.RPC,
   508  			Tags:      []string{consul.ServiceTagRPC},
   509  			Checks: []*structs.ServiceCheck{
   510  				{
   511  					Name:      a.config.Consul.ServerRPCCheckName,
   512  					Type:      "tcp",
   513  					Interval:  serverRpcCheckInterval,
   514  					Timeout:   serverRpcCheckTimeout,
   515  					PortLabel: rpcCheckAddr,
   516  				},
   517  			},
   518  		}
   519  		serfServ := &structs.Service{
   520  			Name:      a.config.Consul.ServerServiceName,
   521  			PortLabel: a.config.AdvertiseAddrs.Serf,
   522  			Tags:      []string{consul.ServiceTagSerf},
   523  			Checks: []*structs.ServiceCheck{
   524  				{
   525  					Name:      a.config.Consul.ServerSerfCheckName,
   526  					Type:      "tcp",
   527  					Interval:  serverSerfCheckInterval,
   528  					Timeout:   serverSerfCheckTimeout,
   529  					PortLabel: serfCheckAddr,
   530  				},
   531  			},
   532  		}
   533  
   534  		// Add the http port check if TLS isn't enabled
   535  		consulServices := []*structs.Service{
   536  			rpcServ,
   537  			serfServ,
   538  			httpServ,
   539  		}
   540  		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
   541  			return err
   542  		}
   543  	}
   544  
   545  	return nil
   546  }
   547  
   548  // setupNodeID will pull the persisted node ID, if any, or create a random one
   549  // and persist it.
   550  func (a *Agent) setupNodeID(config *nomad.Config) error {
   551  	// For dev mode we have no filesystem access so just make a node ID.
   552  	if a.config.DevMode {
   553  		config.NodeID = uuid.Generate()
   554  		return nil
   555  	}
   556  
   557  	// Load saved state, if any. Since a user could edit this, we also
   558  	// validate it. Saved state overwrites any configured node id
   559  	fileID := filepath.Join(config.DataDir, "node-id")
   560  	if _, err := os.Stat(fileID); err == nil {
   561  		rawID, err := ioutil.ReadFile(fileID)
   562  		if err != nil {
   563  			return err
   564  		}
   565  
   566  		nodeID := strings.TrimSpace(string(rawID))
   567  		nodeID = strings.ToLower(nodeID)
   568  		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
   569  			return err
   570  		}
   571  		config.NodeID = nodeID
   572  		return nil
   573  	}
   574  
   575  	// If they've configured a node ID manually then just use that, as
   576  	// long as it's valid.
   577  	if config.NodeID != "" {
   578  		config.NodeID = strings.ToLower(config.NodeID)
   579  		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
   580  			return err
   581  		}
   582  		// Persist this configured nodeID to our data directory
   583  		if err := lib.EnsurePath(fileID, false); err != nil {
   584  			return err
   585  		}
   586  		if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
   587  			return err
   588  		}
   589  		return nil
   590  	}
   591  
   592  	// If we still don't have a valid node ID, make one.
   593  	if config.NodeID == "" {
   594  		id := uuid.Generate()
   595  		if err := lib.EnsurePath(fileID, false); err != nil {
   596  			return err
   597  		}
   598  		if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil {
   599  			return err
   600  		}
   601  
   602  		config.NodeID = id
   603  	}
   604  	return nil
   605  }
   606  
   607  // setupKeyrings is used to initialize and load keyrings during agent startup
   608  func (a *Agent) setupKeyrings(config *nomad.Config) error {
   609  	file := filepath.Join(a.config.DataDir, serfKeyring)
   610  
   611  	if a.config.Server.EncryptKey == "" {
   612  		goto LOAD
   613  	}
   614  	if _, err := os.Stat(file); err != nil {
   615  		if err := initKeyring(file, a.config.Server.EncryptKey); err != nil {
   616  			return err
   617  		}
   618  	}
   619  
   620  LOAD:
   621  	if _, err := os.Stat(file); err == nil {
   622  		config.SerfConfig.KeyringFile = file
   623  	}
   624  	if err := loadKeyringFile(config.SerfConfig); err != nil {
   625  		return err
   626  	}
   627  	// Success!
   628  	return nil
   629  }
   630  
   631  // setupClient is used to setup the client if enabled
   632  func (a *Agent) setupClient() error {
   633  	if !a.config.Client.Enabled {
   634  		return nil
   635  	}
   636  
   637  	// Setup the configuration
   638  	conf, err := a.clientConfig()
   639  	if err != nil {
   640  		return fmt.Errorf("client setup failed: %v", err)
   641  	}
   642  
   643  	// Reserve some ports for the plugins if we are on Windows
   644  	if runtime.GOOS == "windows" {
   645  		if err := a.reservePortsForClient(conf); err != nil {
   646  			return err
   647  		}
   648  	}
   649  
   650  	client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger)
   651  	if err != nil {
   652  		return fmt.Errorf("client setup failed: %v", err)
   653  	}
   654  	a.client = client
   655  
   656  	// Create the Nomad Client  services for Consul
   657  	if *a.config.Consul.AutoAdvertise {
   658  		httpServ := &structs.Service{
   659  			Name:      a.config.Consul.ClientServiceName,
   660  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   661  			Tags:      []string{consul.ServiceTagHTTP},
   662  		}
   663  		const isServer = false
   664  		if check := a.agentHTTPCheck(isServer); check != nil {
   665  			httpServ.Checks = []*structs.ServiceCheck{check}
   666  		}
   667  		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
   668  			return err
   669  		}
   670  	}
   671  
   672  	return nil
   673  }
   674  
   675  // agentHTTPCheck returns a health check for the agent's HTTP API if possible.
   676  // If no HTTP health check can be supported nil is returned.
   677  func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
   678  	// Resolve the http check address
   679  	httpCheckAddr := a.config.normalizedAddrs.HTTP
   680  	if *a.config.Consul.ChecksUseAdvertise {
   681  		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
   682  	}
   683  	check := structs.ServiceCheck{
   684  		Name:      a.config.Consul.ClientHTTPCheckName,
   685  		Type:      "http",
   686  		Path:      "/v1/agent/health?type=client",
   687  		Protocol:  "http",
   688  		Interval:  agentHttpCheckInterval,
   689  		Timeout:   agentHttpCheckTimeout,
   690  		PortLabel: httpCheckAddr,
   691  	}
   692  	// Switch to endpoint that doesn't require a leader for servers
   693  	if server {
   694  		check.Name = a.config.Consul.ServerHTTPCheckName
   695  		check.Path = "/v1/agent/health?type=server"
   696  	}
   697  	if !a.config.TLSConfig.EnableHTTP {
   698  		// No HTTPS, return a plain http check
   699  		return &check
   700  	}
   701  	if a.config.TLSConfig.VerifyHTTPSClient {
   702  		a.logger.Printf("[WARN] agent: not registering Nomad HTTPS Health Check because verify_https_client enabled")
   703  		return nil
   704  	}
   705  
   706  	// HTTPS enabled; skip verification
   707  	check.Protocol = "https"
   708  	check.TLSSkipVerify = true
   709  	return &check
   710  }
   711  
   712  // reservePortsForClient reserves a range of ports for the client to use when
   713  // it creates various plugins for log collection, executors, drivers, etc
   714  func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
   715  	// finding the device name for loopback
   716  	deviceName, addr, mask, err := a.findLoopbackDevice()
   717  	if err != nil {
   718  		return fmt.Errorf("error finding the device name for loopback: %v", err)
   719  	}
   720  
   721  	// seeing if the user has already reserved some resources on this device
   722  	var nr *structs.NetworkResource
   723  	if conf.Node.Reserved == nil {
   724  		conf.Node.Reserved = &structs.Resources{}
   725  	}
   726  	for _, n := range conf.Node.Reserved.Networks {
   727  		if n.Device == deviceName {
   728  			nr = n
   729  		}
   730  	}
   731  	// If the user hasn't already created the device, we create it
   732  	if nr == nil {
   733  		nr = &structs.NetworkResource{
   734  			Device:        deviceName,
   735  			IP:            addr,
   736  			CIDR:          mask,
   737  			ReservedPorts: make([]structs.Port, 0),
   738  		}
   739  	}
   740  	// appending the port ranges we want to use for the client to the list of
   741  	// reserved ports for this device
   742  	for i := conf.ClientMinPort; i <= conf.ClientMaxPort; i++ {
   743  		nr.ReservedPorts = append(nr.ReservedPorts, structs.Port{Label: fmt.Sprintf("plugin-%d", i), Value: int(i)})
   744  	}
   745  	conf.Node.Reserved.Networks = append(conf.Node.Reserved.Networks, nr)
   746  	return nil
   747  }
   748  
   749  // findLoopbackDevice iterates through all the interfaces on a machine and
   750  // returns the ip addr, mask of the loopback device
   751  func (a *Agent) findLoopbackDevice() (string, string, string, error) {
   752  	var ifcs []net.Interface
   753  	var err error
   754  	ifcs, err = net.Interfaces()
   755  	if err != nil {
   756  		return "", "", "", err
   757  	}
   758  	for _, ifc := range ifcs {
   759  		addrs, err := ifc.Addrs()
   760  		if err != nil {
   761  			return "", "", "", err
   762  		}
   763  		for _, addr := range addrs {
   764  			var ip net.IP
   765  			switch v := addr.(type) {
   766  			case *net.IPNet:
   767  				ip = v.IP
   768  			case *net.IPAddr:
   769  				ip = v.IP
   770  			}
   771  			if ip.IsLoopback() {
   772  				if ip.To4() == nil {
   773  					continue
   774  				}
   775  				return ifc.Name, ip.String(), addr.String(), nil
   776  			}
   777  		}
   778  	}
   779  
   780  	return "", "", "", fmt.Errorf("no loopback devices with IPV4 addr found")
   781  }
   782  
   783  // Leave is used gracefully exit. Clients will inform servers
   784  // of their departure so that allocations can be rescheduled.
   785  func (a *Agent) Leave() error {
   786  	if a.client != nil {
   787  		if err := a.client.Leave(); err != nil {
   788  			a.logger.Printf("[ERR] agent: client leave failed: %v", err)
   789  		}
   790  	}
   791  	if a.server != nil {
   792  		if err := a.server.Leave(); err != nil {
   793  			a.logger.Printf("[ERR] agent: server leave failed: %v", err)
   794  		}
   795  	}
   796  	return nil
   797  }
   798  
   799  // Shutdown is used to terminate the agent.
   800  func (a *Agent) Shutdown() error {
   801  	a.shutdownLock.Lock()
   802  	defer a.shutdownLock.Unlock()
   803  
   804  	if a.shutdown {
   805  		return nil
   806  	}
   807  
   808  	a.logger.Println("[INFO] agent: requesting shutdown")
   809  	if a.client != nil {
   810  		if err := a.client.Shutdown(); err != nil {
   811  			a.logger.Printf("[ERR] agent: client shutdown failed: %v", err)
   812  		}
   813  	}
   814  	if a.server != nil {
   815  		if err := a.server.Shutdown(); err != nil {
   816  			a.logger.Printf("[ERR] agent: server shutdown failed: %v", err)
   817  		}
   818  	}
   819  
   820  	if err := a.consulService.Shutdown(); err != nil {
   821  		a.logger.Printf("[ERR] agent: shutting down Consul client failed: %v", err)
   822  	}
   823  
   824  	a.logger.Println("[INFO] agent: shutdown complete")
   825  	a.shutdown = true
   826  	close(a.shutdownCh)
   827  	return nil
   828  }
   829  
   830  // RPC is used to make an RPC call to the Nomad servers
   831  func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
   832  	if a.server != nil {
   833  		return a.server.RPC(method, args, reply)
   834  	}
   835  	return a.client.RPC(method, args, reply)
   836  }
   837  
   838  // Client returns the configured client or nil
   839  func (a *Agent) Client() *client.Client {
   840  	return a.client
   841  }
   842  
   843  // Server returns the configured server or nil
   844  func (a *Agent) Server() *nomad.Server {
   845  	return a.server
   846  }
   847  
   848  // Stats is used to return statistics for debugging and insight
   849  // for various sub-systems
   850  func (a *Agent) Stats() map[string]map[string]string {
   851  	stats := make(map[string]map[string]string)
   852  	if a.server != nil {
   853  		subStat := a.server.Stats()
   854  		for k, v := range subStat {
   855  			stats[k] = v
   856  		}
   857  	}
   858  	if a.client != nil {
   859  		subStat := a.client.Stats()
   860  		for k, v := range subStat {
   861  			stats[k] = v
   862  		}
   863  	}
   864  	return stats
   865  }
   866  
   867  // ShouldReload determines if we should reload the configuration and agent
   868  // connections. If the TLS Configuration has not changed, we shouldn't reload.
   869  func (a *Agent) ShouldReload(newConfig *Config) (agent, http, rpc bool) {
   870  	a.configLock.Lock()
   871  	defer a.configLock.Unlock()
   872  
   873  	isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig)
   874  	if err != nil {
   875  		a.logger.Printf("[INFO] agent: error when parsing TLS certificate %v", err)
   876  		return false, false, false
   877  	} else if !isEqual {
   878  		return true, true, true
   879  	}
   880  
   881  	// Allow the ability to only reload HTTP connections
   882  	if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP {
   883  		http = true
   884  		agent = true
   885  	}
   886  
   887  	// Allow the ability to only reload HTTP connections
   888  	if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC {
   889  		rpc = true
   890  		agent = true
   891  	}
   892  
   893  	return agent, http, rpc
   894  }
   895  
   896  // Reload handles configuration changes for the agent. Provides a method that
   897  // is easier to unit test, as this action is invoked via SIGHUP.
   898  func (a *Agent) Reload(newConfig *Config) error {
   899  	a.configLock.Lock()
   900  	defer a.configLock.Unlock()
   901  
   902  	if newConfig == nil || newConfig.TLSConfig == nil {
   903  		return fmt.Errorf("cannot reload agent with nil configuration")
   904  	}
   905  
   906  	// This is just a TLS configuration reload, we don't need to refresh
   907  	// existing network connections
   908  	if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
   909  
   910  		// Reload the certificates on the keyloader and on success store the
   911  		// updated TLS config. It is important to reuse the same keyloader
   912  		// as this allows us to dynamically reload configurations not only
   913  		// on the Agent but on the Server and Client too (they are
   914  		// referencing the same keyloader).
   915  		keyloader := a.config.TLSConfig.GetKeyLoader()
   916  		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
   917  		if err != nil {
   918  			return err
   919  		}
   920  		a.config.TLSConfig = newConfig.TLSConfig
   921  		a.config.TLSConfig.KeyLoader = keyloader
   922  		return nil
   923  	}
   924  
   925  	// Completely reload the agent's TLS configuration (moving from non-TLS to
   926  	// TLS, or vice versa)
   927  	// This does not handle errors in loading the new TLS configuration
   928  	a.config.TLSConfig = newConfig.TLSConfig.Copy()
   929  
   930  	if newConfig.TLSConfig.IsEmpty() {
   931  		a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext")
   932  	} else {
   933  		a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS")
   934  	}
   935  
   936  	return nil
   937  }
   938  
   939  // GetConfig creates a locked reference to the agent's config
   940  func (a *Agent) GetConfig() *Config {
   941  	a.configLock.Lock()
   942  	defer a.configLock.Unlock()
   943  
   944  	return a.config
   945  }
   946  
   947  // setupConsul creates the Consul client and starts its main Run loop.
   948  func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
   949  	apiConf, err := consulConfig.ApiConfig()
   950  	if err != nil {
   951  		return err
   952  	}
   953  	client, err := api.NewClient(apiConf)
   954  	if err != nil {
   955  		return err
   956  	}
   957  
   958  	// Determine version for TLSSkipVerify
   959  
   960  	// Create Consul Catalog client for service discovery.
   961  	a.consulCatalog = client.Catalog()
   962  
   963  	// Create Consul Service client for service advertisement and checks.
   964  	isClient := false
   965  	if a.config.Client != nil && a.config.Client.Enabled {
   966  		isClient = true
   967  	}
   968  	a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient)
   969  
   970  	// Run the Consul service client's sync'ing main loop
   971  	go a.consulService.Run()
   972  	return nil
   973  }