github.com/zhizhiboom/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/command/agent/agent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	"log"
     8  	"net"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	metrics "github.com/armon/go-metrics"
    18  	"github.com/hashicorp/consul/api"
    19  	"github.com/hashicorp/consul/lib"
    20  	uuidparse "github.com/hashicorp/go-uuid"
    21  	"github.com/hashicorp/nomad/client"
    22  	clientconfig "github.com/hashicorp/nomad/client/config"
    23  	"github.com/hashicorp/nomad/command/agent/consul"
    24  	"github.com/hashicorp/nomad/helper/uuid"
    25  	"github.com/hashicorp/nomad/nomad"
    26  	"github.com/hashicorp/nomad/nomad/structs"
    27  	"github.com/hashicorp/nomad/nomad/structs/config"
    28  	"github.com/hashicorp/raft"
    29  )
    30  
    31  const (
    32  	agentHttpCheckInterval  = 10 * time.Second
    33  	agentHttpCheckTimeout   = 5 * time.Second
    34  	serverRpcCheckInterval  = 10 * time.Second
    35  	serverRpcCheckTimeout   = 3 * time.Second
    36  	serverSerfCheckInterval = 10 * time.Second
    37  	serverSerfCheckTimeout  = 3 * time.Second
    38  
    39  	// roles used in identifying Consul entries for Nomad agents
    40  	consulRoleServer = "server"
    41  	consulRoleClient = "client"
    42  )
    43  
// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
	// config is the agent configuration handed to NewAgent. configLock is
	// taken by ShouldReload before it reads config.
	config     *Config
	configLock sync.Mutex

	// logger writes to logOutput; NewAgent builds it with standard plus
	// microsecond timestamp flags. logOutput is also passed on to the
	// server and client configurations.
	logger    *log.Logger
	logOutput io.Writer

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// client is set by setupClient; it stays nil when client mode is not
	// enabled in the configuration.
	client *client.Client

	// server is set by setupServer; it stays nil when server mode is not
	// enabled in the configuration.
	server *nomad.Server

	// shutdown records that Shutdown has completed, shutdownCh is closed
	// by Shutdown, and shutdownLock serializes calls to Shutdown.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// InmemSink is the in-memory metrics sink supplied to NewAgent.
	// NOTE(review): how it is consumed is not visible in this file section.
	InmemSink *metrics.InmemSink
}
    73  
    74  // NewAgent is used to create a new agent with the given configuration
    75  func NewAgent(config *Config, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
    76  	a := &Agent{
    77  		config:     config,
    78  		logger:     log.New(logOutput, "", log.LstdFlags|log.Lmicroseconds),
    79  		logOutput:  logOutput,
    80  		shutdownCh: make(chan struct{}),
    81  		InmemSink:  inmem,
    82  	}
    83  
    84  	// Global logger should match internal logger as much as possible
    85  	log.SetFlags(log.LstdFlags | log.Lmicroseconds)
    86  
    87  	if err := a.setupConsul(config.Consul); err != nil {
    88  		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
    89  	}
    90  
    91  	// TODO setup plugin loader
    92  
    93  	if err := a.setupServer(); err != nil {
    94  		return nil, err
    95  	}
    96  	if err := a.setupClient(); err != nil {
    97  		return nil, err
    98  	}
    99  	if a.client == nil && a.server == nil {
   100  		return nil, fmt.Errorf("must have at least client or server mode enabled")
   101  	}
   102  
   103  	return a, nil
   104  }
   105  
   106  // convertServerConfig takes an agent config and log output and returns a Nomad
   107  // Config.
   108  func convertServerConfig(agentConfig *Config, logOutput io.Writer) (*nomad.Config, error) {
   109  	conf := agentConfig.NomadConfig
   110  	if conf == nil {
   111  		conf = nomad.DefaultConfig()
   112  	}
   113  	conf.LogOutput = logOutput
   114  	conf.DevMode = agentConfig.DevMode
   115  	conf.Build = agentConfig.Version.VersionNumber()
   116  	if agentConfig.Region != "" {
   117  		conf.Region = agentConfig.Region
   118  	}
   119  
   120  	// Set the Authoritative Region if set, otherwise default to
   121  	// the same as the local region.
   122  	if agentConfig.Server.AuthoritativeRegion != "" {
   123  		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
   124  	} else if agentConfig.Region != "" {
   125  		conf.AuthoritativeRegion = agentConfig.Region
   126  	}
   127  
   128  	if agentConfig.Datacenter != "" {
   129  		conf.Datacenter = agentConfig.Datacenter
   130  	}
   131  	if agentConfig.NodeName != "" {
   132  		conf.NodeName = agentConfig.NodeName
   133  	}
   134  	if agentConfig.Server.BootstrapExpect > 0 {
   135  		if agentConfig.Server.BootstrapExpect == 1 {
   136  			conf.Bootstrap = true
   137  		} else {
   138  			atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect))
   139  		}
   140  	}
   141  	if agentConfig.DataDir != "" {
   142  		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
   143  	}
   144  	if agentConfig.Server.DataDir != "" {
   145  		conf.DataDir = agentConfig.Server.DataDir
   146  	}
   147  	if agentConfig.Server.ProtocolVersion != 0 {
   148  		conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion)
   149  	}
   150  	if agentConfig.Server.RaftProtocol != 0 {
   151  		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
   152  	}
   153  	if agentConfig.Server.NumSchedulers != nil {
   154  		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
   155  	}
   156  	if len(agentConfig.Server.EnabledSchedulers) != 0 {
   157  		// Convert to a set and require the core scheduler
   158  		set := make(map[string]struct{}, 4)
   159  		set[structs.JobTypeCore] = struct{}{}
   160  		for _, sched := range agentConfig.Server.EnabledSchedulers {
   161  			set[sched] = struct{}{}
   162  		}
   163  
   164  		schedulers := make([]string, 0, len(set))
   165  		for k := range set {
   166  			schedulers = append(schedulers, k)
   167  		}
   168  
   169  		conf.EnabledSchedulers = schedulers
   170  
   171  	}
   172  	if agentConfig.ACL.Enabled {
   173  		conf.ACLEnabled = true
   174  	}
   175  	if agentConfig.ACL.ReplicationToken != "" {
   176  		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
   177  	}
   178  	if agentConfig.Sentinel != nil {
   179  		conf.SentinelConfig = agentConfig.Sentinel
   180  	}
   181  	if agentConfig.Server.NonVotingServer {
   182  		conf.NonVoter = true
   183  	}
   184  	if agentConfig.Server.RedundancyZone != "" {
   185  		conf.RedundancyZone = agentConfig.Server.RedundancyZone
   186  	}
   187  	if agentConfig.Server.UpgradeVersion != "" {
   188  		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
   189  	}
   190  	if agentConfig.Autopilot != nil {
   191  		if agentConfig.Autopilot.CleanupDeadServers != nil {
   192  			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
   193  		}
   194  		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
   195  			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
   196  		}
   197  		if agentConfig.Autopilot.LastContactThreshold != 0 {
   198  			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
   199  		}
   200  		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
   201  			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
   202  		}
   203  		if agentConfig.Autopilot.EnableRedundancyZones != nil {
   204  			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
   205  		}
   206  		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
   207  			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
   208  		}
   209  		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
   210  			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
   211  		}
   212  	}
   213  
   214  	// Set up the bind addresses
   215  	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
   216  	if err != nil {
   217  		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
   218  	}
   219  	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
   220  	if err != nil {
   221  		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
   222  	}
   223  	conf.RPCAddr.Port = rpcAddr.Port
   224  	conf.RPCAddr.IP = rpcAddr.IP
   225  	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
   226  	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()
   227  
   228  	// Set up the advertise addresses
   229  	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
   230  	if err != nil {
   231  		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
   232  	}
   233  	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
   234  	if err != nil {
   235  		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   236  	}
   237  
   238  	// Server address is the serf advertise address and rpc port. This is the
   239  	// address that all servers should be able to communicate over RPC with.
   240  	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
   241  	if err != nil {
   242  		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   243  	}
   244  
   245  	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
   246  	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
   247  	conf.ClientRPCAdvertise = rpcAddr
   248  	conf.ServerRPCAdvertise = serverAddr
   249  
   250  	// Set up gc threshold and heartbeat grace period
   251  	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
   252  		dur, err := time.ParseDuration(gcThreshold)
   253  		if err != nil {
   254  			return nil, err
   255  		}
   256  		conf.NodeGCThreshold = dur
   257  	}
   258  	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
   259  		dur, err := time.ParseDuration(gcThreshold)
   260  		if err != nil {
   261  			return nil, err
   262  		}
   263  		conf.JobGCThreshold = dur
   264  	}
   265  	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
   266  		dur, err := time.ParseDuration(gcThreshold)
   267  		if err != nil {
   268  			return nil, err
   269  		}
   270  		conf.EvalGCThreshold = dur
   271  	}
   272  	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
   273  		dur, err := time.ParseDuration(gcThreshold)
   274  		if err != nil {
   275  			return nil, err
   276  		}
   277  		conf.DeploymentGCThreshold = dur
   278  	}
   279  
   280  	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
   281  		conf.HeartbeatGrace = heartbeatGrace
   282  	}
   283  	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
   284  		conf.MinHeartbeatTTL = min
   285  	}
   286  	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
   287  		conf.MaxHeartbeatsPerSecond = maxHPS
   288  	}
   289  
   290  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
   291  		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
   292  	}
   293  
   294  	// Add the Consul and Vault configs
   295  	conf.ConsulConfig = agentConfig.Consul
   296  	conf.VaultConfig = agentConfig.Vault
   297  
   298  	// Set the TLS config
   299  	conf.TLSConfig = agentConfig.TLSConfig
   300  
   301  	// Setup telemetry related config
   302  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   303  	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
   304  	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics
   305  
   306  	return conf, nil
   307  }
   308  
// serverConfig is used to generate a new server configuration struct
// for initializing a nomad server. It delegates to convertServerConfig,
// passing the agent's own configuration and log output, and returns its
// result (including any error) unchanged.
func (a *Agent) serverConfig() (*nomad.Config, error) {
	return convertServerConfig(a.config, a.logOutput)
}
   314  
   315  // clientConfig is used to generate a new client configuration struct
   316  // for initializing a Nomad client.
   317  func (a *Agent) clientConfig() (*clientconfig.Config, error) {
   318  	// Setup the configuration
   319  	conf := a.config.ClientConfig
   320  	if conf == nil {
   321  		conf = clientconfig.DefaultConfig()
   322  	}
   323  
   324  	// If we are running a server, append both its bind and advertise address so
   325  	// we are able to at least talk to the local server even if that isn't
   326  	// configured explicitly. This handles both running server and client on one
   327  	// host and -dev mode.
   328  	conf.Servers = a.config.Client.Servers
   329  	if a.server != nil {
   330  		if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" {
   331  			return nil, fmt.Errorf("AdvertiseAddrs is nil or empty")
   332  		} else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" {
   333  			return nil, fmt.Errorf("normalizedAddrs is nil or empty")
   334  		}
   335  
   336  		conf.Servers = append(conf.Servers,
   337  			a.config.normalizedAddrs.RPC,
   338  			a.config.AdvertiseAddrs.RPC)
   339  	}
   340  
   341  	conf.LogOutput = a.logOutput
   342  	conf.LogLevel = a.config.LogLevel
   343  	conf.DevMode = a.config.DevMode
   344  	if a.config.Region != "" {
   345  		conf.Region = a.config.Region
   346  	}
   347  	if a.config.DataDir != "" {
   348  		conf.StateDir = filepath.Join(a.config.DataDir, "client")
   349  		conf.AllocDir = filepath.Join(a.config.DataDir, "alloc")
   350  	}
   351  	if a.config.Client.StateDir != "" {
   352  		conf.StateDir = a.config.Client.StateDir
   353  	}
   354  	if a.config.Client.AllocDir != "" {
   355  		conf.AllocDir = a.config.Client.AllocDir
   356  	}
   357  	if a.config.Client.NetworkInterface != "" {
   358  		conf.NetworkInterface = a.config.Client.NetworkInterface
   359  	}
   360  	conf.ChrootEnv = a.config.Client.ChrootEnv
   361  	conf.Options = a.config.Client.Options
   362  	// Logging deprecation messages about consul related configuration in client
   363  	// options
   364  	var invalidConsulKeys []string
   365  	for key := range conf.Options {
   366  		if strings.HasPrefix(key, "consul") {
   367  			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
   368  		}
   369  	}
   370  	if len(invalidConsulKeys) > 0 {
   371  		a.logger.Printf("[WARN] agent: Invalid keys: %v", strings.Join(invalidConsulKeys, ","))
   372  		a.logger.Printf(`Nomad client ignores consul related configuration in client options.
   373  		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
   374  		to configure Nomad to work with Consul.`)
   375  	}
   376  
   377  	if a.config.Client.NetworkSpeed != 0 {
   378  		conf.NetworkSpeed = a.config.Client.NetworkSpeed
   379  	}
   380  	if a.config.Client.CpuCompute != 0 {
   381  		conf.CpuCompute = a.config.Client.CpuCompute
   382  	}
   383  	if a.config.Client.MemoryMB != 0 {
   384  		conf.MemoryMB = a.config.Client.MemoryMB
   385  	}
   386  	if a.config.Client.MaxKillTimeout != "" {
   387  		dur, err := time.ParseDuration(a.config.Client.MaxKillTimeout)
   388  		if err != nil {
   389  			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
   390  		}
   391  		conf.MaxKillTimeout = dur
   392  	}
   393  	conf.ClientMaxPort = uint(a.config.Client.ClientMaxPort)
   394  	conf.ClientMinPort = uint(a.config.Client.ClientMinPort)
   395  
   396  	// Setup the node
   397  	conf.Node = new(structs.Node)
   398  	conf.Node.Datacenter = a.config.Datacenter
   399  	conf.Node.Name = a.config.NodeName
   400  	conf.Node.Meta = a.config.Client.Meta
   401  	conf.Node.NodeClass = a.config.Client.NodeClass
   402  
   403  	// Set up the HTTP advertise address
   404  	conf.Node.HTTPAddr = a.config.AdvertiseAddrs.HTTP
   405  
   406  	// Reserve resources on the node.
   407  	r := conf.Node.Reserved
   408  	if r == nil {
   409  		r = new(structs.Resources)
   410  		conf.Node.Reserved = r
   411  	}
   412  	r.CPU = a.config.Client.Reserved.CPU
   413  	r.MemoryMB = a.config.Client.Reserved.MemoryMB
   414  	r.DiskMB = a.config.Client.Reserved.DiskMB
   415  	r.IOPS = a.config.Client.Reserved.IOPS
   416  	conf.GloballyReservedPorts = a.config.Client.Reserved.ParsedReservedPorts
   417  
   418  	conf.Version = a.config.Version
   419  
   420  	if *a.config.Consul.AutoAdvertise && a.config.Consul.ClientServiceName == "" {
   421  		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
   422  	}
   423  
   424  	conf.ConsulConfig = a.config.Consul
   425  	conf.VaultConfig = a.config.Vault
   426  
   427  	// Set up Telemetry configuration
   428  	conf.StatsCollectionInterval = a.config.Telemetry.collectionInterval
   429  	conf.PublishNodeMetrics = a.config.Telemetry.PublishNodeMetrics
   430  	conf.PublishAllocationMetrics = a.config.Telemetry.PublishAllocationMetrics
   431  	conf.DisableTaggedMetrics = a.config.Telemetry.DisableTaggedMetrics
   432  	conf.BackwardsCompatibleMetrics = a.config.Telemetry.BackwardsCompatibleMetrics
   433  
   434  	// Set the TLS related configs
   435  	conf.TLSConfig = a.config.TLSConfig
   436  	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
   437  
   438  	// Set the GC related configs
   439  	conf.GCInterval = a.config.Client.GCInterval
   440  	conf.GCParallelDestroys = a.config.Client.GCParallelDestroys
   441  	conf.GCDiskUsageThreshold = a.config.Client.GCDiskUsageThreshold
   442  	conf.GCInodeUsageThreshold = a.config.Client.GCInodeUsageThreshold
   443  	conf.GCMaxAllocs = a.config.Client.GCMaxAllocs
   444  	if a.config.Client.NoHostUUID != nil {
   445  		conf.NoHostUUID = *a.config.Client.NoHostUUID
   446  	} else {
   447  		// Default no_host_uuid to true
   448  		conf.NoHostUUID = true
   449  	}
   450  
   451  	// Setup the ACLs
   452  	conf.ACLEnabled = a.config.ACL.Enabled
   453  	conf.ACLTokenTTL = a.config.ACL.TokenTTL
   454  	conf.ACLPolicyTTL = a.config.ACL.PolicyTTL
   455  
   456  	return conf, nil
   457  }
   458  
   459  // setupServer is used to setup the server if enabled
   460  func (a *Agent) setupServer() error {
   461  	if !a.config.Server.Enabled {
   462  		return nil
   463  	}
   464  
   465  	// Setup the configuration
   466  	conf, err := a.serverConfig()
   467  	if err != nil {
   468  		return fmt.Errorf("server config setup failed: %s", err)
   469  	}
   470  
   471  	// Generate a node ID and persist it if it is the first instance, otherwise
   472  	// read the persisted node ID.
   473  	if err := a.setupNodeID(conf); err != nil {
   474  		return fmt.Errorf("setting up server node ID failed: %s", err)
   475  	}
   476  
   477  	// Sets up the keyring for gossip encryption
   478  	if err := a.setupKeyrings(conf); err != nil {
   479  		return fmt.Errorf("failed to configure keyring: %v", err)
   480  	}
   481  
   482  	// Create the server
   483  	server, err := nomad.NewServer(conf, a.consulCatalog, a.logger)
   484  	if err != nil {
   485  		return fmt.Errorf("server setup failed: %v", err)
   486  	}
   487  	a.server = server
   488  
   489  	// Consul check addresses default to bind but can be toggled to use advertise
   490  	rpcCheckAddr := a.config.normalizedAddrs.RPC
   491  	serfCheckAddr := a.config.normalizedAddrs.Serf
   492  	if *a.config.Consul.ChecksUseAdvertise {
   493  		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
   494  		serfCheckAddr = a.config.AdvertiseAddrs.Serf
   495  	}
   496  
   497  	// Create the Nomad Server services for Consul
   498  	if *a.config.Consul.AutoAdvertise {
   499  		httpServ := &structs.Service{
   500  			Name:      a.config.Consul.ServerServiceName,
   501  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   502  			Tags:      []string{consul.ServiceTagHTTP},
   503  		}
   504  		const isServer = true
   505  		if check := a.agentHTTPCheck(isServer); check != nil {
   506  			httpServ.Checks = []*structs.ServiceCheck{check}
   507  		}
   508  		rpcServ := &structs.Service{
   509  			Name:      a.config.Consul.ServerServiceName,
   510  			PortLabel: a.config.AdvertiseAddrs.RPC,
   511  			Tags:      []string{consul.ServiceTagRPC},
   512  			Checks: []*structs.ServiceCheck{
   513  				{
   514  					Name:      a.config.Consul.ServerRPCCheckName,
   515  					Type:      "tcp",
   516  					Interval:  serverRpcCheckInterval,
   517  					Timeout:   serverRpcCheckTimeout,
   518  					PortLabel: rpcCheckAddr,
   519  				},
   520  			},
   521  		}
   522  		serfServ := &structs.Service{
   523  			Name:      a.config.Consul.ServerServiceName,
   524  			PortLabel: a.config.AdvertiseAddrs.Serf,
   525  			Tags:      []string{consul.ServiceTagSerf},
   526  			Checks: []*structs.ServiceCheck{
   527  				{
   528  					Name:      a.config.Consul.ServerSerfCheckName,
   529  					Type:      "tcp",
   530  					Interval:  serverSerfCheckInterval,
   531  					Timeout:   serverSerfCheckTimeout,
   532  					PortLabel: serfCheckAddr,
   533  				},
   534  			},
   535  		}
   536  
   537  		// Add the http port check if TLS isn't enabled
   538  		consulServices := []*structs.Service{
   539  			rpcServ,
   540  			serfServ,
   541  			httpServ,
   542  		}
   543  		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
   544  			return err
   545  		}
   546  	}
   547  
   548  	return nil
   549  }
   550  
   551  // setupNodeID will pull the persisted node ID, if any, or create a random one
   552  // and persist it.
   553  func (a *Agent) setupNodeID(config *nomad.Config) error {
   554  	// For dev mode we have no filesystem access so just make a node ID.
   555  	if a.config.DevMode {
   556  		config.NodeID = uuid.Generate()
   557  		return nil
   558  	}
   559  
   560  	// Load saved state, if any. Since a user could edit this, we also
   561  	// validate it. Saved state overwrites any configured node id
   562  	fileID := filepath.Join(config.DataDir, "node-id")
   563  	if _, err := os.Stat(fileID); err == nil {
   564  		rawID, err := ioutil.ReadFile(fileID)
   565  		if err != nil {
   566  			return err
   567  		}
   568  
   569  		nodeID := strings.TrimSpace(string(rawID))
   570  		nodeID = strings.ToLower(nodeID)
   571  		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
   572  			return err
   573  		}
   574  		config.NodeID = nodeID
   575  		return nil
   576  	}
   577  
   578  	// If they've configured a node ID manually then just use that, as
   579  	// long as it's valid.
   580  	if config.NodeID != "" {
   581  		config.NodeID = strings.ToLower(config.NodeID)
   582  		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
   583  			return err
   584  		}
   585  		// Persist this configured nodeID to our data directory
   586  		if err := lib.EnsurePath(fileID, false); err != nil {
   587  			return err
   588  		}
   589  		if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
   590  			return err
   591  		}
   592  		return nil
   593  	}
   594  
   595  	// If we still don't have a valid node ID, make one.
   596  	if config.NodeID == "" {
   597  		id := uuid.Generate()
   598  		if err := lib.EnsurePath(fileID, false); err != nil {
   599  			return err
   600  		}
   601  		if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil {
   602  			return err
   603  		}
   604  
   605  		config.NodeID = id
   606  	}
   607  	return nil
   608  }
   609  
   610  // setupKeyrings is used to initialize and load keyrings during agent startup
   611  func (a *Agent) setupKeyrings(config *nomad.Config) error {
   612  	file := filepath.Join(a.config.DataDir, serfKeyring)
   613  
   614  	if a.config.Server.EncryptKey == "" {
   615  		goto LOAD
   616  	}
   617  	if _, err := os.Stat(file); err != nil {
   618  		if err := initKeyring(file, a.config.Server.EncryptKey); err != nil {
   619  			return err
   620  		}
   621  	}
   622  
   623  LOAD:
   624  	if _, err := os.Stat(file); err == nil {
   625  		config.SerfConfig.KeyringFile = file
   626  	}
   627  	if err := loadKeyringFile(config.SerfConfig); err != nil {
   628  		return err
   629  	}
   630  	// Success!
   631  	return nil
   632  }
   633  
   634  // setupClient is used to setup the client if enabled
   635  func (a *Agent) setupClient() error {
   636  	if !a.config.Client.Enabled {
   637  		return nil
   638  	}
   639  
   640  	// Setup the configuration
   641  	conf, err := a.clientConfig()
   642  	if err != nil {
   643  		return fmt.Errorf("client setup failed: %v", err)
   644  	}
   645  
   646  	// Reserve some ports for the plugins if we are on Windows
   647  	if runtime.GOOS == "windows" {
   648  		if err := a.reservePortsForClient(conf); err != nil {
   649  			return err
   650  		}
   651  	}
   652  
   653  	client, err := client.NewClient(conf, a.consulCatalog, a.consulService, a.logger)
   654  	if err != nil {
   655  		return fmt.Errorf("client setup failed: %v", err)
   656  	}
   657  	a.client = client
   658  
   659  	// Create the Nomad Client  services for Consul
   660  	if *a.config.Consul.AutoAdvertise {
   661  		httpServ := &structs.Service{
   662  			Name:      a.config.Consul.ClientServiceName,
   663  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   664  			Tags:      []string{consul.ServiceTagHTTP},
   665  		}
   666  		const isServer = false
   667  		if check := a.agentHTTPCheck(isServer); check != nil {
   668  			httpServ.Checks = []*structs.ServiceCheck{check}
   669  		}
   670  		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
   671  			return err
   672  		}
   673  	}
   674  
   675  	return nil
   676  }
   677  
   678  // agentHTTPCheck returns a health check for the agent's HTTP API if possible.
   679  // If no HTTP health check can be supported nil is returned.
   680  func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
   681  	// Resolve the http check address
   682  	httpCheckAddr := a.config.normalizedAddrs.HTTP
   683  	if *a.config.Consul.ChecksUseAdvertise {
   684  		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
   685  	}
   686  	check := structs.ServiceCheck{
   687  		Name:      a.config.Consul.ClientHTTPCheckName,
   688  		Type:      "http",
   689  		Path:      "/v1/agent/health?type=client",
   690  		Protocol:  "http",
   691  		Interval:  agentHttpCheckInterval,
   692  		Timeout:   agentHttpCheckTimeout,
   693  		PortLabel: httpCheckAddr,
   694  	}
   695  	// Switch to endpoint that doesn't require a leader for servers
   696  	if server {
   697  		check.Name = a.config.Consul.ServerHTTPCheckName
   698  		check.Path = "/v1/agent/health?type=server"
   699  	}
   700  	if !a.config.TLSConfig.EnableHTTP {
   701  		// No HTTPS, return a plain http check
   702  		return &check
   703  	}
   704  	if a.config.TLSConfig.VerifyHTTPSClient {
   705  		a.logger.Printf("[WARN] agent: not registering Nomad HTTPS Health Check because verify_https_client enabled")
   706  		return nil
   707  	}
   708  
   709  	// HTTPS enabled; skip verification
   710  	check.Protocol = "https"
   711  	check.TLSSkipVerify = true
   712  	return &check
   713  }
   714  
   715  // reservePortsForClient reserves a range of ports for the client to use when
   716  // it creates various plugins for log collection, executors, drivers, etc
   717  func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
   718  	// finding the device name for loopback
   719  	deviceName, addr, mask, err := a.findLoopbackDevice()
   720  	if err != nil {
   721  		return fmt.Errorf("error finding the device name for loopback: %v", err)
   722  	}
   723  
   724  	// seeing if the user has already reserved some resources on this device
   725  	var nr *structs.NetworkResource
   726  	if conf.Node.Reserved == nil {
   727  		conf.Node.Reserved = &structs.Resources{}
   728  	}
   729  	for _, n := range conf.Node.Reserved.Networks {
   730  		if n.Device == deviceName {
   731  			nr = n
   732  		}
   733  	}
   734  	// If the user hasn't already created the device, we create it
   735  	if nr == nil {
   736  		nr = &structs.NetworkResource{
   737  			Device:        deviceName,
   738  			IP:            addr,
   739  			CIDR:          mask,
   740  			ReservedPorts: make([]structs.Port, 0),
   741  		}
   742  	}
   743  	// appending the port ranges we want to use for the client to the list of
   744  	// reserved ports for this device
   745  	for i := conf.ClientMinPort; i <= conf.ClientMaxPort; i++ {
   746  		nr.ReservedPorts = append(nr.ReservedPorts, structs.Port{Label: fmt.Sprintf("plugin-%d", i), Value: int(i)})
   747  	}
   748  	conf.Node.Reserved.Networks = append(conf.Node.Reserved.Networks, nr)
   749  	return nil
   750  }
   751  
   752  // findLoopbackDevice iterates through all the interfaces on a machine and
   753  // returns the ip addr, mask of the loopback device
   754  func (a *Agent) findLoopbackDevice() (string, string, string, error) {
   755  	var ifcs []net.Interface
   756  	var err error
   757  	ifcs, err = net.Interfaces()
   758  	if err != nil {
   759  		return "", "", "", err
   760  	}
   761  	for _, ifc := range ifcs {
   762  		addrs, err := ifc.Addrs()
   763  		if err != nil {
   764  			return "", "", "", err
   765  		}
   766  		for _, addr := range addrs {
   767  			var ip net.IP
   768  			switch v := addr.(type) {
   769  			case *net.IPNet:
   770  				ip = v.IP
   771  			case *net.IPAddr:
   772  				ip = v.IP
   773  			}
   774  			if ip.IsLoopback() {
   775  				if ip.To4() == nil {
   776  					continue
   777  				}
   778  				return ifc.Name, ip.String(), addr.String(), nil
   779  			}
   780  		}
   781  	}
   782  
   783  	return "", "", "", fmt.Errorf("no loopback devices with IPV4 addr found")
   784  }
   785  
   786  // Leave is used gracefully exit. Clients will inform servers
   787  // of their departure so that allocations can be rescheduled.
   788  func (a *Agent) Leave() error {
   789  	if a.client != nil {
   790  		if err := a.client.Leave(); err != nil {
   791  			a.logger.Printf("[ERR] agent: client leave failed: %v", err)
   792  		}
   793  	}
   794  	if a.server != nil {
   795  		if err := a.server.Leave(); err != nil {
   796  			a.logger.Printf("[ERR] agent: server leave failed: %v", err)
   797  		}
   798  	}
   799  	return nil
   800  }
   801  
   802  // Shutdown is used to terminate the agent.
   803  func (a *Agent) Shutdown() error {
   804  	a.shutdownLock.Lock()
   805  	defer a.shutdownLock.Unlock()
   806  
   807  	if a.shutdown {
   808  		return nil
   809  	}
   810  
   811  	a.logger.Println("[INFO] agent: requesting shutdown")
   812  	if a.client != nil {
   813  		if err := a.client.Shutdown(); err != nil {
   814  			a.logger.Printf("[ERR] agent: client shutdown failed: %v", err)
   815  		}
   816  	}
   817  	if a.server != nil {
   818  		if err := a.server.Shutdown(); err != nil {
   819  			a.logger.Printf("[ERR] agent: server shutdown failed: %v", err)
   820  		}
   821  	}
   822  
   823  	if err := a.consulService.Shutdown(); err != nil {
   824  		a.logger.Printf("[ERR] agent: shutting down Consul client failed: %v", err)
   825  	}
   826  
   827  	a.logger.Println("[INFO] agent: shutdown complete")
   828  	a.shutdown = true
   829  	close(a.shutdownCh)
   830  	return nil
   831  }
   832  
   833  // RPC is used to make an RPC call to the Nomad servers
   834  func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
   835  	if a.server != nil {
   836  		return a.server.RPC(method, args, reply)
   837  	}
   838  	return a.client.RPC(method, args, reply)
   839  }
   840  
   841  // Client returns the configured client or nil
   842  func (a *Agent) Client() *client.Client {
   843  	return a.client
   844  }
   845  
   846  // Server returns the configured server or nil
   847  func (a *Agent) Server() *nomad.Server {
   848  	return a.server
   849  }
   850  
   851  // Stats is used to return statistics for debugging and insight
   852  // for various sub-systems
   853  func (a *Agent) Stats() map[string]map[string]string {
   854  	stats := make(map[string]map[string]string)
   855  	if a.server != nil {
   856  		subStat := a.server.Stats()
   857  		for k, v := range subStat {
   858  			stats[k] = v
   859  		}
   860  	}
   861  	if a.client != nil {
   862  		subStat := a.client.Stats()
   863  		for k, v := range subStat {
   864  			stats[k] = v
   865  		}
   866  	}
   867  	return stats
   868  }
   869  
   870  // ShouldReload determines if we should reload the configuration and agent
   871  // connections. If the TLS Configuration has not changed, we shouldn't reload.
   872  func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) {
   873  	a.configLock.Lock()
   874  	defer a.configLock.Unlock()
   875  
   876  	isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig)
   877  	if err != nil {
   878  		a.logger.Printf("[INFO] agent: error when parsing TLS certificate %v", err)
   879  		return false, false
   880  	} else if !isEqual {
   881  		return true, true
   882  	}
   883  
   884  	// Allow the ability to only reload HTTP connections
   885  	if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP {
   886  		http = true
   887  		agent = true
   888  	}
   889  
   890  	// Allow the ability to only reload HTTP connections
   891  	if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC {
   892  		agent = true
   893  	}
   894  
   895  	return agent, http
   896  }
   897  
   898  // Reload handles configuration changes for the agent. Provides a method that
   899  // is easier to unit test, as this action is invoked via SIGHUP.
   900  func (a *Agent) Reload(newConfig *Config) error {
   901  	a.configLock.Lock()
   902  	defer a.configLock.Unlock()
   903  
   904  	if newConfig == nil || newConfig.TLSConfig == nil {
   905  		return fmt.Errorf("cannot reload agent with nil configuration")
   906  	}
   907  
   908  	// This is just a TLS configuration reload, we don't need to refresh
   909  	// existing network connections
   910  	if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
   911  
   912  		// Reload the certificates on the keyloader and on success store the
   913  		// updated TLS config. It is important to reuse the same keyloader
   914  		// as this allows us to dynamically reload configurations not only
   915  		// on the Agent but on the Server and Client too (they are
   916  		// referencing the same keyloader).
   917  		keyloader := a.config.TLSConfig.GetKeyLoader()
   918  		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
   919  		if err != nil {
   920  			return err
   921  		}
   922  		a.config.TLSConfig = newConfig.TLSConfig
   923  		a.config.TLSConfig.KeyLoader = keyloader
   924  		return nil
   925  	}
   926  
   927  	// Completely reload the agent's TLS configuration (moving from non-TLS to
   928  	// TLS, or vice versa)
   929  	// This does not handle errors in loading the new TLS configuration
   930  	a.config.TLSConfig = newConfig.TLSConfig.Copy()
   931  
   932  	if newConfig.TLSConfig.IsEmpty() {
   933  		a.logger.Println("[WARN] agent: Downgrading agent's existing TLS configuration to plaintext")
   934  	} else {
   935  		a.logger.Println("[INFO] agent: Upgrading from plaintext configuration to TLS")
   936  	}
   937  
   938  	return nil
   939  }
   940  
   941  // GetConfig creates a locked reference to the agent's config
   942  func (a *Agent) GetConfig() *Config {
   943  	a.configLock.Lock()
   944  	defer a.configLock.Unlock()
   945  
   946  	return a.config
   947  }
   948  
   949  // setupConsul creates the Consul client and starts its main Run loop.
   950  func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
   951  	apiConf, err := consulConfig.ApiConfig()
   952  	if err != nil {
   953  		return err
   954  	}
   955  	client, err := api.NewClient(apiConf)
   956  	if err != nil {
   957  		return err
   958  	}
   959  
   960  	// Determine version for TLSSkipVerify
   961  
   962  	// Create Consul Catalog client for service discovery.
   963  	a.consulCatalog = client.Catalog()
   964  
   965  	// Create Consul Service client for service advertisement and checks.
   966  	isClient := false
   967  	if a.config.Client != nil && a.config.Client.Enabled {
   968  		isClient = true
   969  	}
   970  	a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient)
   971  
   972  	// Run the Consul service client's sync'ing main loop
   973  	go a.consulService.Run()
   974  	return nil
   975  }