github.com/hernad/nomad@v1.6.112/command/agent/agent.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package agent
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"io"
    10  	golog "log"
    11  	"net"
    12  	"os"
    13  	"path/filepath"
    14  	"runtime"
    15  	"strings"
    16  	"sync"
    17  	"time"
    18  
    19  	metrics "github.com/armon/go-metrics"
    20  	"github.com/dustin/go-humanize"
    21  	consulapi "github.com/hashicorp/consul/api"
    22  	log "github.com/hashicorp/go-hclog"
    23  	uuidparse "github.com/hashicorp/go-uuid"
    24  	"github.com/hernad/nomad/client"
    25  	clientconfig "github.com/hernad/nomad/client/config"
    26  	"github.com/hernad/nomad/client/lib/cgutil"
    27  	"github.com/hernad/nomad/client/state"
    28  	"github.com/hernad/nomad/command/agent/consul"
    29  	"github.com/hernad/nomad/command/agent/event"
    30  	"github.com/hernad/nomad/helper/bufconndialer"
    31  	"github.com/hernad/nomad/helper/escapingfs"
    32  	"github.com/hernad/nomad/helper/pluginutils/loader"
    33  	"github.com/hernad/nomad/helper/pointer"
    34  	"github.com/hernad/nomad/helper/uuid"
    35  	"github.com/hernad/nomad/lib/cpuset"
    36  	"github.com/hernad/nomad/nomad"
    37  	"github.com/hernad/nomad/nomad/deploymentwatcher"
    38  	"github.com/hernad/nomad/nomad/structs"
    39  	"github.com/hernad/nomad/nomad/structs/config"
    40  	"github.com/hashicorp/raft"
    41  )
    42  
const (
	// Interval/timeout pairs for the agent HTTP, server RPC and server Serf
	// checks. NOTE(review): presumably consumed when registering Consul
	// health checks; the call sites are outside this view — confirm.
	agentHttpCheckInterval  = 10 * time.Second
	agentHttpCheckTimeout   = 5 * time.Second
	serverRpcCheckInterval  = 10 * time.Second
	serverRpcCheckTimeout   = 3 * time.Second
	serverSerfCheckInterval = 10 * time.Second
	serverSerfCheckTimeout  = 3 * time.Second

	// roles used in identifying Consul entries for Nomad agents
	consulRoleServer = "server"
	consulRoleClient = "client"

	// DefaultRaftMultiplier is used as a baseline Raft configuration that
	// will be reliable on a very basic server. It is the multiplier applied
	// to the Raft timeouts when raft_multiplier is unset or zero.
	DefaultRaftMultiplier = 1

	// MaxRaftMultiplier is a fairly arbitrary upper bound that limits the
	// amount of performance detuning that's possible. Configured multipliers
	// above this value are rejected by convertServerConfig.
	MaxRaftMultiplier = 10
)
    63  
// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
//
// An Agent is created with NewAgent, which fails unless at least one of
// client or server mode ends up enabled.
type Agent struct {
	// config is the agent configuration passed to NewAgent.
	// NOTE(review): configLock presumably guards config; its users are
	// outside this view — confirm.
	config     *Config
	configLock sync.Mutex

	// logger is the logger passed to NewAgent and httpLogger is that logger
	// reset to the name "http". logOutput is the writer passed to NewAgent;
	// it is handed to the server config as its LogOutput.
	// NOTE(review): auditor is not assigned in NewAgent; presumably set up
	// by the enterprise agent setup — confirm.
	logger     log.InterceptLogger
	auditor    event.Auditor
	httpLogger log.Logger
	logOutput  io.Writer

	// EnterpriseAgent holds information and methods for enterprise functionality
	EnterpriseAgent *EnterpriseAgent

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulProxies is the subset of Consul's Agent API Nomad uses.
	consulProxies *consul.ConnectProxies

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// consulConfigEntries is the subset of Consul's Configuration Entries API Nomad uses.
	consulConfigEntries consul.ConfigAPI

	// consulACLs is the subset of Consul's ACL API Nomad uses.
	consulACLs consul.ACLsAPI

	// client is the launched Nomad Client. Can be nil if the agent isn't
	// configured to run a client.
	client *client.Client

	// server is the launched Nomad Server. Can be nil if the agent isn't
	// configured to run a server.
	server *nomad.Server

	// pluginLoader is used to load plugins
	pluginLoader loader.PluginCatalog

	// pluginSingletonLoader is a plugin loader that returns singleton
	// instances of the plugins.
	pluginSingletonLoader loader.PluginCatalog

	// shutdownCh is created in NewAgent.
	// NOTE(review): presumably closed on shutdown, with shutdown recording
	// that it already happened and shutdownLock guarding both — the Shutdown
	// method is outside this view; confirm.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// builtinDialer dials the builtinListener. It is used for connecting
	// consul-template to the HTTP API in process. In the event this agent is
	// not running in client mode, these two fields will be nil.
	builtinListener net.Listener
	builtinDialer   *bufconndialer.BufConnWrapper

	// taskAPIServer is an HTTP server for attaching per-task listeners. Always
	// requires auth.
	taskAPIServer *builtinAPI

	// inmemSink is the in-memory metrics sink passed to NewAgent.
	inmemSink *metrics.InmemSink
}
   128  
   129  // NewAgent is used to create a new agent with the given configuration
   130  func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
   131  	a := &Agent{
   132  		config:     config,
   133  		logOutput:  logOutput,
   134  		shutdownCh: make(chan struct{}),
   135  		inmemSink:  inmem,
   136  	}
   137  
   138  	// Create the loggers
   139  	a.logger = logger
   140  	a.httpLogger = a.logger.ResetNamed("http")
   141  
   142  	// Global logger should match internal logger as much as possible
   143  	golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds)
   144  
   145  	if err := a.setupConsul(config.Consul); err != nil {
   146  		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
   147  	}
   148  
   149  	if err := a.setupServer(); err != nil {
   150  		return nil, err
   151  	}
   152  	if err := a.setupClient(); err != nil {
   153  		return nil, err
   154  	}
   155  
   156  	if err := a.setupEnterpriseAgent(logger); err != nil {
   157  		return nil, err
   158  	}
   159  	if a.client == nil && a.server == nil {
   160  		return nil, fmt.Errorf("must have at least client or server mode enabled")
   161  	}
   162  
   163  	return a, nil
   164  }
   165  
   166  // convertServerConfig takes an agent config and log output and returns a Nomad
   167  // Config. There may be missing fields that must be set by the agent. To do this
   168  // call finalizeServerConfig.
   169  func convertServerConfig(agentConfig *Config) (*nomad.Config, error) {
   170  	conf := agentConfig.NomadConfig
   171  	if conf == nil {
   172  		conf = nomad.DefaultConfig()
   173  	}
   174  	conf.DevMode = agentConfig.DevMode
   175  	conf.EnableDebug = agentConfig.EnableDebug
   176  
   177  	conf.Build = agentConfig.Version.VersionNumber()
   178  	conf.Revision = agentConfig.Version.Revision
   179  	if agentConfig.Region != "" {
   180  		conf.Region = agentConfig.Region
   181  	}
   182  
   183  	// Set the Authoritative Region if set, otherwise default to
   184  	// the same as the local region.
   185  	if agentConfig.Server.AuthoritativeRegion != "" {
   186  		conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion
   187  	} else if agentConfig.Region != "" {
   188  		conf.AuthoritativeRegion = agentConfig.Region
   189  	}
   190  
   191  	if agentConfig.Datacenter != "" {
   192  		conf.Datacenter = agentConfig.Datacenter
   193  	}
   194  	if agentConfig.NodeName != "" {
   195  		conf.NodeName = agentConfig.NodeName
   196  	}
   197  	if agentConfig.Server.BootstrapExpect > 0 {
   198  		conf.BootstrapExpect = agentConfig.Server.BootstrapExpect
   199  	}
   200  	if agentConfig.DataDir != "" {
   201  		conf.DataDir = filepath.Join(agentConfig.DataDir, "server")
   202  	}
   203  	if agentConfig.Server.DataDir != "" {
   204  		conf.DataDir = agentConfig.Server.DataDir
   205  	}
   206  	if agentConfig.Server.RaftProtocol != 0 {
   207  		conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol)
   208  	}
   209  	if v := conf.RaftConfig.ProtocolVersion; v != 3 {
   210  		return nil, fmt.Errorf("raft_protocol must be 3 in Nomad v1.4 and later, got %d", v)
   211  	}
   212  	raftMultiplier := int(DefaultRaftMultiplier)
   213  	if agentConfig.Server.RaftMultiplier != nil && *agentConfig.Server.RaftMultiplier != 0 {
   214  		raftMultiplier = *agentConfig.Server.RaftMultiplier
   215  		if raftMultiplier < 1 || raftMultiplier > MaxRaftMultiplier {
   216  			return nil, fmt.Errorf("raft_multiplier cannot be %d. Must be between 1 and %d", *agentConfig.Server.RaftMultiplier, MaxRaftMultiplier)
   217  		}
   218  	}
   219  
   220  	if vPtr := agentConfig.Server.RaftTrailingLogs; vPtr != nil {
   221  		if *vPtr < 1 {
   222  			return nil, fmt.Errorf("raft_trailing_logs must be non-negative, got %d", *vPtr)
   223  		}
   224  		conf.RaftConfig.TrailingLogs = uint64(*vPtr)
   225  	}
   226  
   227  	if vPtr := agentConfig.Server.RaftSnapshotInterval; vPtr != nil {
   228  		dur, err := time.ParseDuration(*vPtr)
   229  		if err != nil {
   230  			return nil, err
   231  		}
   232  		if dur < 5*time.Millisecond {
   233  			return nil, fmt.Errorf("raft_snapshot_interval must be greater than 5ms, got %q", *vPtr)
   234  		}
   235  		conf.RaftConfig.SnapshotInterval = dur
   236  	}
   237  
   238  	if vPtr := agentConfig.Server.RaftSnapshotThreshold; vPtr != nil {
   239  		if *vPtr < 1 {
   240  			return nil, fmt.Errorf("raft_snapshot_threshold must be non-negative, got %d", *vPtr)
   241  		}
   242  		conf.RaftConfig.SnapshotThreshold = uint64(*vPtr)
   243  	}
   244  
   245  	conf.RaftConfig.ElectionTimeout *= time.Duration(raftMultiplier)
   246  	conf.RaftConfig.HeartbeatTimeout *= time.Duration(raftMultiplier)
   247  	conf.RaftConfig.LeaderLeaseTimeout *= time.Duration(raftMultiplier)
   248  	conf.RaftConfig.CommitTimeout *= time.Duration(raftMultiplier)
   249  
   250  	if agentConfig.Server.NumSchedulers != nil {
   251  		conf.NumSchedulers = *agentConfig.Server.NumSchedulers
   252  	}
   253  	if len(agentConfig.Server.EnabledSchedulers) != 0 {
   254  		// Convert to a set and require the core scheduler
   255  		set := make(map[string]struct{}, 4)
   256  		set[structs.JobTypeCore] = struct{}{}
   257  		for _, sched := range agentConfig.Server.EnabledSchedulers {
   258  			set[sched] = struct{}{}
   259  		}
   260  
   261  		schedulers := make([]string, 0, len(set))
   262  		for k := range set {
   263  			schedulers = append(schedulers, k)
   264  		}
   265  
   266  		conf.EnabledSchedulers = schedulers
   267  
   268  	}
   269  	if agentConfig.ACL.Enabled {
   270  		conf.ACLEnabled = true
   271  	}
   272  	if agentConfig.ACL.ReplicationToken != "" {
   273  		conf.ReplicationToken = agentConfig.ACL.ReplicationToken
   274  	}
   275  	if agentConfig.ACL.TokenMinExpirationTTL != 0 {
   276  		conf.ACLTokenMinExpirationTTL = agentConfig.ACL.TokenMinExpirationTTL
   277  	}
   278  	if agentConfig.ACL.TokenMaxExpirationTTL != 0 {
   279  		conf.ACLTokenMaxExpirationTTL = agentConfig.ACL.TokenMaxExpirationTTL
   280  	}
   281  	if agentConfig.Sentinel != nil {
   282  		conf.SentinelConfig = agentConfig.Sentinel
   283  	}
   284  	if agentConfig.Server.NonVotingServer {
   285  		conf.NonVoter = true
   286  	}
   287  	if agentConfig.Server.RedundancyZone != "" {
   288  		conf.RedundancyZone = agentConfig.Server.RedundancyZone
   289  	}
   290  	if agentConfig.Server.UpgradeVersion != "" {
   291  		conf.UpgradeVersion = agentConfig.Server.UpgradeVersion
   292  	}
   293  	if agentConfig.Server.EnableEventBroker != nil {
   294  		conf.EnableEventBroker = *agentConfig.Server.EnableEventBroker
   295  	}
   296  	if agentConfig.Server.EventBufferSize != nil {
   297  		if *agentConfig.Server.EventBufferSize < 0 {
   298  			return nil, fmt.Errorf("Invalid Config, event_buffer_size must be non-negative")
   299  		}
   300  		conf.EventBufferSize = int64(*agentConfig.Server.EventBufferSize)
   301  	}
   302  	if agentConfig.Autopilot != nil {
   303  		if agentConfig.Autopilot.CleanupDeadServers != nil {
   304  			conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers
   305  		}
   306  		if agentConfig.Autopilot.ServerStabilizationTime != 0 {
   307  			conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime
   308  		}
   309  		if agentConfig.Autopilot.LastContactThreshold != 0 {
   310  			conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold
   311  		}
   312  		if agentConfig.Autopilot.MaxTrailingLogs != 0 {
   313  			conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs)
   314  		}
   315  		if agentConfig.Autopilot.MinQuorum != 0 {
   316  			conf.AutopilotConfig.MinQuorum = uint(agentConfig.Autopilot.MinQuorum)
   317  		}
   318  		if agentConfig.Autopilot.EnableRedundancyZones != nil {
   319  			conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones
   320  		}
   321  		if agentConfig.Autopilot.DisableUpgradeMigration != nil {
   322  			conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration
   323  		}
   324  		if agentConfig.Autopilot.EnableCustomUpgrades != nil {
   325  			conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades
   326  		}
   327  	}
   328  
   329  	jobMaxPriority := structs.JobDefaultMaxPriority
   330  	if agentConfig.Server.JobMaxPriority != nil && *agentConfig.Server.JobMaxPriority != 0 {
   331  		jobMaxPriority = *agentConfig.Server.JobMaxPriority
   332  		if jobMaxPriority < structs.JobDefaultMaxPriority || jobMaxPriority > structs.JobMaxPriority {
   333  			return nil, fmt.Errorf("job_max_priority cannot be %d. Must be between %d and %d", *agentConfig.Server.JobMaxPriority, structs.JobDefaultMaxPriority, structs.JobMaxPriority)
   334  		}
   335  	}
   336  	jobDefaultPriority := structs.JobDefaultPriority
   337  	if agentConfig.Server.JobDefaultPriority != nil && *agentConfig.Server.JobDefaultPriority != 0 {
   338  		jobDefaultPriority = *agentConfig.Server.JobDefaultPriority
   339  		if jobDefaultPriority < structs.JobDefaultPriority || jobDefaultPriority >= jobMaxPriority {
   340  			return nil, fmt.Errorf("job_default_priority cannot be %d. Must be between %d and %d", *agentConfig.Server.JobDefaultPriority, structs.JobDefaultPriority, jobMaxPriority)
   341  		}
   342  	}
   343  	conf.JobMaxPriority = jobMaxPriority
   344  	conf.JobDefaultPriority = jobDefaultPriority
   345  
   346  	// Set up the bind addresses
   347  	rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
   348  	if err != nil {
   349  		return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
   350  	}
   351  	serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf)
   352  	if err != nil {
   353  		return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err)
   354  	}
   355  	conf.RPCAddr.Port = rpcAddr.Port
   356  	conf.RPCAddr.IP = rpcAddr.IP
   357  	conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port
   358  	conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String()
   359  	conf.SerfConfig.RejoinAfterLeave = agentConfig.Server.RejoinAfterLeave
   360  
   361  	// Set up the advertise addresses
   362  	rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC)
   363  	if err != nil {
   364  		return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err)
   365  	}
   366  	serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf)
   367  	if err != nil {
   368  		return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   369  	}
   370  
   371  	// Server address is the serf advertise address and rpc port. This is the
   372  	// address that all servers should be able to communicate over RPC with.
   373  	serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port)))
   374  	if err != nil {
   375  		return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err)
   376  	}
   377  
   378  	conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String()
   379  	conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port
   380  	conf.ClientRPCAdvertise = rpcAddr
   381  	conf.ServerRPCAdvertise = serverAddr
   382  
   383  	// Set up gc threshold and heartbeat grace period
   384  	if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" {
   385  		dur, err := time.ParseDuration(gcThreshold)
   386  		if err != nil {
   387  			return nil, err
   388  		}
   389  		conf.NodeGCThreshold = dur
   390  	}
   391  	if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" {
   392  		dur, err := time.ParseDuration(gcInterval)
   393  		if err != nil {
   394  			return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err)
   395  		} else if dur <= time.Duration(0) {
   396  			return nil, fmt.Errorf("job_gc_interval should be greater than 0s")
   397  		}
   398  		conf.JobGCInterval = dur
   399  	}
   400  	if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" {
   401  		dur, err := time.ParseDuration(gcThreshold)
   402  		if err != nil {
   403  			return nil, err
   404  		}
   405  		conf.JobGCThreshold = dur
   406  	}
   407  	if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" {
   408  		dur, err := time.ParseDuration(gcThreshold)
   409  		if err != nil {
   410  			return nil, err
   411  		}
   412  		conf.EvalGCThreshold = dur
   413  	}
   414  	if gcThreshold := agentConfig.Server.BatchEvalGCThreshold; gcThreshold != "" {
   415  		dur, err := time.ParseDuration(gcThreshold)
   416  		if err != nil {
   417  			return nil, err
   418  		}
   419  		conf.BatchEvalGCThreshold = dur
   420  	}
   421  	if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" {
   422  		dur, err := time.ParseDuration(gcThreshold)
   423  		if err != nil {
   424  			return nil, err
   425  		}
   426  		conf.DeploymentGCThreshold = dur
   427  	}
   428  	if gcInterval := agentConfig.Server.CSIVolumeClaimGCInterval; gcInterval != "" {
   429  		dur, err := time.ParseDuration(gcInterval)
   430  		if err != nil {
   431  			return nil, err
   432  		} else if dur <= time.Duration(0) {
   433  			return nil, fmt.Errorf("csi_volume_claim_gc_interval should be greater than 0s")
   434  		}
   435  		conf.CSIVolumeClaimGCInterval = dur
   436  	}
   437  	if gcThreshold := agentConfig.Server.CSIVolumeClaimGCThreshold; gcThreshold != "" {
   438  		dur, err := time.ParseDuration(gcThreshold)
   439  		if err != nil {
   440  			return nil, err
   441  		}
   442  		conf.CSIVolumeClaimGCThreshold = dur
   443  	}
   444  	if gcThreshold := agentConfig.Server.CSIPluginGCThreshold; gcThreshold != "" {
   445  		dur, err := time.ParseDuration(gcThreshold)
   446  		if err != nil {
   447  			return nil, err
   448  		}
   449  		conf.CSIPluginGCThreshold = dur
   450  	}
   451  	if gcThreshold := agentConfig.Server.ACLTokenGCThreshold; gcThreshold != "" {
   452  		dur, err := time.ParseDuration(gcThreshold)
   453  		if err != nil {
   454  			return nil, err
   455  		}
   456  		conf.ACLTokenExpirationGCThreshold = dur
   457  	}
   458  	if gcThreshold := agentConfig.Server.RootKeyGCThreshold; gcThreshold != "" {
   459  		dur, err := time.ParseDuration(gcThreshold)
   460  		if err != nil {
   461  			return nil, err
   462  		}
   463  		conf.RootKeyGCThreshold = dur
   464  	}
   465  	if gcInterval := agentConfig.Server.RootKeyGCInterval; gcInterval != "" {
   466  		dur, err := time.ParseDuration(gcInterval)
   467  		if err != nil {
   468  			return nil, err
   469  		}
   470  		conf.RootKeyGCInterval = dur
   471  	}
   472  	if rotationThreshold := agentConfig.Server.RootKeyRotationThreshold; rotationThreshold != "" {
   473  		dur, err := time.ParseDuration(rotationThreshold)
   474  		if err != nil {
   475  			return nil, err
   476  		}
   477  		conf.RootKeyRotationThreshold = dur
   478  	}
   479  
   480  	if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 {
   481  		conf.HeartbeatGrace = heartbeatGrace
   482  	}
   483  	if min := agentConfig.Server.MinHeartbeatTTL; min != 0 {
   484  		conf.MinHeartbeatTTL = min
   485  	}
   486  	if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 {
   487  		conf.MaxHeartbeatsPerSecond = maxHPS
   488  	}
   489  	if failoverTTL := agentConfig.Server.FailoverHeartbeatTTL; failoverTTL != 0 {
   490  		conf.FailoverHeartbeatTTL = failoverTTL
   491  	}
   492  
   493  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" {
   494  		return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled")
   495  	}
   496  
   497  	// handle system scheduler preemption default
   498  	if agentConfig.Server.DefaultSchedulerConfig != nil {
   499  		conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig
   500  	}
   501  
   502  	// Add the Consul and Vault configs
   503  	conf.ConsulConfig = agentConfig.Consul
   504  	conf.VaultConfig = agentConfig.Vault
   505  
   506  	// Set the TLS config
   507  	conf.TLSConfig = agentConfig.TLSConfig
   508  
   509  	// Setup telemetry related config
   510  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   511  	conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics
   512  	conf.DisableRPCRateMetricsLabels = agentConfig.Telemetry.DisableRPCRateMetricsLabels
   513  
   514  	if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil {
   515  		return nil, fmt.Errorf("error parsing rpc_handshake_timeout: %v", err)
   516  	} else if d < 0 {
   517  		return nil, fmt.Errorf("rpc_handshake_timeout must be >= 0")
   518  	} else {
   519  		conf.RPCHandshakeTimeout = d
   520  	}
   521  
   522  	// Set max rpc conns; nil/0 == unlimited
   523  	// Leave a little room for streaming RPCs
   524  	minLimit := config.LimitsNonStreamingConnsPerClient + 5
   525  	if agentConfig.Limits.RPCMaxConnsPerClient == nil || *agentConfig.Limits.RPCMaxConnsPerClient == 0 {
   526  		conf.RPCMaxConnsPerClient = 0
   527  	} else if limit := *agentConfig.Limits.RPCMaxConnsPerClient; limit <= minLimit {
   528  		return nil, fmt.Errorf("rpc_max_conns_per_client must be > %d; found: %d", minLimit, limit)
   529  	} else {
   530  		conf.RPCMaxConnsPerClient = limit
   531  	}
   532  
   533  	// Set deployment rate limit
   534  	if rate := agentConfig.Server.DeploymentQueryRateLimit; rate == 0 {
   535  		conf.DeploymentQueryRateLimit = deploymentwatcher.LimitStateQueriesPerSecond
   536  	} else if rate > 0 {
   537  		conf.DeploymentQueryRateLimit = rate
   538  	} else {
   539  		return nil, fmt.Errorf("deploy_query_rate_limit must be greater than 0")
   540  	}
   541  
   542  	// Set plan rejection tracker configuration.
   543  	if planRejectConf := agentConfig.Server.PlanRejectionTracker; planRejectConf != nil {
   544  		if planRejectConf.Enabled != nil {
   545  			conf.NodePlanRejectionEnabled = *planRejectConf.Enabled
   546  		}
   547  		conf.NodePlanRejectionThreshold = planRejectConf.NodeThreshold
   548  
   549  		if planRejectConf.NodeWindow == 0 {
   550  			return nil, fmt.Errorf("plan_rejection_tracker.node_window must be greater than 0")
   551  		} else {
   552  			conf.NodePlanRejectionWindow = planRejectConf.NodeWindow
   553  		}
   554  	}
   555  
   556  	// Add Enterprise license configs
   557  	conf.LicenseConfig = &nomad.LicenseConfig{
   558  		BuildDate:         agentConfig.Version.BuildDate,
   559  		AdditionalPubKeys: agentConfig.Server.licenseAdditionalPublicKeys,
   560  		LicenseEnvBytes:   agentConfig.Server.LicenseEnv,
   561  		LicensePath:       agentConfig.Server.LicensePath,
   562  	}
   563  
   564  	// Add the search configuration
   565  	if search := agentConfig.Server.Search; search != nil {
   566  		conf.SearchConfig = &structs.SearchConfig{
   567  			FuzzyEnabled:  search.FuzzyEnabled,
   568  			LimitQuery:    search.LimitQuery,
   569  			LimitResults:  search.LimitResults,
   570  			MinTermLength: search.MinTermLength,
   571  		}
   572  	}
   573  
   574  	// Set the raft bolt parameters
   575  	if bolt := agentConfig.Server.RaftBoltConfig; bolt != nil {
   576  		conf.RaftBoltNoFreelistSync = bolt.NoFreelistSync
   577  	}
   578  
   579  	// Interpret job_max_source_size as bytes from string value
   580  	if agentConfig.Server.JobMaxSourceSize == nil {
   581  		agentConfig.Server.JobMaxSourceSize = pointer.Of("1M")
   582  	}
   583  	jobMaxSourceBytes, err := humanize.ParseBytes(*agentConfig.Server.JobMaxSourceSize)
   584  	if err != nil {
   585  		return nil, fmt.Errorf("failed to parse max job source bytes: %w", err)
   586  	}
   587  	conf.JobMaxSourceSize = int(jobMaxSourceBytes)
   588  
   589  	return conf, nil
   590  }
   591  
   592  // serverConfig is used to generate a new server configuration struct
   593  // for initializing a nomad server.
   594  func (a *Agent) serverConfig() (*nomad.Config, error) {
   595  	c, err := convertServerConfig(a.config)
   596  	if err != nil {
   597  		return nil, err
   598  	}
   599  
   600  	a.finalizeServerConfig(c)
   601  	return c, nil
   602  }
   603  
   604  // finalizeServerConfig sets configuration fields on the server config that are
   605  // not statically convertible and are from the agent.
   606  func (a *Agent) finalizeServerConfig(c *nomad.Config) {
   607  	// Setup the logging
   608  	c.Logger = a.logger
   609  	c.LogOutput = a.logOutput
   610  	c.AgentShutdown = func() error { return a.Shutdown() }
   611  }
   612  
   613  // clientConfig is used to generate a new client configuration struct for
   614  // initializing a Nomad client.
   615  func (a *Agent) clientConfig() (*clientconfig.Config, error) {
   616  	c, err := convertClientConfig(a.config)
   617  	if err != nil {
   618  		return nil, err
   619  	}
   620  
   621  	if err = a.finalizeClientConfig(c); err != nil {
   622  		return nil, err
   623  	}
   624  
   625  	return c, nil
   626  }
   627  
   628  // finalizeClientConfig sets configuration fields on the client config that are
   629  // not statically convertible and are from the agent.
   630  func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error {
   631  	// Setup the logging
   632  	c.Logger = a.logger
   633  
   634  	// If we are running a server, append both its bind and advertise address so
   635  	// we are able to at least talk to the local server even if that isn't
   636  	// configured explicitly. This handles both running server and client on one
   637  	// host and -dev mode.
   638  	if a.server != nil {
   639  		advertised := a.config.AdvertiseAddrs
   640  		normalized := a.config.normalizedAddrs
   641  
   642  		if advertised == nil || advertised.RPC == "" {
   643  			return fmt.Errorf("AdvertiseAddrs is nil or empty")
   644  		} else if normalized == nil || normalized.RPC == "" {
   645  			return fmt.Errorf("normalizedAddrs is nil or empty")
   646  		}
   647  
   648  		if normalized.RPC == advertised.RPC {
   649  			c.Servers = append(c.Servers, normalized.RPC)
   650  		} else {
   651  			c.Servers = append(c.Servers, normalized.RPC, advertised.RPC)
   652  		}
   653  	}
   654  
   655  	// Setup the plugin loaders
   656  	c.PluginLoader = a.pluginLoader
   657  	c.PluginSingletonLoader = a.pluginSingletonLoader
   658  
   659  	// Log deprecation messages about Consul related configuration in client
   660  	// options
   661  	var invalidConsulKeys []string
   662  	for key := range c.Options {
   663  		if strings.HasPrefix(key, "consul") {
   664  			invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key))
   665  		}
   666  	}
   667  	if len(invalidConsulKeys) > 0 {
   668  		a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ","))
   669  		a.logger.Warn(`Nomad client ignores consul related configuration in client options.
   670  		Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html
   671  		to configure Nomad to work with Consul.`)
   672  	}
   673  
   674  	return nil
   675  }
   676  
// convertClientConfig takes an agent config and returns a client Config.
// There may be missing fields that must be set by the agent. To do this
// call finalizeClientConfig.
   680  func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
   681  	// Set up the configuration
   682  	conf := agentConfig.ClientConfig
   683  	if conf == nil {
   684  		conf = clientconfig.DefaultConfig()
   685  	}
   686  
   687  	conf.Servers = agentConfig.Client.Servers
   688  	conf.DevMode = agentConfig.DevMode
   689  	conf.EnableDebug = agentConfig.EnableDebug
   690  
   691  	if agentConfig.Region != "" {
   692  		conf.Region = agentConfig.Region
   693  	}
   694  	if agentConfig.DataDir != "" {
   695  		conf.StateDir = filepath.Join(agentConfig.DataDir, "client")
   696  		conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc")
   697  	}
   698  	if agentConfig.Client.StateDir != "" {
   699  		conf.StateDir = agentConfig.Client.StateDir
   700  	}
   701  	if agentConfig.Client.AllocDir != "" {
   702  		conf.AllocDir = agentConfig.Client.AllocDir
   703  	}
   704  	if agentConfig.Client.NetworkInterface != "" {
   705  		conf.NetworkInterface = agentConfig.Client.NetworkInterface
   706  	}
   707  	conf.ChrootEnv = agentConfig.Client.ChrootEnv
   708  	conf.Options = agentConfig.Client.Options
   709  	if agentConfig.Client.NetworkSpeed != 0 {
   710  		conf.NetworkSpeed = agentConfig.Client.NetworkSpeed
   711  	}
   712  	if agentConfig.Client.CpuCompute != 0 {
   713  		conf.CpuCompute = agentConfig.Client.CpuCompute
   714  	}
   715  	if agentConfig.Client.MemoryMB != 0 {
   716  		conf.MemoryMB = agentConfig.Client.MemoryMB
   717  	}
   718  	if agentConfig.Client.DiskTotalMB != 0 {
   719  		conf.DiskTotalMB = agentConfig.Client.DiskTotalMB
   720  	}
   721  	if agentConfig.Client.DiskFreeMB != 0 {
   722  		conf.DiskFreeMB = agentConfig.Client.DiskFreeMB
   723  	}
   724  	if agentConfig.Client.MaxKillTimeout != "" {
   725  		dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout)
   726  		if err != nil {
   727  			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
   728  		}
   729  		conf.MaxKillTimeout = dur
   730  	}
   731  	conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort)
   732  	conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort)
   733  	conf.MaxDynamicPort = agentConfig.Client.MaxDynamicPort
   734  	conf.MinDynamicPort = agentConfig.Client.MinDynamicPort
   735  	conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec
   736  
   737  	if agentConfig.Client.TemplateConfig != nil {
   738  		conf.TemplateConfig = agentConfig.Client.TemplateConfig.Copy()
   739  	}
   740  
   741  	hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes))
   742  	for _, v := range agentConfig.Client.HostVolumes {
   743  		hvMap[v.Name] = v
   744  	}
   745  	conf.HostVolumes = hvMap
   746  
   747  	// Setup the node
   748  	conf.Node = new(structs.Node)
   749  	conf.Node.Datacenter = agentConfig.Datacenter
   750  	conf.Node.Name = agentConfig.NodeName
   751  	conf.Node.Meta = agentConfig.Client.Meta
   752  	conf.Node.NodeClass = agentConfig.Client.NodeClass
   753  	conf.Node.NodePool = agentConfig.Client.NodePool
   754  
   755  	// Set up the HTTP advertise address
   756  	conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP
   757  
   758  	// Canonicalize Node struct
   759  	conf.Node.Canonicalize()
   760  
   761  	// Reserve resources on the node.
   762  	// COMPAT(0.10): Remove in 0.10
   763  	r := conf.Node.Reserved
   764  	if r == nil {
   765  		r = new(structs.Resources)
   766  		conf.Node.Reserved = r
   767  	}
   768  	r.CPU = agentConfig.Client.Reserved.CPU
   769  	r.MemoryMB = agentConfig.Client.Reserved.MemoryMB
   770  	r.DiskMB = agentConfig.Client.Reserved.DiskMB
   771  
   772  	res := conf.Node.ReservedResources
   773  	if res == nil {
   774  		res = new(structs.NodeReservedResources)
   775  		conf.Node.ReservedResources = res
   776  	}
   777  	res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU)
   778  	res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB)
   779  	res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB)
   780  	res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts
   781  	if agentConfig.Client.Reserved.Cores != "" {
   782  		cores, err := cpuset.Parse(agentConfig.Client.Reserved.Cores)
   783  		if err != nil {
   784  			return nil, fmt.Errorf("failed to parse client > reserved > cores value %q: %v", agentConfig.Client.Reserved.Cores, err)
   785  		}
   786  		res.Cpu.ReservedCpuCores = cores.ToSlice()
   787  	}
   788  
   789  	conf.Version = agentConfig.Version
   790  
   791  	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" {
   792  		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
   793  	}
   794  
   795  	conf.ConsulConfig = agentConfig.Consul
   796  	conf.VaultConfig = agentConfig.Vault
   797  
   798  	// Set up Telemetry configuration
   799  	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
   800  	conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
   801  	conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
   802  
   803  	// Set the TLS related configs
   804  	conf.TLSConfig = agentConfig.TLSConfig
   805  	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP
   806  
   807  	// Set the GC related configs
   808  	conf.GCInterval = agentConfig.Client.GCInterval
   809  	conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys
   810  	conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold
   811  	conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold
   812  	conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs
   813  	if agentConfig.Client.NoHostUUID != nil {
   814  		conf.NoHostUUID = *agentConfig.Client.NoHostUUID
   815  	} else {
   816  		// Default no_host_uuid to true
   817  		conf.NoHostUUID = true
   818  	}
   819  
   820  	// Setup the ACLs
   821  	conf.ACLEnabled = agentConfig.ACL.Enabled
   822  	conf.ACLTokenTTL = agentConfig.ACL.TokenTTL
   823  	conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL
   824  	conf.ACLRoleTTL = agentConfig.ACL.RoleTTL
   825  
   826  	// Setup networking configuration
   827  	conf.CNIPath = agentConfig.Client.CNIPath
   828  	conf.CNIConfigDir = agentConfig.Client.CNIConfigDir
   829  	conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName
   830  	conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet
   831  	conf.BridgeNetworkHairpinMode = agentConfig.Client.BridgeNetworkHairpinMode
   832  
   833  	for _, hn := range agentConfig.Client.HostNetworks {
   834  		conf.HostNetworks[hn.Name] = hn
   835  	}
   836  	conf.BindWildcardDefaultHostNetwork = agentConfig.Client.BindWildcardDefaultHostNetwork
   837  
   838  	conf.CgroupParent = cgutil.GetCgroupParent(agentConfig.Client.CgroupParent)
   839  	if agentConfig.Client.ReserveableCores != "" {
   840  		cores, err := cpuset.Parse(agentConfig.Client.ReserveableCores)
   841  		if err != nil {
   842  			return nil, fmt.Errorf("failed to parse 'reservable_cores': %v", err)
   843  		}
   844  		conf.ReservableCores = cores.ToSlice()
   845  	}
   846  
   847  	if agentConfig.Client.NomadServiceDiscovery != nil {
   848  		conf.NomadServiceDiscovery = *agentConfig.Client.NomadServiceDiscovery
   849  	}
   850  
   851  	artifactConfig, err := clientconfig.ArtifactConfigFromAgent(agentConfig.Client.Artifact)
   852  	if err != nil {
   853  		return nil, fmt.Errorf("invalid artifact config: %v", err)
   854  	}
   855  	conf.Artifact = artifactConfig
   856  
   857  	drainConfig, err := clientconfig.DrainConfigFromAgent(agentConfig.Client.Drain)
   858  	if err != nil {
   859  		return nil, fmt.Errorf("invalid drain_on_shutdown config: %v", err)
   860  	}
   861  	conf.Drain = drainConfig
   862  
   863  	return conf, nil
   864  }
   865  
   866  // setupServer is used to setup the server if enabled
   867  func (a *Agent) setupServer() error {
   868  	if !a.config.Server.Enabled {
   869  		return nil
   870  	}
   871  
   872  	// Setup the configuration
   873  	conf, err := a.serverConfig()
   874  	if err != nil {
   875  		return fmt.Errorf("server config setup failed: %s", err)
   876  	}
   877  
   878  	// Generate a node ID and persist it if it is the first instance, otherwise
   879  	// read the persisted node ID.
   880  	if err := a.setupNodeID(conf); err != nil {
   881  		return fmt.Errorf("setting up server node ID failed: %s", err)
   882  	}
   883  
   884  	// Sets up the keyring for gossip encryption
   885  	if err := a.setupKeyrings(conf); err != nil {
   886  		return fmt.Errorf("failed to configure keyring: %v", err)
   887  	}
   888  
   889  	// Create the server
   890  	server, err := nomad.NewServer(conf, a.consulCatalog, a.consulConfigEntries, a.consulACLs)
   891  	if err != nil {
   892  		return fmt.Errorf("server setup failed: %v", err)
   893  	}
   894  	a.server = server
   895  
   896  	// Consul check addresses default to bind but can be toggled to use advertise
   897  	rpcCheckAddr := a.config.normalizedAddrs.RPC
   898  	serfCheckAddr := a.config.normalizedAddrs.Serf
   899  	if *a.config.Consul.ChecksUseAdvertise {
   900  		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
   901  		serfCheckAddr = a.config.AdvertiseAddrs.Serf
   902  	}
   903  
   904  	// Create the Nomad Server services for Consul
   905  	if *a.config.Consul.AutoAdvertise {
   906  		httpServ := &structs.Service{
   907  			Name:      a.config.Consul.ServerServiceName,
   908  			PortLabel: a.config.AdvertiseAddrs.HTTP,
   909  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
   910  		}
   911  		const isServer = true
   912  		if check := a.agentHTTPCheck(isServer); check != nil {
   913  			httpServ.Checks = []*structs.ServiceCheck{check}
   914  		}
   915  		rpcServ := &structs.Service{
   916  			Name:      a.config.Consul.ServerServiceName,
   917  			PortLabel: a.config.AdvertiseAddrs.RPC,
   918  			Tags:      append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...),
   919  			Checks: []*structs.ServiceCheck{
   920  				{
   921  					Name:      a.config.Consul.ServerRPCCheckName,
   922  					Type:      "tcp",
   923  					Interval:  serverRpcCheckInterval,
   924  					Timeout:   serverRpcCheckTimeout,
   925  					PortLabel: rpcCheckAddr,
   926  				},
   927  			},
   928  		}
   929  		serfServ := &structs.Service{
   930  			Name:      a.config.Consul.ServerServiceName,
   931  			PortLabel: a.config.AdvertiseAddrs.Serf,
   932  			Tags:      append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...),
   933  			Checks: []*structs.ServiceCheck{
   934  				{
   935  					Name:      a.config.Consul.ServerSerfCheckName,
   936  					Type:      "tcp",
   937  					Interval:  serverSerfCheckInterval,
   938  					Timeout:   serverSerfCheckTimeout,
   939  					PortLabel: serfCheckAddr,
   940  				},
   941  			},
   942  		}
   943  
   944  		// Add the http port check if TLS isn't enabled
   945  		consulServices := []*structs.Service{
   946  			rpcServ,
   947  			serfServ,
   948  			httpServ,
   949  		}
   950  		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
   951  			return err
   952  		}
   953  	}
   954  
   955  	return nil
   956  }
   957  
   958  // setupNodeID will pull the persisted node ID, if any, or create a random one
   959  // and persist it.
   960  func (a *Agent) setupNodeID(config *nomad.Config) error {
   961  	// For dev mode we have no filesystem access so just make a node ID.
   962  	if a.config.DevMode {
   963  		config.NodeID = uuid.Generate()
   964  		return nil
   965  	}
   966  
   967  	// Load saved state, if any. Since a user could edit this, we also
   968  	// validate it. Saved state overwrites any configured node id
   969  	fileID := filepath.Join(config.DataDir, "node-id")
   970  	if _, err := os.Stat(fileID); err == nil {
   971  		rawID, err := os.ReadFile(fileID)
   972  		if err != nil {
   973  			return err
   974  		}
   975  
   976  		nodeID := strings.TrimSpace(string(rawID))
   977  		nodeID = strings.ToLower(nodeID)
   978  		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
   979  			return err
   980  		}
   981  		config.NodeID = nodeID
   982  		return nil
   983  	}
   984  
   985  	// If they've configured a node ID manually then just use that, as
   986  	// long as it's valid.
   987  	if config.NodeID != "" {
   988  		config.NodeID = strings.ToLower(config.NodeID)
   989  		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
   990  			return err
   991  		}
   992  		// Persist this configured nodeID to our data directory
   993  		if err := escapingfs.EnsurePath(fileID, false); err != nil {
   994  			return err
   995  		}
   996  		if err := os.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
   997  			return err
   998  		}
   999  		return nil
  1000  	}
  1001  
  1002  	// If we still don't have a valid node ID, make one.
  1003  	if config.NodeID == "" {
  1004  		id := uuid.Generate()
  1005  		if err := escapingfs.EnsurePath(fileID, false); err != nil {
  1006  			return err
  1007  		}
  1008  		if err := os.WriteFile(fileID, []byte(id), 0600); err != nil {
  1009  			return err
  1010  		}
  1011  
  1012  		config.NodeID = id
  1013  	}
  1014  	return nil
  1015  }
  1016  
  1017  // setupKeyrings is used to initialize and load keyrings during agent startup
  1018  func (a *Agent) setupKeyrings(config *nomad.Config) error {
  1019  	file := filepath.Join(a.config.DataDir, serfKeyring)
  1020  
  1021  	if a.config.Server.EncryptKey == "" {
  1022  		goto LOAD
  1023  	}
  1024  	if _, err := os.Stat(file); err != nil {
  1025  		if err := initKeyring(file, a.config.Server.EncryptKey, a.logger); err != nil {
  1026  			return err
  1027  		}
  1028  	}
  1029  
  1030  LOAD:
  1031  	if _, err := os.Stat(file); err == nil {
  1032  		config.SerfConfig.KeyringFile = file
  1033  	}
  1034  	if err := loadKeyringFile(config.SerfConfig); err != nil {
  1035  		return err
  1036  	}
  1037  	// Success!
  1038  	return nil
  1039  }
  1040  
  1041  // setupClient is used to setup the client if enabled
  1042  func (a *Agent) setupClient() error {
  1043  	if !a.config.Client.Enabled {
  1044  		return nil
  1045  	}
  1046  
  1047  	// Plugin setup must happen before the call to clientConfig, because it
  1048  	// copies the pointers to the plugin loaders from the Agent to the
  1049  	// Client config.
  1050  	if err := a.setupPlugins(); err != nil {
  1051  		return err
  1052  	}
  1053  
  1054  	// Setup the configuration
  1055  	conf, err := a.clientConfig()
  1056  	if err != nil {
  1057  		return fmt.Errorf("client setup failed: %v", err)
  1058  	}
  1059  
  1060  	// Reserve some ports for the plugins if we are on Windows
  1061  	if runtime.GOOS == "windows" {
  1062  		if err := a.reservePortsForClient(conf); err != nil {
  1063  			return err
  1064  		}
  1065  	}
  1066  	if conf.StateDBFactory == nil {
  1067  		conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode)
  1068  	}
  1069  
  1070  	// Set up a custom listener and dialer. This is used by Nomad clients when
  1071  	// running consul-template functions that utilize the Nomad API. We lazy
  1072  	// load this into the client config, therefore this needs to happen before
  1073  	// we call NewClient.
  1074  	a.builtinListener, a.builtinDialer = bufconndialer.New()
  1075  	conf.TemplateDialer = a.builtinDialer
  1076  
  1077  	// Initialize builtin Task API server here for use in the client, but it
  1078  	// won't accept connections until the HTTP servers are created.
  1079  	a.taskAPIServer = newBuiltinAPI()
  1080  	conf.APIListenerRegistrar = a.taskAPIServer
  1081  
  1082  	nomadClient, err := client.NewClient(
  1083  		conf, a.consulCatalog, a.consulProxies, a.consulService, nil)
  1084  	if err != nil {
  1085  		return fmt.Errorf("client setup failed: %v", err)
  1086  	}
  1087  	a.client = nomadClient
  1088  
  1089  	// Create the Nomad Client  services for Consul
  1090  	if *a.config.Consul.AutoAdvertise {
  1091  		httpServ := &structs.Service{
  1092  			Name:      a.config.Consul.ClientServiceName,
  1093  			PortLabel: a.config.AdvertiseAddrs.HTTP,
  1094  			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
  1095  		}
  1096  		const isServer = false
  1097  		if check := a.agentHTTPCheck(isServer); check != nil {
  1098  			httpServ.Checks = []*structs.ServiceCheck{check}
  1099  		}
  1100  		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
  1101  			return err
  1102  		}
  1103  	}
  1104  
  1105  	return nil
  1106  }
  1107  
  1108  // agentHTTPCheck returns a health check for the agent's HTTP API if possible.
  1109  // If no HTTP health check can be supported nil is returned.
  1110  func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
  1111  	// Resolve the http check address
  1112  	httpCheckAddr := a.config.normalizedAddrs.HTTP[0]
  1113  	if *a.config.Consul.ChecksUseAdvertise {
  1114  		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
  1115  	}
  1116  	check := structs.ServiceCheck{
  1117  		Name:      a.config.Consul.ClientHTTPCheckName,
  1118  		Type:      "http",
  1119  		Path:      "/v1/agent/health?type=client",
  1120  		Protocol:  "http",
  1121  		Interval:  agentHttpCheckInterval,
  1122  		Timeout:   agentHttpCheckTimeout,
  1123  		PortLabel: httpCheckAddr,
  1124  	}
  1125  	// Switch to endpoint that doesn't require a leader for servers
  1126  	if server {
  1127  		check.Name = a.config.Consul.ServerHTTPCheckName
  1128  		check.Path = "/v1/agent/health?type=server"
  1129  	}
  1130  	if !a.config.TLSConfig.EnableHTTP {
  1131  		// No HTTPS, return a plain http check
  1132  		return &check
  1133  	}
  1134  	if a.config.TLSConfig.VerifyHTTPSClient {
  1135  		a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled")
  1136  		return nil
  1137  	}
  1138  
  1139  	// HTTPS enabled; skip verification
  1140  	check.Protocol = "https"
  1141  	check.TLSSkipVerify = true
  1142  	return &check
  1143  }
  1144  
  1145  // reservePortsForClient reserves a range of ports for the client to use when
  1146  // it creates various plugins for log collection, executors, drivers, etc
  1147  func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
  1148  	if conf.Node.ReservedResources == nil {
  1149  		conf.Node.ReservedResources = &structs.NodeReservedResources{}
  1150  	}
  1151  
  1152  	res := conf.Node.ReservedResources.Networks.ReservedHostPorts
  1153  	if res == "" {
  1154  		res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
  1155  	} else {
  1156  		res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
  1157  	}
  1158  	conf.Node.ReservedResources.Networks.ReservedHostPorts = res
  1159  	return nil
  1160  }
  1161  
  1162  // Leave is used gracefully exit. Clients will inform servers
  1163  // of their departure so that allocations can be rescheduled.
  1164  func (a *Agent) Leave() error {
  1165  	if a.client != nil {
  1166  		if err := a.client.Leave(); err != nil {
  1167  			a.logger.Error("client leave failed", "error", err)
  1168  		}
  1169  	}
  1170  	if a.server != nil {
  1171  		if err := a.server.Leave(); err != nil {
  1172  			a.logger.Error("server leave failed", "error", err)
  1173  		}
  1174  	}
  1175  	return nil
  1176  }
  1177  
  1178  // Shutdown is used to terminate the agent.
  1179  func (a *Agent) Shutdown() error {
  1180  	a.shutdownLock.Lock()
  1181  	defer a.shutdownLock.Unlock()
  1182  
  1183  	if a.shutdown {
  1184  		return nil
  1185  	}
  1186  
  1187  	a.logger.Info("requesting shutdown")
  1188  	if a.client != nil {
  1189  		// Task API must be closed separately from other HTTP servers and should
  1190  		// happen before the client is shutdown
  1191  		a.taskAPIServer.Shutdown()
  1192  
  1193  		if err := a.client.Shutdown(); err != nil {
  1194  			a.logger.Error("client shutdown failed", "error", err)
  1195  		}
  1196  	}
  1197  	if a.server != nil {
  1198  		if err := a.server.Shutdown(); err != nil {
  1199  			a.logger.Error("server shutdown failed", "error", err)
  1200  		}
  1201  	}
  1202  
  1203  	if err := a.consulService.Shutdown(); err != nil {
  1204  		a.logger.Error("shutting down Consul client failed", "error", err)
  1205  	}
  1206  
  1207  	a.logger.Info("shutdown complete")
  1208  	a.shutdown = true
  1209  	close(a.shutdownCh)
  1210  	return nil
  1211  }
  1212  
  1213  // RPC is used to make an RPC call to the Nomad servers
  1214  func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
  1215  	if a.server != nil {
  1216  		return a.server.RPC(method, args, reply)
  1217  	}
  1218  	return a.client.RPC(method, args, reply)
  1219  }
  1220  
  1221  // Client returns the configured client or nil
  1222  func (a *Agent) Client() *client.Client {
  1223  	return a.client
  1224  }
  1225  
  1226  // Server returns the configured server or nil
  1227  func (a *Agent) Server() *nomad.Server {
  1228  	return a.server
  1229  }
  1230  
  1231  // Stats is used to return statistics for debugging and insight
  1232  // for various sub-systems
  1233  func (a *Agent) Stats() map[string]map[string]string {
  1234  	stats := make(map[string]map[string]string)
  1235  	if a.server != nil {
  1236  		subStat := a.server.Stats()
  1237  		for k, v := range subStat {
  1238  			stats[k] = v
  1239  		}
  1240  	}
  1241  	if a.client != nil {
  1242  		subStat := a.client.Stats()
  1243  		for k, v := range subStat {
  1244  			stats[k] = v
  1245  		}
  1246  	}
  1247  	return stats
  1248  }
  1249  
  1250  // ShouldReload determines if we should reload the configuration and agent
  1251  // connections. If the TLS Configuration has not changed, we shouldn't reload.
  1252  func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) {
  1253  	a.configLock.Lock()
  1254  	defer a.configLock.Unlock()
  1255  
  1256  	if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel {
  1257  		agent = true
  1258  	}
  1259  
  1260  	isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig)
  1261  	if err != nil {
  1262  		a.logger.Error("parsing TLS certificate", "error", err)
  1263  		return agent, false
  1264  	} else if !isEqual {
  1265  		return true, true
  1266  	}
  1267  
  1268  	// Allow the ability to only reload HTTP connections
  1269  	if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP {
  1270  		http = true
  1271  		agent = true
  1272  	}
  1273  
  1274  	// Allow the ability to only reload HTTP connections
  1275  	if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC {
  1276  		agent = true
  1277  	}
  1278  
  1279  	if a.config.TLSConfig.RPCUpgradeMode != newConfig.TLSConfig.RPCUpgradeMode {
  1280  		agent = true
  1281  	}
  1282  
  1283  	return agent, http
  1284  }
  1285  
  1286  // Reload handles configuration changes for the agent. Provides a method that
  1287  // is easier to unit test, as this action is invoked via SIGHUP.
  1288  func (a *Agent) Reload(newConfig *Config) error {
  1289  	a.configLock.Lock()
  1290  	defer a.configLock.Unlock()
  1291  
  1292  	current := a.config.Copy()
  1293  
  1294  	updatedLogging := newConfig != nil && (newConfig.LogLevel != current.LogLevel)
  1295  
  1296  	if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging {
  1297  		return fmt.Errorf("cannot reload agent with nil configuration")
  1298  	}
  1299  
  1300  	if updatedLogging {
  1301  		current.LogLevel = newConfig.LogLevel
  1302  		a.logger.SetLevel(log.LevelFromString(current.LogLevel))
  1303  	}
  1304  
  1305  	// Update eventer config
  1306  	if newConfig.Audit != nil {
  1307  		if err := a.entReloadEventer(newConfig.Audit); err != nil {
  1308  			return err
  1309  		}
  1310  	}
  1311  	// Allow auditor to call reopen regardless of config changes
  1312  	// This is primarily for enterprise audit logging to allow the underlying
  1313  	// file to be reopened if necessary
  1314  	if err := a.auditor.Reopen(); err != nil {
  1315  		return err
  1316  	}
  1317  
  1318  	fullUpdateTLSConfig := func() {
  1319  		// Completely reload the agent's TLS configuration (moving from non-TLS to
  1320  		// TLS, or vice versa)
  1321  		// This does not handle errors in loading the new TLS configuration
  1322  		current.TLSConfig = newConfig.TLSConfig.Copy()
  1323  	}
  1324  
  1325  	if !current.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() {
  1326  		// This is just a TLS configuration reload, we don't need to refresh
  1327  		// existing network connections
  1328  
  1329  		// Reload the certificates on the keyloader and on success store the
  1330  		// updated TLS config. It is important to reuse the same keyloader
  1331  		// as this allows us to dynamically reload configurations not only
  1332  		// on the Agent but on the Server and Client too (they are
  1333  		// referencing the same keyloader).
  1334  		keyloader := current.TLSConfig.GetKeyLoader()
  1335  		_, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile)
  1336  		if err != nil {
  1337  			return err
  1338  		}
  1339  
  1340  		current.TLSConfig = newConfig.TLSConfig
  1341  		current.TLSConfig.KeyLoader = keyloader
  1342  		a.config = current
  1343  		return nil
  1344  	} else if newConfig.TLSConfig.IsEmpty() && !current.TLSConfig.IsEmpty() {
  1345  		a.logger.Warn("downgrading agent's existing TLS configuration to plaintext")
  1346  		fullUpdateTLSConfig()
  1347  	} else if !newConfig.TLSConfig.IsEmpty() && current.TLSConfig.IsEmpty() {
  1348  		a.logger.Info("upgrading from plaintext configuration to TLS")
  1349  		fullUpdateTLSConfig()
  1350  	}
  1351  
  1352  	// Set agent config to the updated config
  1353  	a.config = current
  1354  	return nil
  1355  }
  1356  
  1357  // GetConfig returns the current agent configuration. The Config should *not*
  1358  // be mutated directly. First call Config.Copy.
  1359  func (a *Agent) GetConfig() *Config {
  1360  	a.configLock.Lock()
  1361  	defer a.configLock.Unlock()
  1362  
  1363  	return a.config
  1364  }
  1365  
  1366  // GetMetricsSink returns the metrics sink.
  1367  func (a *Agent) GetMetricsSink() *metrics.InmemSink {
  1368  	return a.inmemSink
  1369  }
  1370  
  1371  // setupConsul creates the Consul client and starts its main Run loop.
  1372  func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error {
  1373  	apiConf, err := consulConfig.ApiConfig()
  1374  	if err != nil {
  1375  		return err
  1376  	}
  1377  
  1378  	consulClient, err := consulapi.NewClient(apiConf)
  1379  	if err != nil {
  1380  		return err
  1381  	}
  1382  
  1383  	// Create Consul Catalog client for service discovery.
  1384  	a.consulCatalog = consulClient.Catalog()
  1385  
  1386  	// Create Consul ConfigEntries client for managing Config Entries.
  1387  	a.consulConfigEntries = consulClient.ConfigEntries()
  1388  
  1389  	// Create Consul ACL client for managing tokens.
  1390  	a.consulACLs = consulClient.ACL()
  1391  
  1392  	// Create Consul Service client for service advertisement and checks.
  1393  	isClient := false
  1394  	if a.config.Client != nil && a.config.Client.Enabled {
  1395  		isClient = true
  1396  	}
  1397  	// Create Consul Agent client for looking info about the agent.
  1398  	consulAgentClient := consulClient.Agent()
  1399  	namespacesClient := consul.NewNamespacesClient(consulClient.Namespaces(), consulAgentClient)
  1400  	a.consulService = consul.NewServiceClient(consulAgentClient, namespacesClient, a.logger, isClient)
  1401  	a.consulProxies = consul.NewConnectProxiesClient(consulAgentClient)
  1402  
  1403  	// Run the Consul service client's sync'ing main loop
  1404  	go a.consulService.Run()
  1405  	return nil
  1406  }
  1407  
  1408  // noOpAuditor is a no-op Auditor that fulfills the
  1409  // event.Auditor interface.
  1410  type noOpAuditor struct{}
  1411  
  1412  // Ensure noOpAuditor is an Auditor
  1413  var _ event.Auditor = &noOpAuditor{}
  1414  
  1415  func (e *noOpAuditor) Event(ctx context.Context, eventType string, payload interface{}) error {
  1416  	return nil
  1417  }
  1418  
  1419  func (e *noOpAuditor) Enabled() bool {
  1420  	return false
  1421  }
  1422  
  1423  func (e *noOpAuditor) Reopen() error {
  1424  	return nil
  1425  }
  1426  
  1427  func (e *noOpAuditor) SetEnabled(enabled bool) {}
  1428  
  1429  func (e *noOpAuditor) DeliveryEnforced() bool { return false }