github.com/hernad/nomad@v1.6.112/command/agent/agent.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package agent 5 6 import ( 7 "context" 8 "fmt" 9 "io" 10 golog "log" 11 "net" 12 "os" 13 "path/filepath" 14 "runtime" 15 "strings" 16 "sync" 17 "time" 18 19 metrics "github.com/armon/go-metrics" 20 "github.com/dustin/go-humanize" 21 consulapi "github.com/hashicorp/consul/api" 22 log "github.com/hashicorp/go-hclog" 23 uuidparse "github.com/hashicorp/go-uuid" 24 "github.com/hernad/nomad/client" 25 clientconfig "github.com/hernad/nomad/client/config" 26 "github.com/hernad/nomad/client/lib/cgutil" 27 "github.com/hernad/nomad/client/state" 28 "github.com/hernad/nomad/command/agent/consul" 29 "github.com/hernad/nomad/command/agent/event" 30 "github.com/hernad/nomad/helper/bufconndialer" 31 "github.com/hernad/nomad/helper/escapingfs" 32 "github.com/hernad/nomad/helper/pluginutils/loader" 33 "github.com/hernad/nomad/helper/pointer" 34 "github.com/hernad/nomad/helper/uuid" 35 "github.com/hernad/nomad/lib/cpuset" 36 "github.com/hernad/nomad/nomad" 37 "github.com/hernad/nomad/nomad/deploymentwatcher" 38 "github.com/hernad/nomad/nomad/structs" 39 "github.com/hernad/nomad/nomad/structs/config" 40 "github.com/hashicorp/raft" 41 ) 42 43 const ( 44 agentHttpCheckInterval = 10 * time.Second 45 agentHttpCheckTimeout = 5 * time.Second 46 serverRpcCheckInterval = 10 * time.Second 47 serverRpcCheckTimeout = 3 * time.Second 48 serverSerfCheckInterval = 10 * time.Second 49 serverSerfCheckTimeout = 3 * time.Second 50 51 // roles used in identifying Consul entries for Nomad agents 52 consulRoleServer = "server" 53 consulRoleClient = "client" 54 55 // DefaultRaftMultiplier is used as a baseline Raft configuration that 56 // will be reliable on a very basic server. 57 DefaultRaftMultiplier = 1 58 59 // MaxRaftMultiplier is a fairly arbitrary upper bound that limits the 60 // amount of performance detuning that's possible. 61 MaxRaftMultiplier = 10 62 ) 63 64 // Agent is a long running daemon that is used to run both 65 // clients and servers. Servers are responsible for managing 66 // state and making scheduling decisions. Clients can be 67 // scheduled to, and are responsible for interfacing with 68 // servers to run allocations. 69 type Agent struct { 70 config *Config 71 configLock sync.Mutex 72 73 logger log.InterceptLogger 74 auditor event.Auditor 75 httpLogger log.Logger 76 logOutput io.Writer 77 78 // EnterpriseAgent holds information and methods for enterprise functionality 79 EnterpriseAgent *EnterpriseAgent 80 81 // consulService is Nomad's custom Consul client for managing services 82 // and checks. 83 consulService *consul.ServiceClient 84 85 // consulProxies is the subset of Consul's Agent API Nomad uses. 86 consulProxies *consul.ConnectProxies 87 88 // consulCatalog is the subset of Consul's Catalog API Nomad uses. 89 consulCatalog consul.CatalogAPI 90 91 // consulConfigEntries is the subset of Consul's Configuration Entries API Nomad uses. 92 consulConfigEntries consul.ConfigAPI 93 94 // consulACLs is Nomad's subset of Consul's ACL API Nomad uses. 95 consulACLs consul.ACLsAPI 96 97 // client is the launched Nomad Client. Can be nil if the agent isn't 98 // configured to run a client. 99 client *client.Client 100 101 // server is the launched Nomad Server. Can be nil if the agent isn't 102 // configured to run a server. 103 server *nomad.Server 104 105 // pluginLoader is used to load plugins 106 pluginLoader loader.PluginCatalog 107 108 // pluginSingletonLoader is a plugin loader that will returns singleton 109 // instances of the plugins. 110 pluginSingletonLoader loader.PluginCatalog 111 112 shutdown bool 113 shutdownCh chan struct{} 114 shutdownLock sync.Mutex 115 116 // builtinDialer dials the builtinListener. It is used for connecting 117 // consul-template to the HTTP API in process. In the event this agent is 118 // not running in client mode, these two fields will be nil. 119 builtinListener net.Listener 120 builtinDialer *bufconndialer.BufConnWrapper 121 122 // taskAPIServer is an HTTP server for attaching per-task listeners. Always 123 // requires auth. 124 taskAPIServer *builtinAPI 125 126 inmemSink *metrics.InmemSink 127 } 128 129 // NewAgent is used to create a new agent with the given configuration 130 func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) { 131 a := &Agent{ 132 config: config, 133 logOutput: logOutput, 134 shutdownCh: make(chan struct{}), 135 inmemSink: inmem, 136 } 137 138 // Create the loggers 139 a.logger = logger 140 a.httpLogger = a.logger.ResetNamed("http") 141 142 // Global logger should match internal logger as much as possible 143 golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds) 144 145 if err := a.setupConsul(config.Consul); err != nil { 146 return nil, fmt.Errorf("Failed to initialize Consul client: %v", err) 147 } 148 149 if err := a.setupServer(); err != nil { 150 return nil, err 151 } 152 if err := a.setupClient(); err != nil { 153 return nil, err 154 } 155 156 if err := a.setupEnterpriseAgent(logger); err != nil { 157 return nil, err 158 } 159 if a.client == nil && a.server == nil { 160 return nil, fmt.Errorf("must have at least client or server mode enabled") 161 } 162 163 return a, nil 164 } 165 166 // convertServerConfig takes an agent config and log output and returns a Nomad 167 // Config. There may be missing fields that must be set by the agent. To do this 168 // call finalizeServerConfig. 169 func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { 170 conf := agentConfig.NomadConfig 171 if conf == nil { 172 conf = nomad.DefaultConfig() 173 } 174 conf.DevMode = agentConfig.DevMode 175 conf.EnableDebug = agentConfig.EnableDebug 176 177 conf.Build = agentConfig.Version.VersionNumber() 178 conf.Revision = agentConfig.Version.Revision 179 if agentConfig.Region != "" { 180 conf.Region = agentConfig.Region 181 } 182 183 // Set the Authoritative Region if set, otherwise default to 184 // the same as the local region. 185 if agentConfig.Server.AuthoritativeRegion != "" { 186 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 187 } else if agentConfig.Region != "" { 188 conf.AuthoritativeRegion = agentConfig.Region 189 } 190 191 if agentConfig.Datacenter != "" { 192 conf.Datacenter = agentConfig.Datacenter 193 } 194 if agentConfig.NodeName != "" { 195 conf.NodeName = agentConfig.NodeName 196 } 197 if agentConfig.Server.BootstrapExpect > 0 { 198 conf.BootstrapExpect = agentConfig.Server.BootstrapExpect 199 } 200 if agentConfig.DataDir != "" { 201 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 202 } 203 if agentConfig.Server.DataDir != "" { 204 conf.DataDir = agentConfig.Server.DataDir 205 } 206 if agentConfig.Server.RaftProtocol != 0 { 207 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 208 } 209 if v := conf.RaftConfig.ProtocolVersion; v != 3 { 210 return nil, fmt.Errorf("raft_protocol must be 3 in Nomad v1.4 and later, got %d", v) 211 } 212 raftMultiplier := int(DefaultRaftMultiplier) 213 if agentConfig.Server.RaftMultiplier != nil && *agentConfig.Server.RaftMultiplier != 0 { 214 raftMultiplier = *agentConfig.Server.RaftMultiplier 215 if raftMultiplier < 1 || raftMultiplier > MaxRaftMultiplier { 216 return nil, fmt.Errorf("raft_multiplier cannot be %d. Must be between 1 and %d", *agentConfig.Server.RaftMultiplier, MaxRaftMultiplier) 217 } 218 } 219 220 if vPtr := agentConfig.Server.RaftTrailingLogs; vPtr != nil { 221 if *vPtr < 1 { 222 return nil, fmt.Errorf("raft_trailing_logs must be non-negative, got %d", *vPtr) 223 } 224 conf.RaftConfig.TrailingLogs = uint64(*vPtr) 225 } 226 227 if vPtr := agentConfig.Server.RaftSnapshotInterval; vPtr != nil { 228 dur, err := time.ParseDuration(*vPtr) 229 if err != nil { 230 return nil, err 231 } 232 if dur < 5*time.Millisecond { 233 return nil, fmt.Errorf("raft_snapshot_interval must be greater than 5ms, got %q", *vPtr) 234 } 235 conf.RaftConfig.SnapshotInterval = dur 236 } 237 238 if vPtr := agentConfig.Server.RaftSnapshotThreshold; vPtr != nil { 239 if *vPtr < 1 { 240 return nil, fmt.Errorf("raft_snapshot_threshold must be non-negative, got %d", *vPtr) 241 } 242 conf.RaftConfig.SnapshotThreshold = uint64(*vPtr) 243 } 244 245 conf.RaftConfig.ElectionTimeout *= time.Duration(raftMultiplier) 246 conf.RaftConfig.HeartbeatTimeout *= time.Duration(raftMultiplier) 247 conf.RaftConfig.LeaderLeaseTimeout *= time.Duration(raftMultiplier) 248 conf.RaftConfig.CommitTimeout *= time.Duration(raftMultiplier) 249 250 if agentConfig.Server.NumSchedulers != nil { 251 conf.NumSchedulers = *agentConfig.Server.NumSchedulers 252 } 253 if len(agentConfig.Server.EnabledSchedulers) != 0 { 254 // Convert to a set and require the core scheduler 255 set := make(map[string]struct{}, 4) 256 set[structs.JobTypeCore] = struct{}{} 257 for _, sched := range agentConfig.Server.EnabledSchedulers { 258 set[sched] = struct{}{} 259 } 260 261 schedulers := make([]string, 0, len(set)) 262 for k := range set { 263 schedulers = append(schedulers, k) 264 } 265 266 conf.EnabledSchedulers = schedulers 267 268 } 269 if agentConfig.ACL.Enabled { 270 conf.ACLEnabled = true 271 } 272 if agentConfig.ACL.ReplicationToken != "" { 273 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 274 } 275 if agentConfig.ACL.TokenMinExpirationTTL != 0 { 276 conf.ACLTokenMinExpirationTTL = agentConfig.ACL.TokenMinExpirationTTL 277 } 278 if agentConfig.ACL.TokenMaxExpirationTTL != 0 { 279 conf.ACLTokenMaxExpirationTTL = agentConfig.ACL.TokenMaxExpirationTTL 280 } 281 if agentConfig.Sentinel != nil { 282 conf.SentinelConfig = agentConfig.Sentinel 283 } 284 if agentConfig.Server.NonVotingServer { 285 conf.NonVoter = true 286 } 287 if agentConfig.Server.RedundancyZone != "" { 288 conf.RedundancyZone = agentConfig.Server.RedundancyZone 289 } 290 if agentConfig.Server.UpgradeVersion != "" { 291 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 292 } 293 if agentConfig.Server.EnableEventBroker != nil { 294 conf.EnableEventBroker = *agentConfig.Server.EnableEventBroker 295 } 296 if agentConfig.Server.EventBufferSize != nil { 297 if *agentConfig.Server.EventBufferSize < 0 { 298 return nil, fmt.Errorf("Invalid Config, event_buffer_size must be non-negative") 299 } 300 conf.EventBufferSize = int64(*agentConfig.Server.EventBufferSize) 301 } 302 if agentConfig.Autopilot != nil { 303 if agentConfig.Autopilot.CleanupDeadServers != nil { 304 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 305 } 306 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 307 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 308 } 309 if agentConfig.Autopilot.LastContactThreshold != 0 { 310 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 311 } 312 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 313 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 314 } 315 if agentConfig.Autopilot.MinQuorum != 0 { 316 conf.AutopilotConfig.MinQuorum = uint(agentConfig.Autopilot.MinQuorum) 317 } 318 if agentConfig.Autopilot.EnableRedundancyZones != nil { 319 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 320 } 321 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 322 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 323 } 324 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 325 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 326 } 327 } 328 329 jobMaxPriority := structs.JobDefaultMaxPriority 330 if agentConfig.Server.JobMaxPriority != nil && *agentConfig.Server.JobMaxPriority != 0 { 331 jobMaxPriority = *agentConfig.Server.JobMaxPriority 332 if jobMaxPriority < structs.JobDefaultMaxPriority || jobMaxPriority > structs.JobMaxPriority { 333 return nil, fmt.Errorf("job_max_priority cannot be %d. Must be between %d and %d", *agentConfig.Server.JobMaxPriority, structs.JobDefaultMaxPriority, structs.JobMaxPriority) 334 } 335 } 336 jobDefaultPriority := structs.JobDefaultPriority 337 if agentConfig.Server.JobDefaultPriority != nil && *agentConfig.Server.JobDefaultPriority != 0 { 338 jobDefaultPriority = *agentConfig.Server.JobDefaultPriority 339 if jobDefaultPriority < structs.JobDefaultPriority || jobDefaultPriority >= jobMaxPriority { 340 return nil, fmt.Errorf("job_default_priority cannot be %d. Must be between %d and %d", *agentConfig.Server.JobDefaultPriority, structs.JobDefaultPriority, jobMaxPriority) 341 } 342 } 343 conf.JobMaxPriority = jobMaxPriority 344 conf.JobDefaultPriority = jobDefaultPriority 345 346 // Set up the bind addresses 347 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 348 if err != nil { 349 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 350 } 351 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 352 if err != nil { 353 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 354 } 355 conf.RPCAddr.Port = rpcAddr.Port 356 conf.RPCAddr.IP = rpcAddr.IP 357 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 358 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 359 conf.SerfConfig.RejoinAfterLeave = agentConfig.Server.RejoinAfterLeave 360 361 // Set up the advertise addresses 362 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 363 if err != nil { 364 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 365 } 366 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 367 if err != nil { 368 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 369 } 370 371 // Server address is the serf advertise address and rpc port. This is the 372 // address that all servers should be able to communicate over RPC with. 373 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 374 if err != nil { 375 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 376 } 377 378 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 379 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 380 conf.ClientRPCAdvertise = rpcAddr 381 conf.ServerRPCAdvertise = serverAddr 382 383 // Set up gc threshold and heartbeat grace period 384 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 385 dur, err := time.ParseDuration(gcThreshold) 386 if err != nil { 387 return nil, err 388 } 389 conf.NodeGCThreshold = dur 390 } 391 if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" { 392 dur, err := time.ParseDuration(gcInterval) 393 if err != nil { 394 return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err) 395 } else if dur <= time.Duration(0) { 396 return nil, fmt.Errorf("job_gc_interval should be greater than 0s") 397 } 398 conf.JobGCInterval = dur 399 } 400 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 401 dur, err := time.ParseDuration(gcThreshold) 402 if err != nil { 403 return nil, err 404 } 405 conf.JobGCThreshold = dur 406 } 407 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 408 dur, err := time.ParseDuration(gcThreshold) 409 if err != nil { 410 return nil, err 411 } 412 conf.EvalGCThreshold = dur 413 } 414 if gcThreshold := agentConfig.Server.BatchEvalGCThreshold; gcThreshold != "" { 415 dur, err := time.ParseDuration(gcThreshold) 416 if err != nil { 417 return nil, err 418 } 419 conf.BatchEvalGCThreshold = dur 420 } 421 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 422 dur, err := time.ParseDuration(gcThreshold) 423 if err != nil { 424 return nil, err 425 } 426 conf.DeploymentGCThreshold = dur 427 } 428 if gcInterval := agentConfig.Server.CSIVolumeClaimGCInterval; gcInterval != "" { 429 dur, err := time.ParseDuration(gcInterval) 430 if err != nil { 431 return nil, err 432 } else if dur <= time.Duration(0) { 433 return nil, fmt.Errorf("csi_volume_claim_gc_interval should be greater than 0s") 434 } 435 conf.CSIVolumeClaimGCInterval = dur 436 } 437 if gcThreshold := agentConfig.Server.CSIVolumeClaimGCThreshold; gcThreshold != "" { 438 dur, err := time.ParseDuration(gcThreshold) 439 if err != nil { 440 return nil, err 441 } 442 conf.CSIVolumeClaimGCThreshold = dur 443 } 444 if gcThreshold := agentConfig.Server.CSIPluginGCThreshold; gcThreshold != "" { 445 dur, err := time.ParseDuration(gcThreshold) 446 if err != nil { 447 return nil, err 448 } 449 conf.CSIPluginGCThreshold = dur 450 } 451 if gcThreshold := agentConfig.Server.ACLTokenGCThreshold; gcThreshold != "" { 452 dur, err := time.ParseDuration(gcThreshold) 453 if err != nil { 454 return nil, err 455 } 456 conf.ACLTokenExpirationGCThreshold = dur 457 } 458 if gcThreshold := agentConfig.Server.RootKeyGCThreshold; gcThreshold != "" { 459 dur, err := time.ParseDuration(gcThreshold) 460 if err != nil { 461 return nil, err 462 } 463 conf.RootKeyGCThreshold = dur 464 } 465 if gcInterval := agentConfig.Server.RootKeyGCInterval; gcInterval != "" { 466 dur, err := time.ParseDuration(gcInterval) 467 if err != nil { 468 return nil, err 469 } 470 conf.RootKeyGCInterval = dur 471 } 472 if rotationThreshold := agentConfig.Server.RootKeyRotationThreshold; rotationThreshold != "" { 473 dur, err := time.ParseDuration(rotationThreshold) 474 if err != nil { 475 return nil, err 476 } 477 conf.RootKeyRotationThreshold = dur 478 } 479 480 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 481 conf.HeartbeatGrace = heartbeatGrace 482 } 483 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 484 conf.MinHeartbeatTTL = min 485 } 486 if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 487 conf.MaxHeartbeatsPerSecond = maxHPS 488 } 489 if failoverTTL := agentConfig.Server.FailoverHeartbeatTTL; failoverTTL != 0 { 490 conf.FailoverHeartbeatTTL = failoverTTL 491 } 492 493 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 494 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 495 } 496 497 // handle system scheduler preemption default 498 if agentConfig.Server.DefaultSchedulerConfig != nil { 499 conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig 500 } 501 502 // Add the Consul and Vault configs 503 conf.ConsulConfig = agentConfig.Consul 504 conf.VaultConfig = agentConfig.Vault 505 506 // Set the TLS config 507 conf.TLSConfig = agentConfig.TLSConfig 508 509 // Setup telemetry related config 510 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 511 conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics 512 conf.DisableRPCRateMetricsLabels = agentConfig.Telemetry.DisableRPCRateMetricsLabels 513 514 if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil { 515 return nil, fmt.Errorf("error parsing rpc_handshake_timeout: %v", err) 516 } else if d < 0 { 517 return nil, fmt.Errorf("rpc_handshake_timeout must be >= 0") 518 } else { 519 conf.RPCHandshakeTimeout = d 520 } 521 522 // Set max rpc conns; nil/0 == unlimited 523 // Leave a little room for streaming RPCs 524 minLimit := config.LimitsNonStreamingConnsPerClient + 5 525 if agentConfig.Limits.RPCMaxConnsPerClient == nil || *agentConfig.Limits.RPCMaxConnsPerClient == 0 { 526 conf.RPCMaxConnsPerClient = 0 527 } else if limit := *agentConfig.Limits.RPCMaxConnsPerClient; limit <= minLimit { 528 return nil, fmt.Errorf("rpc_max_conns_per_client must be > %d; found: %d", minLimit, limit) 529 } else { 530 conf.RPCMaxConnsPerClient = limit 531 } 532 533 // Set deployment rate limit 534 if rate := agentConfig.Server.DeploymentQueryRateLimit; rate == 0 { 535 conf.DeploymentQueryRateLimit = deploymentwatcher.LimitStateQueriesPerSecond 536 } else if rate > 0 { 537 conf.DeploymentQueryRateLimit = rate 538 } else { 539 return nil, fmt.Errorf("deploy_query_rate_limit must be greater than 0") 540 } 541 542 // Set plan rejection tracker configuration. 543 if planRejectConf := agentConfig.Server.PlanRejectionTracker; planRejectConf != nil { 544 if planRejectConf.Enabled != nil { 545 conf.NodePlanRejectionEnabled = *planRejectConf.Enabled 546 } 547 conf.NodePlanRejectionThreshold = planRejectConf.NodeThreshold 548 549 if planRejectConf.NodeWindow == 0 { 550 return nil, fmt.Errorf("plan_rejection_tracker.node_window must be greater than 0") 551 } else { 552 conf.NodePlanRejectionWindow = planRejectConf.NodeWindow 553 } 554 } 555 556 // Add Enterprise license configs 557 conf.LicenseConfig = &nomad.LicenseConfig{ 558 BuildDate: agentConfig.Version.BuildDate, 559 AdditionalPubKeys: agentConfig.Server.licenseAdditionalPublicKeys, 560 LicenseEnvBytes: agentConfig.Server.LicenseEnv, 561 LicensePath: agentConfig.Server.LicensePath, 562 } 563 564 // Add the search configuration 565 if search := agentConfig.Server.Search; search != nil { 566 conf.SearchConfig = &structs.SearchConfig{ 567 FuzzyEnabled: search.FuzzyEnabled, 568 LimitQuery: search.LimitQuery, 569 LimitResults: search.LimitResults, 570 MinTermLength: search.MinTermLength, 571 } 572 } 573 574 // Set the raft bolt parameters 575 if bolt := agentConfig.Server.RaftBoltConfig; bolt != nil { 576 conf.RaftBoltNoFreelistSync = bolt.NoFreelistSync 577 } 578 579 // Interpret job_max_source_size as bytes from string value 580 if agentConfig.Server.JobMaxSourceSize == nil { 581 agentConfig.Server.JobMaxSourceSize = pointer.Of("1M") 582 } 583 jobMaxSourceBytes, err := humanize.ParseBytes(*agentConfig.Server.JobMaxSourceSize) 584 if err != nil { 585 return nil, fmt.Errorf("failed to parse max job source bytes: %w", err) 586 } 587 conf.JobMaxSourceSize = int(jobMaxSourceBytes) 588 589 return conf, nil 590 } 591 592 // serverConfig is used to generate a new server configuration struct 593 // for initializing a nomad server. 594 func (a *Agent) serverConfig() (*nomad.Config, error) { 595 c, err := convertServerConfig(a.config) 596 if err != nil { 597 return nil, err 598 } 599 600 a.finalizeServerConfig(c) 601 return c, nil 602 } 603 604 // finalizeServerConfig sets configuration fields on the server config that are 605 // not statically convertible and are from the agent. 606 func (a *Agent) finalizeServerConfig(c *nomad.Config) { 607 // Setup the logging 608 c.Logger = a.logger 609 c.LogOutput = a.logOutput 610 c.AgentShutdown = func() error { return a.Shutdown() } 611 } 612 613 // clientConfig is used to generate a new client configuration struct for 614 // initializing a Nomad client. 615 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 616 c, err := convertClientConfig(a.config) 617 if err != nil { 618 return nil, err 619 } 620 621 if err = a.finalizeClientConfig(c); err != nil { 622 return nil, err 623 } 624 625 return c, nil 626 } 627 628 // finalizeClientConfig sets configuration fields on the client config that are 629 // not statically convertible and are from the agent. 630 func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error { 631 // Setup the logging 632 c.Logger = a.logger 633 634 // If we are running a server, append both its bind and advertise address so 635 // we are able to at least talk to the local server even if that isn't 636 // configured explicitly. This handles both running server and client on one 637 // host and -dev mode. 638 if a.server != nil { 639 advertised := a.config.AdvertiseAddrs 640 normalized := a.config.normalizedAddrs 641 642 if advertised == nil || advertised.RPC == "" { 643 return fmt.Errorf("AdvertiseAddrs is nil or empty") 644 } else if normalized == nil || normalized.RPC == "" { 645 return fmt.Errorf("normalizedAddrs is nil or empty") 646 } 647 648 if normalized.RPC == advertised.RPC { 649 c.Servers = append(c.Servers, normalized.RPC) 650 } else { 651 c.Servers = append(c.Servers, normalized.RPC, advertised.RPC) 652 } 653 } 654 655 // Setup the plugin loaders 656 c.PluginLoader = a.pluginLoader 657 c.PluginSingletonLoader = a.pluginSingletonLoader 658 659 // Log deprecation messages about Consul related configuration in client 660 // options 661 var invalidConsulKeys []string 662 for key := range c.Options { 663 if strings.HasPrefix(key, "consul") { 664 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 665 } 666 } 667 if len(invalidConsulKeys) > 0 { 668 a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ",")) 669 a.logger.Warn(`Nomad client ignores consul related configuration in client options. 670 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 671 to configure Nomad to work with Consul.`) 672 } 673 674 return nil 675 } 676 677 // convertClientConfig takes an agent config and log output and returns a client 678 // Config. There may be missing fields that must be set by the agent. To do this 679 // call finalizeServerConfig 680 func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { 681 // Set up the configuration 682 conf := agentConfig.ClientConfig 683 if conf == nil { 684 conf = clientconfig.DefaultConfig() 685 } 686 687 conf.Servers = agentConfig.Client.Servers 688 conf.DevMode = agentConfig.DevMode 689 conf.EnableDebug = agentConfig.EnableDebug 690 691 if agentConfig.Region != "" { 692 conf.Region = agentConfig.Region 693 } 694 if agentConfig.DataDir != "" { 695 conf.StateDir = filepath.Join(agentConfig.DataDir, "client") 696 conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc") 697 } 698 if agentConfig.Client.StateDir != "" { 699 conf.StateDir = agentConfig.Client.StateDir 700 } 701 if agentConfig.Client.AllocDir != "" { 702 conf.AllocDir = agentConfig.Client.AllocDir 703 } 704 if agentConfig.Client.NetworkInterface != "" { 705 conf.NetworkInterface = agentConfig.Client.NetworkInterface 706 } 707 conf.ChrootEnv = agentConfig.Client.ChrootEnv 708 conf.Options = agentConfig.Client.Options 709 if agentConfig.Client.NetworkSpeed != 0 { 710 conf.NetworkSpeed = agentConfig.Client.NetworkSpeed 711 } 712 if agentConfig.Client.CpuCompute != 0 { 713 conf.CpuCompute = agentConfig.Client.CpuCompute 714 } 715 if agentConfig.Client.MemoryMB != 0 { 716 conf.MemoryMB = agentConfig.Client.MemoryMB 717 } 718 if agentConfig.Client.DiskTotalMB != 0 { 719 conf.DiskTotalMB = agentConfig.Client.DiskTotalMB 720 } 721 if agentConfig.Client.DiskFreeMB != 0 { 722 conf.DiskFreeMB = agentConfig.Client.DiskFreeMB 723 } 724 if agentConfig.Client.MaxKillTimeout != "" { 725 dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout) 726 if err != nil { 727 return nil, fmt.Errorf("Error parsing max kill timeout: %s", err) 728 } 729 conf.MaxKillTimeout = dur 730 } 731 conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort) 732 conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort) 733 conf.MaxDynamicPort = agentConfig.Client.MaxDynamicPort 734 conf.MinDynamicPort = agentConfig.Client.MinDynamicPort 735 conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec 736 737 if agentConfig.Client.TemplateConfig != nil { 738 conf.TemplateConfig = agentConfig.Client.TemplateConfig.Copy() 739 } 740 741 hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes)) 742 for _, v := range agentConfig.Client.HostVolumes { 743 hvMap[v.Name] = v 744 } 745 conf.HostVolumes = hvMap 746 747 // Setup the node 748 conf.Node = new(structs.Node) 749 conf.Node.Datacenter = agentConfig.Datacenter 750 conf.Node.Name = agentConfig.NodeName 751 conf.Node.Meta = agentConfig.Client.Meta 752 conf.Node.NodeClass = agentConfig.Client.NodeClass 753 conf.Node.NodePool = agentConfig.Client.NodePool 754 755 // Set up the HTTP advertise address 756 conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP 757 758 // Canonicalize Node struct 759 conf.Node.Canonicalize() 760 761 // Reserve resources on the node. 762 // COMPAT(0.10): Remove in 0.10 763 r := conf.Node.Reserved 764 if r == nil { 765 r = new(structs.Resources) 766 conf.Node.Reserved = r 767 } 768 r.CPU = agentConfig.Client.Reserved.CPU 769 r.MemoryMB = agentConfig.Client.Reserved.MemoryMB 770 r.DiskMB = agentConfig.Client.Reserved.DiskMB 771 772 res := conf.Node.ReservedResources 773 if res == nil { 774 res = new(structs.NodeReservedResources) 775 conf.Node.ReservedResources = res 776 } 777 res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU) 778 res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB) 779 res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB) 780 res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts 781 if agentConfig.Client.Reserved.Cores != "" { 782 cores, err := cpuset.Parse(agentConfig.Client.Reserved.Cores) 783 if err != nil { 784 return nil, fmt.Errorf("failed to parse client > reserved > cores value %q: %v", agentConfig.Client.Reserved.Cores, err) 785 } 786 res.Cpu.ReservedCpuCores = cores.ToSlice() 787 } 788 789 conf.Version = agentConfig.Version 790 791 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" { 792 return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled") 793 } 794 795 conf.ConsulConfig = agentConfig.Consul 796 conf.VaultConfig = agentConfig.Vault 797 798 // Set up Telemetry configuration 799 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 800 conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics 801 conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics 802 803 // Set the TLS related configs 804 conf.TLSConfig = agentConfig.TLSConfig 805 conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP 806 807 // Set the GC related configs 808 conf.GCInterval = agentConfig.Client.GCInterval 809 conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys 810 conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold 811 conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold 812 conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs 813 if agentConfig.Client.NoHostUUID != nil { 814 conf.NoHostUUID = *agentConfig.Client.NoHostUUID 815 } else { 816 // Default no_host_uuid to true 817 conf.NoHostUUID = true 818 } 819 820 // Setup the ACLs 821 conf.ACLEnabled = agentConfig.ACL.Enabled 822 conf.ACLTokenTTL = agentConfig.ACL.TokenTTL 823 conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL 824 conf.ACLRoleTTL = agentConfig.ACL.RoleTTL 825 826 // Setup networking configuration 827 conf.CNIPath = agentConfig.Client.CNIPath 828 conf.CNIConfigDir = agentConfig.Client.CNIConfigDir 829 conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName 830 conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet 831 conf.BridgeNetworkHairpinMode = agentConfig.Client.BridgeNetworkHairpinMode 832 833 for _, hn := range agentConfig.Client.HostNetworks { 834 conf.HostNetworks[hn.Name] = hn 835 } 836 conf.BindWildcardDefaultHostNetwork = agentConfig.Client.BindWildcardDefaultHostNetwork 837 838 conf.CgroupParent = cgutil.GetCgroupParent(agentConfig.Client.CgroupParent) 839 if agentConfig.Client.ReserveableCores != "" { 840 cores, err := cpuset.Parse(agentConfig.Client.ReserveableCores) 841 if err != nil { 842 return nil, fmt.Errorf("failed to parse 'reservable_cores': %v", err) 843 } 844 conf.ReservableCores = cores.ToSlice() 845 } 846 847 if agentConfig.Client.NomadServiceDiscovery != nil { 848 conf.NomadServiceDiscovery = *agentConfig.Client.NomadServiceDiscovery 849 } 850 851 artifactConfig, err := clientconfig.ArtifactConfigFromAgent(agentConfig.Client.Artifact) 852 if err != nil { 853 return nil, fmt.Errorf("invalid artifact config: %v", err) 854 } 855 conf.Artifact = artifactConfig 856 857 drainConfig, err := clientconfig.DrainConfigFromAgent(agentConfig.Client.Drain) 858 if err != nil { 859 return nil, fmt.Errorf("invalid drain_on_shutdown config: %v", err) 860 } 861 conf.Drain = drainConfig 862 863 return conf, nil 864 } 865 866 // setupServer is used to setup the server if enabled 867 func (a *Agent) setupServer() error { 868 if !a.config.Server.Enabled { 869 return nil 870 } 871 872 // Setup the configuration 873 conf, err := a.serverConfig() 874 if err != nil { 875 return fmt.Errorf("server config setup failed: %s", err) 876 } 877 878 // Generate a node ID and persist it if it is the first instance, otherwise 879 // read the persisted node ID. 880 if err := a.setupNodeID(conf); err != nil { 881 return fmt.Errorf("setting up server node ID failed: %s", err) 882 } 883 884 // Sets up the keyring for gossip encryption 885 if err := a.setupKeyrings(conf); err != nil { 886 return fmt.Errorf("failed to configure keyring: %v", err) 887 } 888 889 // Create the server 890 server, err := nomad.NewServer(conf, a.consulCatalog, a.consulConfigEntries, a.consulACLs) 891 if err != nil { 892 return fmt.Errorf("server setup failed: %v", err) 893 } 894 a.server = server 895 896 // Consul check addresses default to bind but can be toggled to use advertise 897 rpcCheckAddr := a.config.normalizedAddrs.RPC 898 serfCheckAddr := a.config.normalizedAddrs.Serf 899 if *a.config.Consul.ChecksUseAdvertise { 900 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 901 serfCheckAddr = a.config.AdvertiseAddrs.Serf 902 } 903 904 // Create the Nomad Server services for Consul 905 if *a.config.Consul.AutoAdvertise { 906 httpServ := &structs.Service{ 907 Name: a.config.Consul.ServerServiceName, 908 PortLabel: a.config.AdvertiseAddrs.HTTP, 909 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 910 } 911 const isServer = true 912 if check := a.agentHTTPCheck(isServer); check != nil { 913 httpServ.Checks = []*structs.ServiceCheck{check} 914 } 915 rpcServ := &structs.Service{ 916 Name: a.config.Consul.ServerServiceName, 917 PortLabel: a.config.AdvertiseAddrs.RPC, 918 Tags: append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...), 919 Checks: []*structs.ServiceCheck{ 920 { 921 Name: a.config.Consul.ServerRPCCheckName, 922 Type: "tcp", 923 Interval: serverRpcCheckInterval, 924 Timeout: serverRpcCheckTimeout, 925 PortLabel: rpcCheckAddr, 926 }, 927 }, 928 } 929 serfServ := &structs.Service{ 930 Name: a.config.Consul.ServerServiceName, 931 PortLabel: a.config.AdvertiseAddrs.Serf, 932 Tags: append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...), 933 Checks: []*structs.ServiceCheck{ 934 { 935 Name: a.config.Consul.ServerSerfCheckName, 936 Type: "tcp", 937 Interval: serverSerfCheckInterval, 938 Timeout: serverSerfCheckTimeout, 939 PortLabel: serfCheckAddr, 940 }, 941 }, 942 } 943 944 // Add the http port check if TLS isn't enabled 945 consulServices := []*structs.Service{ 946 rpcServ, 947 serfServ, 948 httpServ, 949 } 950 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 951 return err 952 } 953 } 954 955 return nil 956 } 957 958 // setupNodeID will pull the persisted node ID, if any, or create a random one 959 // and persist it. 960 func (a *Agent) setupNodeID(config *nomad.Config) error { 961 // For dev mode we have no filesystem access so just make a node ID. 962 if a.config.DevMode { 963 config.NodeID = uuid.Generate() 964 return nil 965 } 966 967 // Load saved state, if any. Since a user could edit this, we also 968 // validate it. Saved state overwrites any configured node id 969 fileID := filepath.Join(config.DataDir, "node-id") 970 if _, err := os.Stat(fileID); err == nil { 971 rawID, err := os.ReadFile(fileID) 972 if err != nil { 973 return err 974 } 975 976 nodeID := strings.TrimSpace(string(rawID)) 977 nodeID = strings.ToLower(nodeID) 978 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 979 return err 980 } 981 config.NodeID = nodeID 982 return nil 983 } 984 985 // If they've configured a node ID manually then just use that, as 986 // long as it's valid. 987 if config.NodeID != "" { 988 config.NodeID = strings.ToLower(config.NodeID) 989 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 990 return err 991 } 992 // Persist this configured nodeID to our data directory 993 if err := escapingfs.EnsurePath(fileID, false); err != nil { 994 return err 995 } 996 if err := os.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 997 return err 998 } 999 return nil 1000 } 1001 1002 // If we still don't have a valid node ID, make one. 1003 if config.NodeID == "" { 1004 id := uuid.Generate() 1005 if err := escapingfs.EnsurePath(fileID, false); err != nil { 1006 return err 1007 } 1008 if err := os.WriteFile(fileID, []byte(id), 0600); err != nil { 1009 return err 1010 } 1011 1012 config.NodeID = id 1013 } 1014 return nil 1015 } 1016 1017 // setupKeyrings is used to initialize and load keyrings during agent startup 1018 func (a *Agent) setupKeyrings(config *nomad.Config) error { 1019 file := filepath.Join(a.config.DataDir, serfKeyring) 1020 1021 if a.config.Server.EncryptKey == "" { 1022 goto LOAD 1023 } 1024 if _, err := os.Stat(file); err != nil { 1025 if err := initKeyring(file, a.config.Server.EncryptKey, a.logger); err != nil { 1026 return err 1027 } 1028 } 1029 1030 LOAD: 1031 if _, err := os.Stat(file); err == nil { 1032 config.SerfConfig.KeyringFile = file 1033 } 1034 if err := loadKeyringFile(config.SerfConfig); err != nil { 1035 return err 1036 } 1037 // Success! 1038 return nil 1039 } 1040 1041 // setupClient is used to setup the client if enabled 1042 func (a *Agent) setupClient() error { 1043 if !a.config.Client.Enabled { 1044 return nil 1045 } 1046 1047 // Plugin setup must happen before the call to clientConfig, because it 1048 // copies the pointers to the plugin loaders from the Agent to the 1049 // Client config. 1050 if err := a.setupPlugins(); err != nil { 1051 return err 1052 } 1053 1054 // Setup the configuration 1055 conf, err := a.clientConfig() 1056 if err != nil { 1057 return fmt.Errorf("client setup failed: %v", err) 1058 } 1059 1060 // Reserve some ports for the plugins if we are on Windows 1061 if runtime.GOOS == "windows" { 1062 if err := a.reservePortsForClient(conf); err != nil { 1063 return err 1064 } 1065 } 1066 if conf.StateDBFactory == nil { 1067 conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode) 1068 } 1069 1070 // Set up a custom listener and dialer. This is used by Nomad clients when 1071 // running consul-template functions that utilize the Nomad API. We lazy 1072 // load this into the client config, therefore this needs to happen before 1073 // we call NewClient. 1074 a.builtinListener, a.builtinDialer = bufconndialer.New() 1075 conf.TemplateDialer = a.builtinDialer 1076 1077 // Initialize builtin Task API server here for use in the client, but it 1078 // won't accept connections until the HTTP servers are created. 1079 a.taskAPIServer = newBuiltinAPI() 1080 conf.APIListenerRegistrar = a.taskAPIServer 1081 1082 nomadClient, err := client.NewClient( 1083 conf, a.consulCatalog, a.consulProxies, a.consulService, nil) 1084 if err != nil { 1085 return fmt.Errorf("client setup failed: %v", err) 1086 } 1087 a.client = nomadClient 1088 1089 // Create the Nomad Client services for Consul 1090 if *a.config.Consul.AutoAdvertise { 1091 httpServ := &structs.Service{ 1092 Name: a.config.Consul.ClientServiceName, 1093 PortLabel: a.config.AdvertiseAddrs.HTTP, 1094 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 1095 } 1096 const isServer = false 1097 if check := a.agentHTTPCheck(isServer); check != nil { 1098 httpServ.Checks = []*structs.ServiceCheck{check} 1099 } 1100 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 1101 return err 1102 } 1103 } 1104 1105 return nil 1106 } 1107 1108 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 1109 // If no HTTP health check can be supported nil is returned. 1110 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 1111 // Resolve the http check address 1112 httpCheckAddr := a.config.normalizedAddrs.HTTP[0] 1113 if *a.config.Consul.ChecksUseAdvertise { 1114 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 1115 } 1116 check := structs.ServiceCheck{ 1117 Name: a.config.Consul.ClientHTTPCheckName, 1118 Type: "http", 1119 Path: "/v1/agent/health?type=client", 1120 Protocol: "http", 1121 Interval: agentHttpCheckInterval, 1122 Timeout: agentHttpCheckTimeout, 1123 PortLabel: httpCheckAddr, 1124 } 1125 // Switch to endpoint that doesn't require a leader for servers 1126 if server { 1127 check.Name = a.config.Consul.ServerHTTPCheckName 1128 check.Path = "/v1/agent/health?type=server" 1129 } 1130 if !a.config.TLSConfig.EnableHTTP { 1131 // No HTTPS, return a plain http check 1132 return &check 1133 } 1134 if a.config.TLSConfig.VerifyHTTPSClient { 1135 a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled") 1136 return nil 1137 } 1138 1139 // HTTPS enabled; skip verification 1140 check.Protocol = "https" 1141 check.TLSSkipVerify = true 1142 return &check 1143 } 1144 1145 // reservePortsForClient reserves a range of ports for the client to use when 1146 // it creates various plugins for log collection, executors, drivers, etc 1147 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 1148 if conf.Node.ReservedResources == nil { 1149 conf.Node.ReservedResources = &structs.NodeReservedResources{} 1150 } 1151 1152 res := conf.Node.ReservedResources.Networks.ReservedHostPorts 1153 if res == "" { 1154 res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 1155 } else { 1156 res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 1157 } 1158 conf.Node.ReservedResources.Networks.ReservedHostPorts = res 1159 return nil 1160 } 1161 1162 // Leave is used gracefully exit. Clients will inform servers 1163 // of their departure so that allocations can be rescheduled. 1164 func (a *Agent) Leave() error { 1165 if a.client != nil { 1166 if err := a.client.Leave(); err != nil { 1167 a.logger.Error("client leave failed", "error", err) 1168 } 1169 } 1170 if a.server != nil { 1171 if err := a.server.Leave(); err != nil { 1172 a.logger.Error("server leave failed", "error", err) 1173 } 1174 } 1175 return nil 1176 } 1177 1178 // Shutdown is used to terminate the agent. 1179 func (a *Agent) Shutdown() error { 1180 a.shutdownLock.Lock() 1181 defer a.shutdownLock.Unlock() 1182 1183 if a.shutdown { 1184 return nil 1185 } 1186 1187 a.logger.Info("requesting shutdown") 1188 if a.client != nil { 1189 // Task API must be closed separately from other HTTP servers and should 1190 // happen before the client is shutdown 1191 a.taskAPIServer.Shutdown() 1192 1193 if err := a.client.Shutdown(); err != nil { 1194 a.logger.Error("client shutdown failed", "error", err) 1195 } 1196 } 1197 if a.server != nil { 1198 if err := a.server.Shutdown(); err != nil { 1199 a.logger.Error("server shutdown failed", "error", err) 1200 } 1201 } 1202 1203 if err := a.consulService.Shutdown(); err != nil { 1204 a.logger.Error("shutting down Consul client failed", "error", err) 1205 } 1206 1207 a.logger.Info("shutdown complete") 1208 a.shutdown = true 1209 close(a.shutdownCh) 1210 return nil 1211 } 1212 1213 // RPC is used to make an RPC call to the Nomad servers 1214 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 1215 if a.server != nil { 1216 return a.server.RPC(method, args, reply) 1217 } 1218 return a.client.RPC(method, args, reply) 1219 } 1220 1221 // Client returns the configured client or nil 1222 func (a *Agent) Client() *client.Client { 1223 return a.client 1224 } 1225 1226 // Server returns the configured server or nil 1227 func (a *Agent) Server() *nomad.Server { 1228 return a.server 1229 } 1230 1231 // Stats is used to return statistics for debugging and insight 1232 // for various sub-systems 1233 func (a *Agent) Stats() map[string]map[string]string { 1234 stats := make(map[string]map[string]string) 1235 if a.server != nil { 1236 subStat := a.server.Stats() 1237 for k, v := range subStat { 1238 stats[k] = v 1239 } 1240 } 1241 if a.client != nil { 1242 subStat := a.client.Stats() 1243 for k, v := range subStat { 1244 stats[k] = v 1245 } 1246 } 1247 return stats 1248 } 1249 1250 // ShouldReload determines if we should reload the configuration and agent 1251 // connections. If the TLS Configuration has not changed, we shouldn't reload. 1252 func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) { 1253 a.configLock.Lock() 1254 defer a.configLock.Unlock() 1255 1256 if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel { 1257 agent = true 1258 } 1259 1260 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 1261 if err != nil { 1262 a.logger.Error("parsing TLS certificate", "error", err) 1263 return agent, false 1264 } else if !isEqual { 1265 return true, true 1266 } 1267 1268 // Allow the ability to only reload HTTP connections 1269 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 1270 http = true 1271 agent = true 1272 } 1273 1274 // Allow the ability to only reload HTTP connections 1275 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 1276 agent = true 1277 } 1278 1279 if a.config.TLSConfig.RPCUpgradeMode != newConfig.TLSConfig.RPCUpgradeMode { 1280 agent = true 1281 } 1282 1283 return agent, http 1284 } 1285 1286 // Reload handles configuration changes for the agent. Provides a method that 1287 // is easier to unit test, as this action is invoked via SIGHUP. 1288 func (a *Agent) Reload(newConfig *Config) error { 1289 a.configLock.Lock() 1290 defer a.configLock.Unlock() 1291 1292 current := a.config.Copy() 1293 1294 updatedLogging := newConfig != nil && (newConfig.LogLevel != current.LogLevel) 1295 1296 if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging { 1297 return fmt.Errorf("cannot reload agent with nil configuration") 1298 } 1299 1300 if updatedLogging { 1301 current.LogLevel = newConfig.LogLevel 1302 a.logger.SetLevel(log.LevelFromString(current.LogLevel)) 1303 } 1304 1305 // Update eventer config 1306 if newConfig.Audit != nil { 1307 if err := a.entReloadEventer(newConfig.Audit); err != nil { 1308 return err 1309 } 1310 } 1311 // Allow auditor to call reopen regardless of config changes 1312 // This is primarily for enterprise audit logging to allow the underlying 1313 // file to be reopened if necessary 1314 if err := a.auditor.Reopen(); err != nil { 1315 return err 1316 } 1317 1318 fullUpdateTLSConfig := func() { 1319 // Completely reload the agent's TLS configuration (moving from non-TLS to 1320 // TLS, or vice versa) 1321 // This does not handle errors in loading the new TLS configuration 1322 current.TLSConfig = newConfig.TLSConfig.Copy() 1323 } 1324 1325 if !current.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 1326 // This is just a TLS configuration reload, we don't need to refresh 1327 // existing network connections 1328 1329 // Reload the certificates on the keyloader and on success store the 1330 // updated TLS config. It is important to reuse the same keyloader 1331 // as this allows us to dynamically reload configurations not only 1332 // on the Agent but on the Server and Client too (they are 1333 // referencing the same keyloader). 1334 keyloader := current.TLSConfig.GetKeyLoader() 1335 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 1336 if err != nil { 1337 return err 1338 } 1339 1340 current.TLSConfig = newConfig.TLSConfig 1341 current.TLSConfig.KeyLoader = keyloader 1342 a.config = current 1343 return nil 1344 } else if newConfig.TLSConfig.IsEmpty() && !current.TLSConfig.IsEmpty() { 1345 a.logger.Warn("downgrading agent's existing TLS configuration to plaintext") 1346 fullUpdateTLSConfig() 1347 } else if !newConfig.TLSConfig.IsEmpty() && current.TLSConfig.IsEmpty() { 1348 a.logger.Info("upgrading from plaintext configuration to TLS") 1349 fullUpdateTLSConfig() 1350 } 1351 1352 // Set agent config to the updated config 1353 a.config = current 1354 return nil 1355 } 1356 1357 // GetConfig returns the current agent configuration. The Config should *not* 1358 // be mutated directly. First call Config.Copy. 1359 func (a *Agent) GetConfig() *Config { 1360 a.configLock.Lock() 1361 defer a.configLock.Unlock() 1362 1363 return a.config 1364 } 1365 1366 // GetMetricsSink returns the metrics sink. 1367 func (a *Agent) GetMetricsSink() *metrics.InmemSink { 1368 return a.inmemSink 1369 } 1370 1371 // setupConsul creates the Consul client and starts its main Run loop. 1372 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 1373 apiConf, err := consulConfig.ApiConfig() 1374 if err != nil { 1375 return err 1376 } 1377 1378 consulClient, err := consulapi.NewClient(apiConf) 1379 if err != nil { 1380 return err 1381 } 1382 1383 // Create Consul Catalog client for service discovery. 1384 a.consulCatalog = consulClient.Catalog() 1385 1386 // Create Consul ConfigEntries client for managing Config Entries. 1387 a.consulConfigEntries = consulClient.ConfigEntries() 1388 1389 // Create Consul ACL client for managing tokens. 1390 a.consulACLs = consulClient.ACL() 1391 1392 // Create Consul Service client for service advertisement and checks. 1393 isClient := false 1394 if a.config.Client != nil && a.config.Client.Enabled { 1395 isClient = true 1396 } 1397 // Create Consul Agent client for looking info about the agent. 1398 consulAgentClient := consulClient.Agent() 1399 namespacesClient := consul.NewNamespacesClient(consulClient.Namespaces(), consulAgentClient) 1400 a.consulService = consul.NewServiceClient(consulAgentClient, namespacesClient, a.logger, isClient) 1401 a.consulProxies = consul.NewConnectProxiesClient(consulAgentClient) 1402 1403 // Run the Consul service client's sync'ing main loop 1404 go a.consulService.Run() 1405 return nil 1406 } 1407 1408 // noOpAuditor is a no-op Auditor that fulfills the 1409 // event.Auditor interface. 1410 type noOpAuditor struct{} 1411 1412 // Ensure noOpAuditor is an Auditor 1413 var _ event.Auditor = &noOpAuditor{} 1414 1415 func (e *noOpAuditor) Event(ctx context.Context, eventType string, payload interface{}) error { 1416 return nil 1417 } 1418 1419 func (e *noOpAuditor) Enabled() bool { 1420 return false 1421 } 1422 1423 func (e *noOpAuditor) Reopen() error { 1424 return nil 1425 } 1426 1427 func (e *noOpAuditor) SetEnabled(enabled bool) {} 1428 1429 func (e *noOpAuditor) DeliveryEnforced() bool { return false }