github.com/janma/nomad@v0.11.3/command/agent/agent.go (about) 1 package agent 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "io/ioutil" 8 golog "log" 9 "net" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "sync" 15 "time" 16 17 metrics "github.com/armon/go-metrics" 18 "github.com/hashicorp/consul/api" 19 "github.com/hashicorp/consul/lib" 20 log "github.com/hashicorp/go-hclog" 21 uuidparse "github.com/hashicorp/go-uuid" 22 "github.com/hashicorp/nomad/client" 23 clientconfig "github.com/hashicorp/nomad/client/config" 24 "github.com/hashicorp/nomad/client/state" 25 "github.com/hashicorp/nomad/command/agent/consul" 26 "github.com/hashicorp/nomad/command/agent/event" 27 "github.com/hashicorp/nomad/helper/pluginutils/loader" 28 "github.com/hashicorp/nomad/helper/uuid" 29 "github.com/hashicorp/nomad/nomad" 30 "github.com/hashicorp/nomad/nomad/structs" 31 "github.com/hashicorp/nomad/nomad/structs/config" 32 "github.com/hashicorp/raft" 33 ) 34 35 const ( 36 agentHttpCheckInterval = 10 * time.Second 37 agentHttpCheckTimeout = 5 * time.Second 38 serverRpcCheckInterval = 10 * time.Second 39 serverRpcCheckTimeout = 3 * time.Second 40 serverSerfCheckInterval = 10 * time.Second 41 serverSerfCheckTimeout = 3 * time.Second 42 43 // roles used in identifying Consul entries for Nomad agents 44 consulRoleServer = "server" 45 consulRoleClient = "client" 46 ) 47 48 // Agent is a long running daemon that is used to run both 49 // clients and servers. Servers are responsible for managing 50 // state and making scheduling decisions. Clients can be 51 // scheduled to, and are responsible for interfacing with 52 // servers to run allocations. 53 type Agent struct { 54 config *Config 55 configLock sync.Mutex 56 57 logger log.InterceptLogger 58 auditor event.Auditor 59 httpLogger log.Logger 60 logOutput io.Writer 61 62 // consulService is Nomad's custom Consul client for managing services 63 // and checks. 64 consulService *consul.ServiceClient 65 66 // consulCatalog is the subset of Consul's Catalog API Nomad uses. 67 consulCatalog consul.CatalogAPI 68 69 // consulACLs is Nomad's subset of Consul's ACL API Nomad uses. 70 consulACLs consul.ACLsAPI 71 72 // client is the launched Nomad Client. Can be nil if the agent isn't 73 // configured to run a client. 74 client *client.Client 75 76 // server is the launched Nomad Server. Can be nil if the agent isn't 77 // configured to run a server. 78 server *nomad.Server 79 80 // pluginLoader is used to load plugins 81 pluginLoader loader.PluginCatalog 82 83 // pluginSingletonLoader is a plugin loader that will returns singleton 84 // instances of the plugins. 85 pluginSingletonLoader loader.PluginCatalog 86 87 shutdown bool 88 shutdownCh chan struct{} 89 shutdownLock sync.Mutex 90 91 InmemSink *metrics.InmemSink 92 } 93 94 // NewAgent is used to create a new agent with the given configuration 95 func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) { 96 a := &Agent{ 97 config: config, 98 logOutput: logOutput, 99 shutdownCh: make(chan struct{}), 100 InmemSink: inmem, 101 } 102 103 // Create the loggers 104 a.logger = logger 105 a.httpLogger = a.logger.ResetNamed("http") 106 107 // Global logger should match internal logger as much as possible 108 golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds) 109 110 if err := a.setupConsul(config.Consul); err != nil { 111 return nil, fmt.Errorf("Failed to initialize Consul client: %v", err) 112 } 113 114 if err := a.setupPlugins(); err != nil { 115 return nil, err 116 } 117 118 if err := a.setupServer(); err != nil { 119 return nil, err 120 } 121 if err := a.setupClient(); err != nil { 122 return nil, err 123 } 124 if err := a.setupEnterpriseAgent(logger); err != nil { 125 return nil, err 126 } 127 if a.client == nil && a.server == nil { 128 return nil, fmt.Errorf("must have at least client or server mode enabled") 129 } 130 131 return a, nil 132 } 133 134 // convertServerConfig takes an agent config and log output and returns a Nomad 135 // Config. There may be missing fields that must be set by the agent. To do this 136 // call finalizeServerConfig 137 func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { 138 conf := agentConfig.NomadConfig 139 if conf == nil { 140 conf = nomad.DefaultConfig() 141 } 142 conf.DevMode = agentConfig.DevMode 143 conf.EnableDebug = agentConfig.EnableDebug 144 145 conf.Build = agentConfig.Version.VersionNumber() 146 if agentConfig.Region != "" { 147 conf.Region = agentConfig.Region 148 } 149 150 // Set the Authoritative Region if set, otherwise default to 151 // the same as the local region. 152 if agentConfig.Server.AuthoritativeRegion != "" { 153 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 154 } else if agentConfig.Region != "" { 155 conf.AuthoritativeRegion = agentConfig.Region 156 } 157 158 if agentConfig.Datacenter != "" { 159 conf.Datacenter = agentConfig.Datacenter 160 } 161 if agentConfig.NodeName != "" { 162 conf.NodeName = agentConfig.NodeName 163 } 164 if agentConfig.Server.BootstrapExpect > 0 { 165 conf.BootstrapExpect = agentConfig.Server.BootstrapExpect 166 } 167 if agentConfig.DataDir != "" { 168 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 169 } 170 if agentConfig.Server.DataDir != "" { 171 conf.DataDir = agentConfig.Server.DataDir 172 } 173 if agentConfig.Server.ProtocolVersion != 0 { 174 conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion) 175 } 176 if agentConfig.Server.RaftProtocol != 0 { 177 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 178 } 179 if agentConfig.Server.NumSchedulers != nil { 180 conf.NumSchedulers = *agentConfig.Server.NumSchedulers 181 } 182 if len(agentConfig.Server.EnabledSchedulers) != 0 { 183 // Convert to a set and require the core scheduler 184 set := make(map[string]struct{}, 4) 185 set[structs.JobTypeCore] = struct{}{} 186 for _, sched := range agentConfig.Server.EnabledSchedulers { 187 set[sched] = struct{}{} 188 } 189 190 schedulers := make([]string, 0, len(set)) 191 for k := range set { 192 schedulers = append(schedulers, k) 193 } 194 195 conf.EnabledSchedulers = schedulers 196 197 } 198 if agentConfig.ACL.Enabled { 199 conf.ACLEnabled = true 200 } 201 if agentConfig.ACL.ReplicationToken != "" { 202 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 203 } 204 if agentConfig.Sentinel != nil { 205 conf.SentinelConfig = agentConfig.Sentinel 206 } 207 if agentConfig.Server.NonVotingServer { 208 conf.NonVoter = true 209 } 210 if agentConfig.Server.RedundancyZone != "" { 211 conf.RedundancyZone = agentConfig.Server.RedundancyZone 212 } 213 if agentConfig.Server.UpgradeVersion != "" { 214 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 215 } 216 if agentConfig.Autopilot != nil { 217 if agentConfig.Autopilot.CleanupDeadServers != nil { 218 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 219 } 220 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 221 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 222 } 223 if agentConfig.Autopilot.LastContactThreshold != 0 { 224 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 225 } 226 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 227 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 228 } 229 if agentConfig.Autopilot.MinQuorum != 0 { 230 conf.AutopilotConfig.MinQuorum = uint(agentConfig.Autopilot.MinQuorum) 231 } 232 if agentConfig.Autopilot.EnableRedundancyZones != nil { 233 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 234 } 235 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 236 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 237 } 238 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 239 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 240 } 241 } 242 243 // Set up the bind addresses 244 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 245 if err != nil { 246 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 247 } 248 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 249 if err != nil { 250 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 251 } 252 conf.RPCAddr.Port = rpcAddr.Port 253 conf.RPCAddr.IP = rpcAddr.IP 254 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 255 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 256 257 // Set up the advertise addresses 258 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 259 if err != nil { 260 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 261 } 262 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 263 if err != nil { 264 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 265 } 266 267 // Server address is the serf advertise address and rpc port. This is the 268 // address that all servers should be able to communicate over RPC with. 269 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 270 if err != nil { 271 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 272 } 273 274 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 275 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 276 conf.ClientRPCAdvertise = rpcAddr 277 conf.ServerRPCAdvertise = serverAddr 278 279 // Set up gc threshold and heartbeat grace period 280 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 281 dur, err := time.ParseDuration(gcThreshold) 282 if err != nil { 283 return nil, err 284 } 285 conf.NodeGCThreshold = dur 286 } 287 if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" { 288 dur, err := time.ParseDuration(gcInterval) 289 if err != nil { 290 return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err) 291 } else if dur <= time.Duration(0) { 292 return nil, fmt.Errorf("job_gc_interval should be greater than 0s") 293 } 294 conf.JobGCInterval = dur 295 } 296 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 297 dur, err := time.ParseDuration(gcThreshold) 298 if err != nil { 299 return nil, err 300 } 301 conf.JobGCThreshold = dur 302 } 303 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 304 dur, err := time.ParseDuration(gcThreshold) 305 if err != nil { 306 return nil, err 307 } 308 conf.EvalGCThreshold = dur 309 } 310 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 311 dur, err := time.ParseDuration(gcThreshold) 312 if err != nil { 313 return nil, err 314 } 315 conf.DeploymentGCThreshold = dur 316 } 317 if gcThreshold := agentConfig.Server.CSIVolumeClaimGCThreshold; gcThreshold != "" { 318 dur, err := time.ParseDuration(gcThreshold) 319 if err != nil { 320 return nil, err 321 } 322 conf.CSIVolumeClaimGCThreshold = dur 323 } 324 if gcThreshold := agentConfig.Server.CSIPluginGCThreshold; gcThreshold != "" { 325 dur, err := time.ParseDuration(gcThreshold) 326 if err != nil { 327 return nil, err 328 } 329 conf.CSIPluginGCThreshold = dur 330 } 331 332 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 333 conf.HeartbeatGrace = heartbeatGrace 334 } 335 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 336 conf.MinHeartbeatTTL = min 337 } 338 if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 339 conf.MaxHeartbeatsPerSecond = maxHPS 340 } 341 342 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 343 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 344 } 345 346 // handle system scheduler preemption default 347 if agentConfig.Server.DefaultSchedulerConfig != nil { 348 conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig 349 } 350 351 // Add the Consul and Vault configs 352 conf.ConsulConfig = agentConfig.Consul 353 conf.VaultConfig = agentConfig.Vault 354 355 // Set the TLS config 356 conf.TLSConfig = agentConfig.TLSConfig 357 358 // Setup telemetry related config 359 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 360 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 361 conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics 362 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 363 364 // Parse Limits timeout from a string into durations 365 if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil { 366 return nil, fmt.Errorf("error parsing rpc_handshake_timeout: %v", err) 367 } else if d < 0 { 368 return nil, fmt.Errorf("rpc_handshake_timeout must be >= 0") 369 } else { 370 conf.RPCHandshakeTimeout = d 371 } 372 373 // Set max rpc conns; nil/0 == unlimited 374 // Leave a little room for streaming RPCs 375 minLimit := config.LimitsNonStreamingConnsPerClient + 5 376 if agentConfig.Limits.RPCMaxConnsPerClient == nil || *agentConfig.Limits.RPCMaxConnsPerClient == 0 { 377 conf.RPCMaxConnsPerClient = 0 378 } else if limit := *agentConfig.Limits.RPCMaxConnsPerClient; limit <= minLimit { 379 return nil, fmt.Errorf("rpc_max_conns_per_client must be > %d; found: %d", minLimit, limit) 380 } else { 381 conf.RPCMaxConnsPerClient = limit 382 } 383 384 return conf, nil 385 } 386 387 // serverConfig is used to generate a new server configuration struct 388 // for initializing a nomad server. 389 func (a *Agent) serverConfig() (*nomad.Config, error) { 390 c, err := convertServerConfig(a.config) 391 if err != nil { 392 return nil, err 393 } 394 395 a.finalizeServerConfig(c) 396 return c, nil 397 } 398 399 // finalizeServerConfig sets configuration fields on the server config that are 400 // not staticly convertable and are from the agent. 401 func (a *Agent) finalizeServerConfig(c *nomad.Config) { 402 // Setup the logging 403 c.Logger = a.logger 404 c.LogOutput = a.logOutput 405 406 // Setup the plugin loaders 407 c.PluginLoader = a.pluginLoader 408 c.PluginSingletonLoader = a.pluginSingletonLoader 409 } 410 411 // clientConfig is used to generate a new client configuration struct for 412 // initializing a Nomad client. 413 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 414 c, err := convertClientConfig(a.config) 415 if err != nil { 416 return nil, err 417 } 418 419 if err := a.finalizeClientConfig(c); err != nil { 420 return nil, err 421 } 422 423 return c, nil 424 } 425 426 // finalizeClientConfig sets configuration fields on the client config that are 427 // not staticly convertable and are from the agent. 428 func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error { 429 // Setup the logging 430 c.Logger = a.logger 431 c.LogOutput = a.logOutput 432 433 // If we are running a server, append both its bind and advertise address so 434 // we are able to at least talk to the local server even if that isn't 435 // configured explicitly. This handles both running server and client on one 436 // host and -dev mode. 437 if a.server != nil { 438 advertised := a.config.AdvertiseAddrs 439 normalized := a.config.normalizedAddrs 440 441 if advertised == nil || advertised.RPC == "" { 442 return fmt.Errorf("AdvertiseAddrs is nil or empty") 443 } else if normalized == nil || normalized.RPC == "" { 444 return fmt.Errorf("normalizedAddrs is nil or empty") 445 } 446 447 if normalized.RPC == advertised.RPC { 448 c.Servers = append(c.Servers, normalized.RPC) 449 } else { 450 c.Servers = append(c.Servers, normalized.RPC, advertised.RPC) 451 } 452 } 453 454 // Setup the plugin loaders 455 c.PluginLoader = a.pluginLoader 456 c.PluginSingletonLoader = a.pluginSingletonLoader 457 458 // Log deprecation messages about Consul related configuration in client 459 // options 460 var invalidConsulKeys []string 461 for key := range c.Options { 462 if strings.HasPrefix(key, "consul") { 463 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 464 } 465 } 466 if len(invalidConsulKeys) > 0 { 467 a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ",")) 468 a.logger.Warn(`Nomad client ignores consul related configuration in client options. 469 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 470 to configure Nomad to work with Consul.`) 471 } 472 473 return nil 474 } 475 476 // convertClientConfig takes an agent config and log output and returns a client 477 // Config. There may be missing fields that must be set by the agent. To do this 478 // call finalizeServerConfig 479 func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { 480 // Setup the configuration 481 conf := agentConfig.ClientConfig 482 if conf == nil { 483 conf = clientconfig.DefaultConfig() 484 } 485 486 conf.Servers = agentConfig.Client.Servers 487 conf.LogLevel = agentConfig.LogLevel 488 conf.DevMode = agentConfig.DevMode 489 conf.EnableDebug = agentConfig.EnableDebug 490 491 if agentConfig.Region != "" { 492 conf.Region = agentConfig.Region 493 } 494 if agentConfig.DataDir != "" { 495 conf.StateDir = filepath.Join(agentConfig.DataDir, "client") 496 conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc") 497 } 498 if agentConfig.Client.StateDir != "" { 499 conf.StateDir = agentConfig.Client.StateDir 500 } 501 if agentConfig.Client.AllocDir != "" { 502 conf.AllocDir = agentConfig.Client.AllocDir 503 } 504 if agentConfig.Client.NetworkInterface != "" { 505 conf.NetworkInterface = agentConfig.Client.NetworkInterface 506 } 507 conf.ChrootEnv = agentConfig.Client.ChrootEnv 508 conf.Options = agentConfig.Client.Options 509 if agentConfig.Client.NetworkSpeed != 0 { 510 conf.NetworkSpeed = agentConfig.Client.NetworkSpeed 511 } 512 if agentConfig.Client.CpuCompute != 0 { 513 conf.CpuCompute = agentConfig.Client.CpuCompute 514 } 515 if agentConfig.Client.MemoryMB != 0 { 516 conf.MemoryMB = agentConfig.Client.MemoryMB 517 } 518 if agentConfig.Client.MaxKillTimeout != "" { 519 dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout) 520 if err != nil { 521 return nil, fmt.Errorf("Error parsing max kill timeout: %s", err) 522 } 523 conf.MaxKillTimeout = dur 524 } 525 conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort) 526 conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort) 527 conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec 528 conf.TemplateConfig.FunctionBlacklist = agentConfig.Client.TemplateConfig.FunctionBlacklist 529 conf.TemplateConfig.DisableSandbox = agentConfig.Client.TemplateConfig.DisableSandbox 530 531 hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes)) 532 for _, v := range agentConfig.Client.HostVolumes { 533 hvMap[v.Name] = v 534 } 535 conf.HostVolumes = hvMap 536 537 // Setup the node 538 conf.Node = new(structs.Node) 539 conf.Node.Datacenter = agentConfig.Datacenter 540 conf.Node.Name = agentConfig.NodeName 541 conf.Node.Meta = agentConfig.Client.Meta 542 conf.Node.NodeClass = agentConfig.Client.NodeClass 543 544 // Set up the HTTP advertise address 545 conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP 546 547 // Canonicalize Node struct 548 conf.Node.Canonicalize() 549 550 // Reserve resources on the node. 551 // COMPAT(0.10): Remove in 0.10 552 r := conf.Node.Reserved 553 if r == nil { 554 r = new(structs.Resources) 555 conf.Node.Reserved = r 556 } 557 r.CPU = agentConfig.Client.Reserved.CPU 558 r.MemoryMB = agentConfig.Client.Reserved.MemoryMB 559 r.DiskMB = agentConfig.Client.Reserved.DiskMB 560 561 res := conf.Node.ReservedResources 562 if res == nil { 563 res = new(structs.NodeReservedResources) 564 conf.Node.ReservedResources = res 565 } 566 res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU) 567 res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB) 568 res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB) 569 res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts 570 571 conf.Version = agentConfig.Version 572 573 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" { 574 return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled") 575 } 576 577 conf.ConsulConfig = agentConfig.Consul 578 conf.VaultConfig = agentConfig.Vault 579 580 // Set up Telemetry configuration 581 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 582 conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics 583 conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics 584 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 585 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 586 587 // Set the TLS related configs 588 conf.TLSConfig = agentConfig.TLSConfig 589 conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP 590 591 // Set the GC related configs 592 conf.GCInterval = agentConfig.Client.GCInterval 593 conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys 594 conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold 595 conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold 596 conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs 597 if agentConfig.Client.NoHostUUID != nil { 598 conf.NoHostUUID = *agentConfig.Client.NoHostUUID 599 } else { 600 // Default no_host_uuid to true 601 conf.NoHostUUID = true 602 } 603 604 // Setup the ACLs 605 conf.ACLEnabled = agentConfig.ACL.Enabled 606 conf.ACLTokenTTL = agentConfig.ACL.TokenTTL 607 conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL 608 609 // Setup networking configuration 610 conf.CNIPath = agentConfig.Client.CNIPath 611 conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName 612 conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet 613 614 return conf, nil 615 } 616 617 // setupServer is used to setup the server if enabled 618 func (a *Agent) setupServer() error { 619 if !a.config.Server.Enabled { 620 return nil 621 } 622 623 // Setup the configuration 624 conf, err := a.serverConfig() 625 if err != nil { 626 return fmt.Errorf("server config setup failed: %s", err) 627 } 628 629 // Generate a node ID and persist it if it is the first instance, otherwise 630 // read the persisted node ID. 631 if err := a.setupNodeID(conf); err != nil { 632 return fmt.Errorf("setting up server node ID failed: %s", err) 633 } 634 635 // Sets up the keyring for gossip encryption 636 if err := a.setupKeyrings(conf); err != nil { 637 return fmt.Errorf("failed to configure keyring: %v", err) 638 } 639 640 // Create the server 641 server, err := nomad.NewServer(conf, a.consulCatalog, a.consulACLs) 642 if err != nil { 643 return fmt.Errorf("server setup failed: %v", err) 644 } 645 a.server = server 646 647 // Consul check addresses default to bind but can be toggled to use advertise 648 rpcCheckAddr := a.config.normalizedAddrs.RPC 649 serfCheckAddr := a.config.normalizedAddrs.Serf 650 if *a.config.Consul.ChecksUseAdvertise { 651 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 652 serfCheckAddr = a.config.AdvertiseAddrs.Serf 653 } 654 655 // Create the Nomad Server services for Consul 656 if *a.config.Consul.AutoAdvertise { 657 httpServ := &structs.Service{ 658 Name: a.config.Consul.ServerServiceName, 659 PortLabel: a.config.AdvertiseAddrs.HTTP, 660 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 661 } 662 const isServer = true 663 if check := a.agentHTTPCheck(isServer); check != nil { 664 httpServ.Checks = []*structs.ServiceCheck{check} 665 } 666 rpcServ := &structs.Service{ 667 Name: a.config.Consul.ServerServiceName, 668 PortLabel: a.config.AdvertiseAddrs.RPC, 669 Tags: append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...), 670 Checks: []*structs.ServiceCheck{ 671 { 672 Name: a.config.Consul.ServerRPCCheckName, 673 Type: "tcp", 674 Interval: serverRpcCheckInterval, 675 Timeout: serverRpcCheckTimeout, 676 PortLabel: rpcCheckAddr, 677 }, 678 }, 679 } 680 serfServ := &structs.Service{ 681 Name: a.config.Consul.ServerServiceName, 682 PortLabel: a.config.AdvertiseAddrs.Serf, 683 Tags: append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...), 684 Checks: []*structs.ServiceCheck{ 685 { 686 Name: a.config.Consul.ServerSerfCheckName, 687 Type: "tcp", 688 Interval: serverSerfCheckInterval, 689 Timeout: serverSerfCheckTimeout, 690 PortLabel: serfCheckAddr, 691 }, 692 }, 693 } 694 695 // Add the http port check if TLS isn't enabled 696 consulServices := []*structs.Service{ 697 rpcServ, 698 serfServ, 699 httpServ, 700 } 701 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 702 return err 703 } 704 } 705 706 return nil 707 } 708 709 // setupNodeID will pull the persisted node ID, if any, or create a random one 710 // and persist it. 711 func (a *Agent) setupNodeID(config *nomad.Config) error { 712 // For dev mode we have no filesystem access so just make a node ID. 713 if a.config.DevMode { 714 config.NodeID = uuid.Generate() 715 return nil 716 } 717 718 // Load saved state, if any. Since a user could edit this, we also 719 // validate it. Saved state overwrites any configured node id 720 fileID := filepath.Join(config.DataDir, "node-id") 721 if _, err := os.Stat(fileID); err == nil { 722 rawID, err := ioutil.ReadFile(fileID) 723 if err != nil { 724 return err 725 } 726 727 nodeID := strings.TrimSpace(string(rawID)) 728 nodeID = strings.ToLower(nodeID) 729 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 730 return err 731 } 732 config.NodeID = nodeID 733 return nil 734 } 735 736 // If they've configured a node ID manually then just use that, as 737 // long as it's valid. 738 if config.NodeID != "" { 739 config.NodeID = strings.ToLower(config.NodeID) 740 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 741 return err 742 } 743 // Persist this configured nodeID to our data directory 744 if err := lib.EnsurePath(fileID, false); err != nil { 745 return err 746 } 747 if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 748 return err 749 } 750 return nil 751 } 752 753 // If we still don't have a valid node ID, make one. 754 if config.NodeID == "" { 755 id := uuid.Generate() 756 if err := lib.EnsurePath(fileID, false); err != nil { 757 return err 758 } 759 if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { 760 return err 761 } 762 763 config.NodeID = id 764 } 765 return nil 766 } 767 768 // setupKeyrings is used to initialize and load keyrings during agent startup 769 func (a *Agent) setupKeyrings(config *nomad.Config) error { 770 file := filepath.Join(a.config.DataDir, serfKeyring) 771 772 if a.config.Server.EncryptKey == "" { 773 goto LOAD 774 } 775 if _, err := os.Stat(file); err != nil { 776 if err := initKeyring(file, a.config.Server.EncryptKey); err != nil { 777 return err 778 } 779 } 780 781 LOAD: 782 if _, err := os.Stat(file); err == nil { 783 config.SerfConfig.KeyringFile = file 784 } 785 if err := loadKeyringFile(config.SerfConfig); err != nil { 786 return err 787 } 788 // Success! 789 return nil 790 } 791 792 // setupClient is used to setup the client if enabled 793 func (a *Agent) setupClient() error { 794 if !a.config.Client.Enabled { 795 return nil 796 } 797 798 // Setup the configuration 799 conf, err := a.clientConfig() 800 if err != nil { 801 return fmt.Errorf("client setup failed: %v", err) 802 } 803 804 // Reserve some ports for the plugins if we are on Windows 805 if runtime.GOOS == "windows" { 806 if err := a.reservePortsForClient(conf); err != nil { 807 return err 808 } 809 } 810 if conf.StateDBFactory == nil { 811 conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode) 812 } 813 814 client, err := client.NewClient(conf, a.consulCatalog, a.consulService) 815 if err != nil { 816 return fmt.Errorf("client setup failed: %v", err) 817 } 818 a.client = client 819 820 // Create the Nomad Client services for Consul 821 if *a.config.Consul.AutoAdvertise { 822 httpServ := &structs.Service{ 823 Name: a.config.Consul.ClientServiceName, 824 PortLabel: a.config.AdvertiseAddrs.HTTP, 825 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 826 } 827 const isServer = false 828 if check := a.agentHTTPCheck(isServer); check != nil { 829 httpServ.Checks = []*structs.ServiceCheck{check} 830 } 831 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 832 return err 833 } 834 } 835 836 return nil 837 } 838 839 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 840 // If no HTTP health check can be supported nil is returned. 841 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 842 // Resolve the http check address 843 httpCheckAddr := a.config.normalizedAddrs.HTTP 844 if *a.config.Consul.ChecksUseAdvertise { 845 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 846 } 847 check := structs.ServiceCheck{ 848 Name: a.config.Consul.ClientHTTPCheckName, 849 Type: "http", 850 Path: "/v1/agent/health?type=client", 851 Protocol: "http", 852 Interval: agentHttpCheckInterval, 853 Timeout: agentHttpCheckTimeout, 854 PortLabel: httpCheckAddr, 855 } 856 // Switch to endpoint that doesn't require a leader for servers 857 if server { 858 check.Name = a.config.Consul.ServerHTTPCheckName 859 check.Path = "/v1/agent/health?type=server" 860 } 861 if !a.config.TLSConfig.EnableHTTP { 862 // No HTTPS, return a plain http check 863 return &check 864 } 865 if a.config.TLSConfig.VerifyHTTPSClient { 866 a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled") 867 return nil 868 } 869 870 // HTTPS enabled; skip verification 871 check.Protocol = "https" 872 check.TLSSkipVerify = true 873 return &check 874 } 875 876 // reservePortsForClient reserves a range of ports for the client to use when 877 // it creates various plugins for log collection, executors, drivers, etc 878 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 879 if conf.Node.ReservedResources == nil { 880 conf.Node.ReservedResources = &structs.NodeReservedResources{} 881 } 882 883 res := conf.Node.ReservedResources.Networks.ReservedHostPorts 884 if res == "" { 885 res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 886 } else { 887 res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 888 } 889 conf.Node.ReservedResources.Networks.ReservedHostPorts = res 890 return nil 891 } 892 893 // Leave is used gracefully exit. Clients will inform servers 894 // of their departure so that allocations can be rescheduled. 895 func (a *Agent) Leave() error { 896 if a.client != nil { 897 if err := a.client.Leave(); err != nil { 898 a.logger.Error("client leave failed", "error", err) 899 } 900 } 901 if a.server != nil { 902 if err := a.server.Leave(); err != nil { 903 a.logger.Error("server leave failed", "error", err) 904 } 905 } 906 return nil 907 } 908 909 // Shutdown is used to terminate the agent. 910 func (a *Agent) Shutdown() error { 911 a.shutdownLock.Lock() 912 defer a.shutdownLock.Unlock() 913 914 if a.shutdown { 915 return nil 916 } 917 918 a.logger.Info("requesting shutdown") 919 if a.client != nil { 920 if err := a.client.Shutdown(); err != nil { 921 a.logger.Error("client shutdown failed", "error", err) 922 } 923 } 924 if a.server != nil { 925 if err := a.server.Shutdown(); err != nil { 926 a.logger.Error("server shutdown failed", "error", err) 927 } 928 } 929 930 if err := a.consulService.Shutdown(); err != nil { 931 a.logger.Error("shutting down Consul client failed", "error", err) 932 } 933 934 a.logger.Info("shutdown complete") 935 a.shutdown = true 936 close(a.shutdownCh) 937 return nil 938 } 939 940 // RPC is used to make an RPC call to the Nomad servers 941 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 942 if a.server != nil { 943 return a.server.RPC(method, args, reply) 944 } 945 return a.client.RPC(method, args, reply) 946 } 947 948 // Client returns the configured client or nil 949 func (a *Agent) Client() *client.Client { 950 return a.client 951 } 952 953 // Server returns the configured server or nil 954 func (a *Agent) Server() *nomad.Server { 955 return a.server 956 } 957 958 // Stats is used to return statistics for debugging and insight 959 // for various sub-systems 960 func (a *Agent) Stats() map[string]map[string]string { 961 stats := make(map[string]map[string]string) 962 if a.server != nil { 963 subStat := a.server.Stats() 964 for k, v := range subStat { 965 stats[k] = v 966 } 967 } 968 if a.client != nil { 969 subStat := a.client.Stats() 970 for k, v := range subStat { 971 stats[k] = v 972 } 973 } 974 return stats 975 } 976 977 // ShouldReload determines if we should reload the configuration and agent 978 // connections. If the TLS Configuration has not changed, we shouldn't reload. 979 func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) { 980 a.configLock.Lock() 981 defer a.configLock.Unlock() 982 983 if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel { 984 agent = true 985 } 986 987 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 988 if err != nil { 989 a.logger.Error("parsing TLS certificate", "error", err) 990 return agent, false 991 } else if !isEqual { 992 return true, true 993 } 994 995 // Allow the ability to only reload HTTP connections 996 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 997 http = true 998 agent = true 999 } 1000 1001 // Allow the ability to only reload HTTP connections 1002 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 1003 agent = true 1004 } 1005 1006 return agent, http 1007 } 1008 1009 // Reload handles configuration changes for the agent. Provides a method that 1010 // is easier to unit test, as this action is invoked via SIGHUP. 1011 func (a *Agent) Reload(newConfig *Config) error { 1012 a.configLock.Lock() 1013 defer a.configLock.Unlock() 1014 1015 updatedLogging := newConfig != nil && (newConfig.LogLevel != a.config.LogLevel) 1016 1017 if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging { 1018 return fmt.Errorf("cannot reload agent with nil configuration") 1019 } 1020 1021 if updatedLogging { 1022 a.config.LogLevel = newConfig.LogLevel 1023 a.logger.SetLevel(log.LevelFromString(newConfig.LogLevel)) 1024 } 1025 1026 // Update eventer config 1027 if newConfig.Audit != nil { 1028 if err := a.entReloadEventer(newConfig.Audit); err != nil { 1029 return err 1030 } 1031 } 1032 // Allow auditor to call reopen regardless of config changes 1033 // This is primarily for enterprise audit logging to allow the underlying 1034 // file to be reopened if necessary 1035 if err := a.auditor.Reopen(); err != nil { 1036 return err 1037 } 1038 1039 fullUpdateTLSConfig := func() { 1040 // Completely reload the agent's TLS configuration (moving from non-TLS to 1041 // TLS, or vice versa) 1042 // This does not handle errors in loading the new TLS configuration 1043 a.config.TLSConfig = newConfig.TLSConfig.Copy() 1044 } 1045 1046 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 1047 // This is just a TLS configuration reload, we don't need to refresh 1048 // existing network connections 1049 1050 // Reload the certificates on the keyloader and on success store the 1051 // updated TLS config. It is important to reuse the same keyloader 1052 // as this allows us to dynamically reload configurations not only 1053 // on the Agent but on the Server and Client too (they are 1054 // referencing the same keyloader). 1055 keyloader := a.config.TLSConfig.GetKeyLoader() 1056 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 1057 if err != nil { 1058 return err 1059 } 1060 a.config.TLSConfig = newConfig.TLSConfig 1061 a.config.TLSConfig.KeyLoader = keyloader 1062 return nil 1063 } else if newConfig.TLSConfig.IsEmpty() && !a.config.TLSConfig.IsEmpty() { 1064 a.logger.Warn("downgrading agent's existing TLS configuration to plaintext") 1065 fullUpdateTLSConfig() 1066 } else if !newConfig.TLSConfig.IsEmpty() && a.config.TLSConfig.IsEmpty() { 1067 a.logger.Info("upgrading from plaintext configuration to TLS") 1068 fullUpdateTLSConfig() 1069 } 1070 1071 return nil 1072 } 1073 1074 // GetConfig creates a locked reference to the agent's config 1075 func (a *Agent) GetConfig() *Config { 1076 a.configLock.Lock() 1077 defer a.configLock.Unlock() 1078 1079 return a.config 1080 } 1081 1082 // setupConsul creates the Consul client and starts its main Run loop. 1083 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 1084 apiConf, err := consulConfig.ApiConfig() 1085 if err != nil { 1086 return err 1087 } 1088 client, err := api.NewClient(apiConf) 1089 if err != nil { 1090 return err 1091 } 1092 1093 // Create Consul Catalog client for service discovery. 1094 a.consulCatalog = client.Catalog() 1095 1096 // Create Consul ACL client for managing tokens. 1097 a.consulACLs = client.ACL() 1098 1099 // Create Consul Service client for service advertisement and checks. 1100 isClient := false 1101 if a.config.Client != nil && a.config.Client.Enabled { 1102 isClient = true 1103 } 1104 a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) 1105 1106 // Run the Consul service client's sync'ing main loop 1107 go a.consulService.Run() 1108 return nil 1109 } 1110 1111 // noOpAuditor is a no-op Auditor that fulfills the 1112 // event.Auditor interface. 1113 type noOpAuditor struct{} 1114 1115 // Ensure noOpAuditor is an Auditor 1116 var _ event.Auditor = &noOpAuditor{} 1117 1118 func (e *noOpAuditor) Event(ctx context.Context, eventType string, payload interface{}) error { 1119 return nil 1120 } 1121 1122 func (e *noOpAuditor) Enabled() bool { 1123 return false 1124 } 1125 1126 func (e *noOpAuditor) Reopen() error { 1127 return nil 1128 } 1129 1130 func (e *noOpAuditor) SetEnabled(enabled bool) {} 1131 1132 func (e *noOpAuditor) DeliveryEnforced() bool { return false }