github.com/thomasobenaus/nomad@v0.11.1/command/agent/agent.go (about) 1 package agent 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "io/ioutil" 8 golog "log" 9 "net" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "sync" 15 "time" 16 17 metrics "github.com/armon/go-metrics" 18 "github.com/hashicorp/consul/api" 19 "github.com/hashicorp/consul/lib" 20 log "github.com/hashicorp/go-hclog" 21 uuidparse "github.com/hashicorp/go-uuid" 22 "github.com/hashicorp/nomad/client" 23 clientconfig "github.com/hashicorp/nomad/client/config" 24 "github.com/hashicorp/nomad/client/state" 25 "github.com/hashicorp/nomad/command/agent/consul" 26 "github.com/hashicorp/nomad/command/agent/event" 27 "github.com/hashicorp/nomad/helper/pluginutils/loader" 28 "github.com/hashicorp/nomad/helper/uuid" 29 "github.com/hashicorp/nomad/nomad" 30 "github.com/hashicorp/nomad/nomad/structs" 31 "github.com/hashicorp/nomad/nomad/structs/config" 32 "github.com/hashicorp/raft" 33 ) 34 35 const ( 36 agentHttpCheckInterval = 10 * time.Second 37 agentHttpCheckTimeout = 5 * time.Second 38 serverRpcCheckInterval = 10 * time.Second 39 serverRpcCheckTimeout = 3 * time.Second 40 serverSerfCheckInterval = 10 * time.Second 41 serverSerfCheckTimeout = 3 * time.Second 42 43 // roles used in identifying Consul entries for Nomad agents 44 consulRoleServer = "server" 45 consulRoleClient = "client" 46 ) 47 48 // Agent is a long running daemon that is used to run both 49 // clients and servers. Servers are responsible for managing 50 // state and making scheduling decisions. Clients can be 51 // scheduled to, and are responsible for interfacing with 52 // servers to run allocations. 53 type Agent struct { 54 config *Config 55 configLock sync.Mutex 56 57 logger log.InterceptLogger 58 auditor event.Auditor 59 httpLogger log.Logger 60 logOutput io.Writer 61 62 // consulService is Nomad's custom Consul client for managing services 63 // and checks. 64 consulService *consul.ServiceClient 65 66 // consulCatalog is the subset of Consul's Catalog API Nomad uses. 67 consulCatalog consul.CatalogAPI 68 69 // consulACLs is Nomad's subset of Consul's ACL API Nomad uses. 70 consulACLs consul.ACLsAPI 71 72 // client is the launched Nomad Client. Can be nil if the agent isn't 73 // configured to run a client. 74 client *client.Client 75 76 // server is the launched Nomad Server. Can be nil if the agent isn't 77 // configured to run a server. 78 server *nomad.Server 79 80 // pluginLoader is used to load plugins 81 pluginLoader loader.PluginCatalog 82 83 // pluginSingletonLoader is a plugin loader that will returns singleton 84 // instances of the plugins. 85 pluginSingletonLoader loader.PluginCatalog 86 87 shutdown bool 88 shutdownCh chan struct{} 89 shutdownLock sync.Mutex 90 91 InmemSink *metrics.InmemSink 92 } 93 94 // NewAgent is used to create a new agent with the given configuration 95 func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) { 96 a := &Agent{ 97 config: config, 98 logOutput: logOutput, 99 shutdownCh: make(chan struct{}), 100 InmemSink: inmem, 101 } 102 103 // Create the loggers 104 a.logger = logger 105 a.httpLogger = a.logger.ResetNamed("http") 106 107 // Global logger should match internal logger as much as possible 108 golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds) 109 110 if err := a.setupConsul(config.Consul); err != nil { 111 return nil, fmt.Errorf("Failed to initialize Consul client: %v", err) 112 } 113 114 if err := a.setupPlugins(); err != nil { 115 return nil, err 116 } 117 118 if err := a.setupServer(); err != nil { 119 return nil, err 120 } 121 if err := a.setupClient(); err != nil { 122 return nil, err 123 } 124 if err := a.setupEnterpriseAgent(logger); err != nil { 125 return nil, err 126 } 127 if a.client == nil && a.server == nil { 128 return nil, fmt.Errorf("must have at least client or server mode enabled") 129 } 130 131 return a, nil 132 } 133 134 // convertServerConfig takes an agent config and log output and returns a Nomad 135 // Config. There may be missing fields that must be set by the agent. To do this 136 // call finalizeServerConfig 137 func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { 138 conf := agentConfig.NomadConfig 139 if conf == nil { 140 conf = nomad.DefaultConfig() 141 } 142 conf.DevMode = agentConfig.DevMode 143 conf.EnableDebug = agentConfig.EnableDebug 144 145 conf.Build = agentConfig.Version.VersionNumber() 146 if agentConfig.Region != "" { 147 conf.Region = agentConfig.Region 148 } 149 150 // Set the Authoritative Region if set, otherwise default to 151 // the same as the local region. 152 if agentConfig.Server.AuthoritativeRegion != "" { 153 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 154 } else if agentConfig.Region != "" { 155 conf.AuthoritativeRegion = agentConfig.Region 156 } 157 158 if agentConfig.Datacenter != "" { 159 conf.Datacenter = agentConfig.Datacenter 160 } 161 if agentConfig.NodeName != "" { 162 conf.NodeName = agentConfig.NodeName 163 } 164 if agentConfig.Server.BootstrapExpect > 0 { 165 conf.BootstrapExpect = agentConfig.Server.BootstrapExpect 166 } 167 if agentConfig.DataDir != "" { 168 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 169 } 170 if agentConfig.Server.DataDir != "" { 171 conf.DataDir = agentConfig.Server.DataDir 172 } 173 if agentConfig.Server.ProtocolVersion != 0 { 174 conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion) 175 } 176 if agentConfig.Server.RaftProtocol != 0 { 177 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 178 } 179 if agentConfig.Server.NumSchedulers != nil { 180 conf.NumSchedulers = *agentConfig.Server.NumSchedulers 181 } 182 if len(agentConfig.Server.EnabledSchedulers) != 0 { 183 // Convert to a set and require the core scheduler 184 set := make(map[string]struct{}, 4) 185 set[structs.JobTypeCore] = struct{}{} 186 for _, sched := range agentConfig.Server.EnabledSchedulers { 187 set[sched] = struct{}{} 188 } 189 190 schedulers := make([]string, 0, len(set)) 191 for k := range set { 192 schedulers = append(schedulers, k) 193 } 194 195 conf.EnabledSchedulers = schedulers 196 197 } 198 if agentConfig.ACL.Enabled { 199 conf.ACLEnabled = true 200 } 201 if agentConfig.ACL.ReplicationToken != "" { 202 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 203 } 204 if agentConfig.Sentinel != nil { 205 conf.SentinelConfig = agentConfig.Sentinel 206 } 207 if agentConfig.Server.NonVotingServer { 208 conf.NonVoter = true 209 } 210 if agentConfig.Server.RedundancyZone != "" { 211 conf.RedundancyZone = agentConfig.Server.RedundancyZone 212 } 213 if agentConfig.Server.UpgradeVersion != "" { 214 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 215 } 216 if agentConfig.Autopilot != nil { 217 if agentConfig.Autopilot.CleanupDeadServers != nil { 218 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 219 } 220 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 221 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 222 } 223 if agentConfig.Autopilot.LastContactThreshold != 0 { 224 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 225 } 226 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 227 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 228 } 229 if agentConfig.Autopilot.MinQuorum != 0 { 230 conf.AutopilotConfig.MinQuorum = uint(agentConfig.Autopilot.MinQuorum) 231 } 232 if agentConfig.Autopilot.EnableRedundancyZones != nil { 233 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 234 } 235 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 236 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 237 } 238 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 239 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 240 } 241 } 242 243 // Set up the bind addresses 244 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 245 if err != nil { 246 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 247 } 248 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 249 if err != nil { 250 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 251 } 252 conf.RPCAddr.Port = rpcAddr.Port 253 conf.RPCAddr.IP = rpcAddr.IP 254 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 255 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 256 257 // Set up the advertise addresses 258 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 259 if err != nil { 260 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 261 } 262 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 263 if err != nil { 264 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 265 } 266 267 // Server address is the serf advertise address and rpc port. This is the 268 // address that all servers should be able to communicate over RPC with. 269 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 270 if err != nil { 271 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 272 } 273 274 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 275 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 276 conf.ClientRPCAdvertise = rpcAddr 277 conf.ServerRPCAdvertise = serverAddr 278 279 // Set up gc threshold and heartbeat grace period 280 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 281 dur, err := time.ParseDuration(gcThreshold) 282 if err != nil { 283 return nil, err 284 } 285 conf.NodeGCThreshold = dur 286 } 287 if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" { 288 dur, err := time.ParseDuration(gcInterval) 289 if err != nil { 290 return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err) 291 } else if dur <= time.Duration(0) { 292 return nil, fmt.Errorf("job_gc_interval should be greater than 0s") 293 } 294 conf.JobGCInterval = dur 295 } 296 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 297 dur, err := time.ParseDuration(gcThreshold) 298 if err != nil { 299 return nil, err 300 } 301 conf.JobGCThreshold = dur 302 } 303 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 304 dur, err := time.ParseDuration(gcThreshold) 305 if err != nil { 306 return nil, err 307 } 308 conf.EvalGCThreshold = dur 309 } 310 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 311 dur, err := time.ParseDuration(gcThreshold) 312 if err != nil { 313 return nil, err 314 } 315 conf.DeploymentGCThreshold = dur 316 } 317 318 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 319 conf.HeartbeatGrace = heartbeatGrace 320 } 321 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 322 conf.MinHeartbeatTTL = min 323 } 324 if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 325 conf.MaxHeartbeatsPerSecond = maxHPS 326 } 327 328 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 329 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 330 } 331 332 // handle system scheduler preemption default 333 if agentConfig.Server.DefaultSchedulerConfig != nil { 334 conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig 335 } 336 337 // Add the Consul and Vault configs 338 conf.ConsulConfig = agentConfig.Consul 339 conf.VaultConfig = agentConfig.Vault 340 341 // Set the TLS config 342 conf.TLSConfig = agentConfig.TLSConfig 343 344 // Setup telemetry related config 345 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 346 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 347 conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics 348 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 349 350 // Parse Limits timeout from a string into durations 351 if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil { 352 return nil, fmt.Errorf("error parsing rpc_handshake_timeout: %v", err) 353 } else if d < 0 { 354 return nil, fmt.Errorf("rpc_handshake_timeout must be >= 0") 355 } else { 356 conf.RPCHandshakeTimeout = d 357 } 358 359 // Set max rpc conns; nil/0 == unlimited 360 // Leave a little room for streaming RPCs 361 minLimit := config.LimitsNonStreamingConnsPerClient + 5 362 if agentConfig.Limits.RPCMaxConnsPerClient == nil || *agentConfig.Limits.RPCMaxConnsPerClient == 0 { 363 conf.RPCMaxConnsPerClient = 0 364 } else if limit := *agentConfig.Limits.RPCMaxConnsPerClient; limit <= minLimit { 365 return nil, fmt.Errorf("rpc_max_conns_per_client must be > %d; found: %d", minLimit, limit) 366 } else { 367 conf.RPCMaxConnsPerClient = limit 368 } 369 370 return conf, nil 371 } 372 373 // serverConfig is used to generate a new server configuration struct 374 // for initializing a nomad server. 375 func (a *Agent) serverConfig() (*nomad.Config, error) { 376 c, err := convertServerConfig(a.config) 377 if err != nil { 378 return nil, err 379 } 380 381 a.finalizeServerConfig(c) 382 return c, nil 383 } 384 385 // finalizeServerConfig sets configuration fields on the server config that are 386 // not staticly convertable and are from the agent. 387 func (a *Agent) finalizeServerConfig(c *nomad.Config) { 388 // Setup the logging 389 c.Logger = a.logger 390 c.LogOutput = a.logOutput 391 392 // Setup the plugin loaders 393 c.PluginLoader = a.pluginLoader 394 c.PluginSingletonLoader = a.pluginSingletonLoader 395 } 396 397 // clientConfig is used to generate a new client configuration struct for 398 // initializing a Nomad client. 399 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 400 c, err := convertClientConfig(a.config) 401 if err != nil { 402 return nil, err 403 } 404 405 if err := a.finalizeClientConfig(c); err != nil { 406 return nil, err 407 } 408 409 return c, nil 410 } 411 412 // finalizeClientConfig sets configuration fields on the client config that are 413 // not staticly convertable and are from the agent. 414 func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error { 415 // Setup the logging 416 c.Logger = a.logger 417 c.LogOutput = a.logOutput 418 419 // If we are running a server, append both its bind and advertise address so 420 // we are able to at least talk to the local server even if that isn't 421 // configured explicitly. This handles both running server and client on one 422 // host and -dev mode. 423 if a.server != nil { 424 advertised := a.config.AdvertiseAddrs 425 normalized := a.config.normalizedAddrs 426 427 if advertised == nil || advertised.RPC == "" { 428 return fmt.Errorf("AdvertiseAddrs is nil or empty") 429 } else if normalized == nil || normalized.RPC == "" { 430 return fmt.Errorf("normalizedAddrs is nil or empty") 431 } 432 433 if normalized.RPC == advertised.RPC { 434 c.Servers = append(c.Servers, normalized.RPC) 435 } else { 436 c.Servers = append(c.Servers, normalized.RPC, advertised.RPC) 437 } 438 } 439 440 // Setup the plugin loaders 441 c.PluginLoader = a.pluginLoader 442 c.PluginSingletonLoader = a.pluginSingletonLoader 443 444 // Log deprecation messages about Consul related configuration in client 445 // options 446 var invalidConsulKeys []string 447 for key := range c.Options { 448 if strings.HasPrefix(key, "consul") { 449 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 450 } 451 } 452 if len(invalidConsulKeys) > 0 { 453 a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ",")) 454 a.logger.Warn(`Nomad client ignores consul related configuration in client options. 455 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 456 to configure Nomad to work with Consul.`) 457 } 458 459 return nil 460 } 461 462 // convertClientConfig takes an agent config and log output and returns a client 463 // Config. There may be missing fields that must be set by the agent. To do this 464 // call finalizeServerConfig 465 func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { 466 // Setup the configuration 467 conf := agentConfig.ClientConfig 468 if conf == nil { 469 conf = clientconfig.DefaultConfig() 470 } 471 472 conf.Servers = agentConfig.Client.Servers 473 conf.LogLevel = agentConfig.LogLevel 474 conf.DevMode = agentConfig.DevMode 475 conf.EnableDebug = agentConfig.EnableDebug 476 477 if agentConfig.Region != "" { 478 conf.Region = agentConfig.Region 479 } 480 if agentConfig.DataDir != "" { 481 conf.StateDir = filepath.Join(agentConfig.DataDir, "client") 482 conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc") 483 } 484 if agentConfig.Client.StateDir != "" { 485 conf.StateDir = agentConfig.Client.StateDir 486 } 487 if agentConfig.Client.AllocDir != "" { 488 conf.AllocDir = agentConfig.Client.AllocDir 489 } 490 if agentConfig.Client.NetworkInterface != "" { 491 conf.NetworkInterface = agentConfig.Client.NetworkInterface 492 } 493 conf.ChrootEnv = agentConfig.Client.ChrootEnv 494 conf.Options = agentConfig.Client.Options 495 if agentConfig.Client.NetworkSpeed != 0 { 496 conf.NetworkSpeed = agentConfig.Client.NetworkSpeed 497 } 498 if agentConfig.Client.CpuCompute != 0 { 499 conf.CpuCompute = agentConfig.Client.CpuCompute 500 } 501 if agentConfig.Client.MemoryMB != 0 { 502 conf.MemoryMB = agentConfig.Client.MemoryMB 503 } 504 if agentConfig.Client.MaxKillTimeout != "" { 505 dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout) 506 if err != nil { 507 return nil, fmt.Errorf("Error parsing max kill timeout: %s", err) 508 } 509 conf.MaxKillTimeout = dur 510 } 511 conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort) 512 conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort) 513 conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec 514 conf.TemplateConfig.FunctionBlacklist = agentConfig.Client.TemplateConfig.FunctionBlacklist 515 conf.TemplateConfig.DisableSandbox = agentConfig.Client.TemplateConfig.DisableSandbox 516 517 hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes)) 518 for _, v := range agentConfig.Client.HostVolumes { 519 hvMap[v.Name] = v 520 } 521 conf.HostVolumes = hvMap 522 523 // Setup the node 524 conf.Node = new(structs.Node) 525 conf.Node.Datacenter = agentConfig.Datacenter 526 conf.Node.Name = agentConfig.NodeName 527 conf.Node.Meta = agentConfig.Client.Meta 528 conf.Node.NodeClass = agentConfig.Client.NodeClass 529 530 // Set up the HTTP advertise address 531 conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP 532 533 // Canonicalize Node struct 534 conf.Node.Canonicalize() 535 536 // Reserve resources on the node. 537 // COMPAT(0.10): Remove in 0.10 538 r := conf.Node.Reserved 539 if r == nil { 540 r = new(structs.Resources) 541 conf.Node.Reserved = r 542 } 543 r.CPU = agentConfig.Client.Reserved.CPU 544 r.MemoryMB = agentConfig.Client.Reserved.MemoryMB 545 r.DiskMB = agentConfig.Client.Reserved.DiskMB 546 547 res := conf.Node.ReservedResources 548 if res == nil { 549 res = new(structs.NodeReservedResources) 550 conf.Node.ReservedResources = res 551 } 552 res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU) 553 res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB) 554 res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB) 555 res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts 556 557 conf.Version = agentConfig.Version 558 559 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" { 560 return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled") 561 } 562 563 conf.ConsulConfig = agentConfig.Consul 564 conf.VaultConfig = agentConfig.Vault 565 566 // Set up Telemetry configuration 567 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 568 conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics 569 conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics 570 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 571 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 572 573 // Set the TLS related configs 574 conf.TLSConfig = agentConfig.TLSConfig 575 conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP 576 577 // Set the GC related configs 578 conf.GCInterval = agentConfig.Client.GCInterval 579 conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys 580 conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold 581 conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold 582 conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs 583 if agentConfig.Client.NoHostUUID != nil { 584 conf.NoHostUUID = *agentConfig.Client.NoHostUUID 585 } else { 586 // Default no_host_uuid to true 587 conf.NoHostUUID = true 588 } 589 590 // Setup the ACLs 591 conf.ACLEnabled = agentConfig.ACL.Enabled 592 conf.ACLTokenTTL = agentConfig.ACL.TokenTTL 593 conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL 594 595 // Setup networking configuration 596 conf.CNIPath = agentConfig.Client.CNIPath 597 conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName 598 conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet 599 600 return conf, nil 601 } 602 603 // setupServer is used to setup the server if enabled 604 func (a *Agent) setupServer() error { 605 if !a.config.Server.Enabled { 606 return nil 607 } 608 609 // Setup the configuration 610 conf, err := a.serverConfig() 611 if err != nil { 612 return fmt.Errorf("server config setup failed: %s", err) 613 } 614 615 // Generate a node ID and persist it if it is the first instance, otherwise 616 // read the persisted node ID. 617 if err := a.setupNodeID(conf); err != nil { 618 return fmt.Errorf("setting up server node ID failed: %s", err) 619 } 620 621 // Sets up the keyring for gossip encryption 622 if err := a.setupKeyrings(conf); err != nil { 623 return fmt.Errorf("failed to configure keyring: %v", err) 624 } 625 626 // Create the server 627 server, err := nomad.NewServer(conf, a.consulCatalog, a.consulACLs) 628 if err != nil { 629 return fmt.Errorf("server setup failed: %v", err) 630 } 631 a.server = server 632 633 // Consul check addresses default to bind but can be toggled to use advertise 634 rpcCheckAddr := a.config.normalizedAddrs.RPC 635 serfCheckAddr := a.config.normalizedAddrs.Serf 636 if *a.config.Consul.ChecksUseAdvertise { 637 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 638 serfCheckAddr = a.config.AdvertiseAddrs.Serf 639 } 640 641 // Create the Nomad Server services for Consul 642 if *a.config.Consul.AutoAdvertise { 643 httpServ := &structs.Service{ 644 Name: a.config.Consul.ServerServiceName, 645 PortLabel: a.config.AdvertiseAddrs.HTTP, 646 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 647 } 648 const isServer = true 649 if check := a.agentHTTPCheck(isServer); check != nil { 650 httpServ.Checks = []*structs.ServiceCheck{check} 651 } 652 rpcServ := &structs.Service{ 653 Name: a.config.Consul.ServerServiceName, 654 PortLabel: a.config.AdvertiseAddrs.RPC, 655 Tags: append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...), 656 Checks: []*structs.ServiceCheck{ 657 { 658 Name: a.config.Consul.ServerRPCCheckName, 659 Type: "tcp", 660 Interval: serverRpcCheckInterval, 661 Timeout: serverRpcCheckTimeout, 662 PortLabel: rpcCheckAddr, 663 }, 664 }, 665 } 666 serfServ := &structs.Service{ 667 Name: a.config.Consul.ServerServiceName, 668 PortLabel: a.config.AdvertiseAddrs.Serf, 669 Tags: append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...), 670 Checks: []*structs.ServiceCheck{ 671 { 672 Name: a.config.Consul.ServerSerfCheckName, 673 Type: "tcp", 674 Interval: serverSerfCheckInterval, 675 Timeout: serverSerfCheckTimeout, 676 PortLabel: serfCheckAddr, 677 }, 678 }, 679 } 680 681 // Add the http port check if TLS isn't enabled 682 consulServices := []*structs.Service{ 683 rpcServ, 684 serfServ, 685 httpServ, 686 } 687 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 688 return err 689 } 690 } 691 692 return nil 693 } 694 695 // setupNodeID will pull the persisted node ID, if any, or create a random one 696 // and persist it. 697 func (a *Agent) setupNodeID(config *nomad.Config) error { 698 // For dev mode we have no filesystem access so just make a node ID. 699 if a.config.DevMode { 700 config.NodeID = uuid.Generate() 701 return nil 702 } 703 704 // Load saved state, if any. Since a user could edit this, we also 705 // validate it. Saved state overwrites any configured node id 706 fileID := filepath.Join(config.DataDir, "node-id") 707 if _, err := os.Stat(fileID); err == nil { 708 rawID, err := ioutil.ReadFile(fileID) 709 if err != nil { 710 return err 711 } 712 713 nodeID := strings.TrimSpace(string(rawID)) 714 nodeID = strings.ToLower(nodeID) 715 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 716 return err 717 } 718 config.NodeID = nodeID 719 return nil 720 } 721 722 // If they've configured a node ID manually then just use that, as 723 // long as it's valid. 724 if config.NodeID != "" { 725 config.NodeID = strings.ToLower(config.NodeID) 726 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 727 return err 728 } 729 // Persist this configured nodeID to our data directory 730 if err := lib.EnsurePath(fileID, false); err != nil { 731 return err 732 } 733 if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 734 return err 735 } 736 return nil 737 } 738 739 // If we still don't have a valid node ID, make one. 740 if config.NodeID == "" { 741 id := uuid.Generate() 742 if err := lib.EnsurePath(fileID, false); err != nil { 743 return err 744 } 745 if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { 746 return err 747 } 748 749 config.NodeID = id 750 } 751 return nil 752 } 753 754 // setupKeyrings is used to initialize and load keyrings during agent startup 755 func (a *Agent) setupKeyrings(config *nomad.Config) error { 756 file := filepath.Join(a.config.DataDir, serfKeyring) 757 758 if a.config.Server.EncryptKey == "" { 759 goto LOAD 760 } 761 if _, err := os.Stat(file); err != nil { 762 if err := initKeyring(file, a.config.Server.EncryptKey); err != nil { 763 return err 764 } 765 } 766 767 LOAD: 768 if _, err := os.Stat(file); err == nil { 769 config.SerfConfig.KeyringFile = file 770 } 771 if err := loadKeyringFile(config.SerfConfig); err != nil { 772 return err 773 } 774 // Success! 775 return nil 776 } 777 778 // setupClient is used to setup the client if enabled 779 func (a *Agent) setupClient() error { 780 if !a.config.Client.Enabled { 781 return nil 782 } 783 784 // Setup the configuration 785 conf, err := a.clientConfig() 786 if err != nil { 787 return fmt.Errorf("client setup failed: %v", err) 788 } 789 790 // Reserve some ports for the plugins if we are on Windows 791 if runtime.GOOS == "windows" { 792 if err := a.reservePortsForClient(conf); err != nil { 793 return err 794 } 795 } 796 if conf.StateDBFactory == nil { 797 conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode) 798 } 799 800 client, err := client.NewClient(conf, a.consulCatalog, a.consulService) 801 if err != nil { 802 return fmt.Errorf("client setup failed: %v", err) 803 } 804 a.client = client 805 806 // Create the Nomad Client services for Consul 807 if *a.config.Consul.AutoAdvertise { 808 httpServ := &structs.Service{ 809 Name: a.config.Consul.ClientServiceName, 810 PortLabel: a.config.AdvertiseAddrs.HTTP, 811 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 812 } 813 const isServer = false 814 if check := a.agentHTTPCheck(isServer); check != nil { 815 httpServ.Checks = []*structs.ServiceCheck{check} 816 } 817 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 818 return err 819 } 820 } 821 822 return nil 823 } 824 825 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 826 // If no HTTP health check can be supported nil is returned. 827 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 828 // Resolve the http check address 829 httpCheckAddr := a.config.normalizedAddrs.HTTP 830 if *a.config.Consul.ChecksUseAdvertise { 831 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 832 } 833 check := structs.ServiceCheck{ 834 Name: a.config.Consul.ClientHTTPCheckName, 835 Type: "http", 836 Path: "/v1/agent/health?type=client", 837 Protocol: "http", 838 Interval: agentHttpCheckInterval, 839 Timeout: agentHttpCheckTimeout, 840 PortLabel: httpCheckAddr, 841 } 842 // Switch to endpoint that doesn't require a leader for servers 843 if server { 844 check.Name = a.config.Consul.ServerHTTPCheckName 845 check.Path = "/v1/agent/health?type=server" 846 } 847 if !a.config.TLSConfig.EnableHTTP { 848 // No HTTPS, return a plain http check 849 return &check 850 } 851 if a.config.TLSConfig.VerifyHTTPSClient { 852 a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled") 853 return nil 854 } 855 856 // HTTPS enabled; skip verification 857 check.Protocol = "https" 858 check.TLSSkipVerify = true 859 return &check 860 } 861 862 // reservePortsForClient reserves a range of ports for the client to use when 863 // it creates various plugins for log collection, executors, drivers, etc 864 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 865 if conf.Node.ReservedResources == nil { 866 conf.Node.ReservedResources = &structs.NodeReservedResources{} 867 } 868 869 res := conf.Node.ReservedResources.Networks.ReservedHostPorts 870 if res == "" { 871 res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 872 } else { 873 res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 874 } 875 conf.Node.ReservedResources.Networks.ReservedHostPorts = res 876 return nil 877 } 878 879 // Leave is used gracefully exit. Clients will inform servers 880 // of their departure so that allocations can be rescheduled. 881 func (a *Agent) Leave() error { 882 if a.client != nil { 883 if err := a.client.Leave(); err != nil { 884 a.logger.Error("client leave failed", "error", err) 885 } 886 } 887 if a.server != nil { 888 if err := a.server.Leave(); err != nil { 889 a.logger.Error("server leave failed", "error", err) 890 } 891 } 892 return nil 893 } 894 895 // Shutdown is used to terminate the agent. 896 func (a *Agent) Shutdown() error { 897 a.shutdownLock.Lock() 898 defer a.shutdownLock.Unlock() 899 900 if a.shutdown { 901 return nil 902 } 903 904 a.logger.Info("requesting shutdown") 905 if a.client != nil { 906 if err := a.client.Shutdown(); err != nil { 907 a.logger.Error("client shutdown failed", "error", err) 908 } 909 } 910 if a.server != nil { 911 if err := a.server.Shutdown(); err != nil { 912 a.logger.Error("server shutdown failed", "error", err) 913 } 914 } 915 916 if err := a.consulService.Shutdown(); err != nil { 917 a.logger.Error("shutting down Consul client failed", "error", err) 918 } 919 920 a.logger.Info("shutdown complete") 921 a.shutdown = true 922 close(a.shutdownCh) 923 return nil 924 } 925 926 // RPC is used to make an RPC call to the Nomad servers 927 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 928 if a.server != nil { 929 return a.server.RPC(method, args, reply) 930 } 931 return a.client.RPC(method, args, reply) 932 } 933 934 // Client returns the configured client or nil 935 func (a *Agent) Client() *client.Client { 936 return a.client 937 } 938 939 // Server returns the configured server or nil 940 func (a *Agent) Server() *nomad.Server { 941 return a.server 942 } 943 944 // Stats is used to return statistics for debugging and insight 945 // for various sub-systems 946 func (a *Agent) Stats() map[string]map[string]string { 947 stats := make(map[string]map[string]string) 948 if a.server != nil { 949 subStat := a.server.Stats() 950 for k, v := range subStat { 951 stats[k] = v 952 } 953 } 954 if a.client != nil { 955 subStat := a.client.Stats() 956 for k, v := range subStat { 957 stats[k] = v 958 } 959 } 960 return stats 961 } 962 963 // ShouldReload determines if we should reload the configuration and agent 964 // connections. If the TLS Configuration has not changed, we shouldn't reload. 965 func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) { 966 a.configLock.Lock() 967 defer a.configLock.Unlock() 968 969 if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel { 970 agent = true 971 } 972 973 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 974 if err != nil { 975 a.logger.Error("parsing TLS certificate", "error", err) 976 return agent, false 977 } else if !isEqual { 978 return true, true 979 } 980 981 // Allow the ability to only reload HTTP connections 982 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 983 http = true 984 agent = true 985 } 986 987 // Allow the ability to only reload HTTP connections 988 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 989 agent = true 990 } 991 992 return agent, http 993 } 994 995 // Reload handles configuration changes for the agent. Provides a method that 996 // is easier to unit test, as this action is invoked via SIGHUP. 997 func (a *Agent) Reload(newConfig *Config) error { 998 a.configLock.Lock() 999 defer a.configLock.Unlock() 1000 1001 updatedLogging := newConfig != nil && (newConfig.LogLevel != a.config.LogLevel) 1002 1003 if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging { 1004 return fmt.Errorf("cannot reload agent with nil configuration") 1005 } 1006 1007 if updatedLogging { 1008 a.config.LogLevel = newConfig.LogLevel 1009 a.logger.SetLevel(log.LevelFromString(newConfig.LogLevel)) 1010 } 1011 1012 // Update eventer config 1013 if newConfig.Audit != nil { 1014 if err := a.entReloadEventer(newConfig.Audit); err != nil { 1015 return err 1016 } 1017 } 1018 // Allow auditor to call reopen regardless of config changes 1019 // This is primarily for enterprise audit logging to allow the underlying 1020 // file to be reopened if necessary 1021 if err := a.auditor.Reopen(); err != nil { 1022 return err 1023 } 1024 1025 fullUpdateTLSConfig := func() { 1026 // Completely reload the agent's TLS configuration (moving from non-TLS to 1027 // TLS, or vice versa) 1028 // This does not handle errors in loading the new TLS configuration 1029 a.config.TLSConfig = newConfig.TLSConfig.Copy() 1030 } 1031 1032 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 1033 // This is just a TLS configuration reload, we don't need to refresh 1034 // existing network connections 1035 1036 // Reload the certificates on the keyloader and on success store the 1037 // updated TLS config. It is important to reuse the same keyloader 1038 // as this allows us to dynamically reload configurations not only 1039 // on the Agent but on the Server and Client too (they are 1040 // referencing the same keyloader). 1041 keyloader := a.config.TLSConfig.GetKeyLoader() 1042 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 1043 if err != nil { 1044 return err 1045 } 1046 a.config.TLSConfig = newConfig.TLSConfig 1047 a.config.TLSConfig.KeyLoader = keyloader 1048 return nil 1049 } else if newConfig.TLSConfig.IsEmpty() && !a.config.TLSConfig.IsEmpty() { 1050 a.logger.Warn("downgrading agent's existing TLS configuration to plaintext") 1051 fullUpdateTLSConfig() 1052 } else if !newConfig.TLSConfig.IsEmpty() && a.config.TLSConfig.IsEmpty() { 1053 a.logger.Info("upgrading from plaintext configuration to TLS") 1054 fullUpdateTLSConfig() 1055 } 1056 1057 return nil 1058 } 1059 1060 // GetConfig creates a locked reference to the agent's config 1061 func (a *Agent) GetConfig() *Config { 1062 a.configLock.Lock() 1063 defer a.configLock.Unlock() 1064 1065 return a.config 1066 } 1067 1068 // setupConsul creates the Consul client and starts its main Run loop. 1069 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 1070 apiConf, err := consulConfig.ApiConfig() 1071 if err != nil { 1072 return err 1073 } 1074 client, err := api.NewClient(apiConf) 1075 if err != nil { 1076 return err 1077 } 1078 1079 // Create Consul Catalog client for service discovery. 1080 a.consulCatalog = client.Catalog() 1081 1082 // Create Consul ACL client for managing tokens. 1083 a.consulACLs = client.ACL() 1084 1085 // Create Consul Service client for service advertisement and checks. 1086 isClient := false 1087 if a.config.Client != nil && a.config.Client.Enabled { 1088 isClient = true 1089 } 1090 a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) 1091 1092 // Run the Consul service client's sync'ing main loop 1093 go a.consulService.Run() 1094 return nil 1095 } 1096 1097 // noOpAuditor is a no-op Auditor that fulfills the 1098 // event.Auditor interface. 1099 type noOpAuditor struct{} 1100 1101 // Ensure noOpAuditor is an Auditor 1102 var _ event.Auditor = &noOpAuditor{} 1103 1104 func (e *noOpAuditor) Event(ctx context.Context, eventType string, payload interface{}) error { 1105 return nil 1106 } 1107 1108 func (e *noOpAuditor) Enabled() bool { 1109 return false 1110 } 1111 1112 func (e *noOpAuditor) Reopen() error { 1113 return nil 1114 } 1115 1116 func (e *noOpAuditor) SetEnabled(enabled bool) {} 1117 1118 func (e *noOpAuditor) DeliveryEnforced() bool { return false }