// Source: github.com/superfly/nomad@v0.10.5-fly/command/agent/agent.go

package agent

import (
	"fmt"
	"io"
	"io/ioutil"
	golog "log"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/lib"
	log "github.com/hashicorp/go-hclog"
	uuidparse "github.com/hashicorp/go-uuid"
	"github.com/hashicorp/nomad/client"
	clientconfig "github.com/hashicorp/nomad/client/config"
	"github.com/hashicorp/nomad/client/state"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper/pluginutils/loader"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/nomad/structs/config"
	"github.com/hashicorp/raft"
)

const (
	// Interval and timeout values used for the Consul health checks that the
	// agent registers for its HTTP, RPC and Serf endpoints.
	agentHttpCheckInterval  = 10 * time.Second
	agentHttpCheckTimeout   = 5 * time.Second
	serverRpcCheckInterval  = 10 * time.Second
	serverRpcCheckTimeout   = 3 * time.Second
	serverSerfCheckInterval = 10 * time.Second
	serverSerfCheckTimeout  = 3 * time.Second

	// roles used in identifying Consul entries for Nomad agents
	consulRoleServer = "server"
	consulRoleClient = "client"
)

// Agent is a long running daemon that is used to run both
// clients and servers. Servers are responsible for managing
// state and making scheduling decisions. Clients can be
// scheduled to, and are responsible for interfacing with
// servers to run allocations.
type Agent struct {
	// config is the agent configuration. ShouldReload, Reload and GetConfig
	// take configLock before touching it.
	config     *Config
	configLock sync.Mutex

	// logger is the agent-wide logger, httpLogger is derived from it in
	// NewAgent, and logOutput is the raw writer handed to the server and
	// client configs.
	logger     log.InterceptLogger
	httpLogger log.Logger
	logOutput  io.Writer

	// consulService is Nomad's custom Consul client for managing services
	// and checks.
	consulService *consul.ServiceClient

	// consulCatalog is the subset of Consul's Catalog API Nomad uses.
	consulCatalog consul.CatalogAPI

	// consulACLs is the subset of Consul's ACL API Nomad uses.
	consulACLs consul.ACLsAPI

	// client is the launched Nomad Client. Can be nil if the agent isn't
	// configured to run a client.
	client *client.Client

	// server is the launched Nomad Server. Can be nil if the agent isn't
	// configured to run a server.
	server *nomad.Server

	// pluginLoader is used to load plugins
	pluginLoader loader.PluginCatalog

	// pluginSingletonLoader is a plugin loader that returns singleton
	// instances of the plugins.
	pluginSingletonLoader loader.PluginCatalog

	// shutdown records that Shutdown has completed; shutdownCh is closed at
	// the end of Shutdown. Both are guarded by shutdownLock.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// InmemSink is the in-memory metrics sink passed to NewAgent.
	InmemSink *metrics.InmemSink
}

// NewAgent is used to create a new agent with the given configuration.
//
// Setup runs in a fixed order: Consul, plugins, server, client. Consul comes
// first because setupServer and setupClient hand the Consul clients created by
// setupConsul to the server and client they construct. An error is returned
// if any step fails or if neither a client nor a server ends up running.
//
// NOTE(review): when a later step fails, anything already started by an
// earlier step (for example the Consul sync loop, or the server when
// setupClient fails) is not shut down here.
func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) {
	a := &Agent{
		config:     config,
		logOutput:  logOutput,
		shutdownCh: make(chan struct{}),
		InmemSink:  inmem,
	}

	// Create the loggers
	a.logger = logger
	a.httpLogger = a.logger.ResetNamed("http")

	// Global logger should match internal logger as much as possible
	golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds)

	if err := a.setupConsul(config.Consul); err != nil {
		return nil, fmt.Errorf("Failed to initialize Consul client: %v", err)
	}

	// setupPlugins is defined outside this file.
	if err := a.setupPlugins(); err != nil {
		return nil, err
	}

	// Both calls are no-ops (leaving a.server / a.client nil) when the
	// respective mode is disabled in the config.
	if err := a.setupServer(); err != nil {
		return nil, err
	}
	if err := a.setupClient(); err != nil {
		return nil, err
	}
	if a.client == nil && a.server == nil {
		return nil, fmt.Errorf("must have at least client or server mode enabled")
	}

	return a, nil
}

// convertServerConfig takes an agent config and log output and returns a Nomad
// Config. There may be missing fields that must be set by the agent.
To do this 131 // call finalizeServerConfig 132 func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { 133 conf := agentConfig.NomadConfig 134 if conf == nil { 135 conf = nomad.DefaultConfig() 136 } 137 conf.DevMode = agentConfig.DevMode 138 conf.EnableDebug = agentConfig.EnableDebug 139 140 conf.Build = agentConfig.Version.VersionNumber() 141 if agentConfig.Region != "" { 142 conf.Region = agentConfig.Region 143 } 144 145 // Set the Authoritative Region if set, otherwise default to 146 // the same as the local region. 147 if agentConfig.Server.AuthoritativeRegion != "" { 148 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 149 } else if agentConfig.Region != "" { 150 conf.AuthoritativeRegion = agentConfig.Region 151 } 152 153 if agentConfig.Datacenter != "" { 154 conf.Datacenter = agentConfig.Datacenter 155 } 156 if agentConfig.NodeName != "" { 157 conf.NodeName = agentConfig.NodeName 158 } 159 if agentConfig.Server.BootstrapExpect > 0 { 160 if agentConfig.Server.BootstrapExpect == 1 { 161 conf.Bootstrap = true 162 } else { 163 atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect)) 164 } 165 } 166 if agentConfig.DataDir != "" { 167 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 168 } 169 if agentConfig.Server.DataDir != "" { 170 conf.DataDir = agentConfig.Server.DataDir 171 } 172 if agentConfig.Server.ProtocolVersion != 0 { 173 conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion) 174 } 175 if agentConfig.Server.RaftProtocol != 0 { 176 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 177 } 178 if agentConfig.Server.NumSchedulers != nil { 179 conf.NumSchedulers = *agentConfig.Server.NumSchedulers 180 } 181 if len(agentConfig.Server.EnabledSchedulers) != 0 { 182 // Convert to a set and require the core scheduler 183 set := make(map[string]struct{}, 4) 184 set[structs.JobTypeCore] = struct{}{} 185 for _, sched := range 
agentConfig.Server.EnabledSchedulers { 186 set[sched] = struct{}{} 187 } 188 189 schedulers := make([]string, 0, len(set)) 190 for k := range set { 191 schedulers = append(schedulers, k) 192 } 193 194 conf.EnabledSchedulers = schedulers 195 196 } 197 if agentConfig.ACL.Enabled { 198 conf.ACLEnabled = true 199 } 200 if agentConfig.ACL.ReplicationToken != "" { 201 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 202 } 203 if agentConfig.Sentinel != nil { 204 conf.SentinelConfig = agentConfig.Sentinel 205 } 206 if agentConfig.Server.NonVotingServer { 207 conf.NonVoter = true 208 } 209 if agentConfig.Server.RedundancyZone != "" { 210 conf.RedundancyZone = agentConfig.Server.RedundancyZone 211 } 212 if agentConfig.Server.UpgradeVersion != "" { 213 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 214 } 215 if agentConfig.Autopilot != nil { 216 if agentConfig.Autopilot.CleanupDeadServers != nil { 217 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 218 } 219 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 220 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 221 } 222 if agentConfig.Autopilot.LastContactThreshold != 0 { 223 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 224 } 225 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 226 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 227 } 228 if agentConfig.Autopilot.EnableRedundancyZones != nil { 229 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 230 } 231 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 232 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 233 } 234 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 235 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 236 } 237 } 238 239 // Set up the bind 
addresses 240 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 241 if err != nil { 242 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 243 } 244 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 245 if err != nil { 246 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 247 } 248 conf.RPCAddr.Port = rpcAddr.Port 249 conf.RPCAddr.IP = rpcAddr.IP 250 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 251 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 252 253 // Set up the advertise addresses 254 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 255 if err != nil { 256 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 257 } 258 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 259 if err != nil { 260 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 261 } 262 263 // Server address is the serf advertise address and rpc port. This is the 264 // address that all servers should be able to communicate over RPC with. 
265 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 266 if err != nil { 267 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 268 } 269 270 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 271 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 272 conf.ClientRPCAdvertise = rpcAddr 273 conf.ServerRPCAdvertise = serverAddr 274 275 // Set up gc threshold and heartbeat grace period 276 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 277 dur, err := time.ParseDuration(gcThreshold) 278 if err != nil { 279 return nil, err 280 } 281 conf.NodeGCThreshold = dur 282 } 283 if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" { 284 dur, err := time.ParseDuration(gcInterval) 285 if err != nil { 286 return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err) 287 } else if dur <= time.Duration(0) { 288 return nil, fmt.Errorf("job_gc_interval should be greater than 0s") 289 } 290 conf.JobGCInterval = dur 291 } 292 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 293 dur, err := time.ParseDuration(gcThreshold) 294 if err != nil { 295 return nil, err 296 } 297 conf.JobGCThreshold = dur 298 } 299 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 300 dur, err := time.ParseDuration(gcThreshold) 301 if err != nil { 302 return nil, err 303 } 304 conf.EvalGCThreshold = dur 305 } 306 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 307 dur, err := time.ParseDuration(gcThreshold) 308 if err != nil { 309 return nil, err 310 } 311 conf.DeploymentGCThreshold = dur 312 } 313 314 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 315 conf.HeartbeatGrace = heartbeatGrace 316 } 317 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 318 conf.MinHeartbeatTTL = min 319 } 320 if 
maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 321 conf.MaxHeartbeatsPerSecond = maxHPS 322 } 323 324 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 325 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 326 } 327 328 // handle system scheduler preemption default 329 if agentConfig.Server.DefaultSchedulerConfig != nil { 330 conf.DefaultSchedulerConfig = *agentConfig.Server.DefaultSchedulerConfig 331 } 332 333 // Add the Consul and Vault configs 334 conf.ConsulConfig = agentConfig.Consul 335 conf.VaultConfig = agentConfig.Vault 336 337 // Set the TLS config 338 conf.TLSConfig = agentConfig.TLSConfig 339 340 // Setup telemetry related config 341 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 342 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 343 conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics 344 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 345 346 // Parse Limits timeout from a string into durations 347 if d, err := time.ParseDuration(agentConfig.Limits.RPCHandshakeTimeout); err != nil { 348 return nil, fmt.Errorf("error parsing rpc_handshake_timeout: %v", err) 349 } else if d < 0 { 350 return nil, fmt.Errorf("rpc_handshake_timeout must be >= 0") 351 } else { 352 conf.RPCHandshakeTimeout = d 353 } 354 355 // Set max rpc conns; nil/0 == unlimited 356 // Leave a little room for streaming RPCs 357 minLimit := config.LimitsNonStreamingConnsPerClient + 5 358 if agentConfig.Limits.RPCMaxConnsPerClient == nil || *agentConfig.Limits.RPCMaxConnsPerClient == 0 { 359 conf.RPCMaxConnsPerClient = 0 360 } else if limit := *agentConfig.Limits.RPCMaxConnsPerClient; limit <= minLimit { 361 return nil, fmt.Errorf("rpc_max_conns_per_client must be > %d; found: %d", minLimit, limit) 362 } else { 363 conf.RPCMaxConnsPerClient = limit 364 } 365 366 return 
conf, nil 367 } 368 369 // serverConfig is used to generate a new server configuration struct 370 // for initializing a nomad server. 371 func (a *Agent) serverConfig() (*nomad.Config, error) { 372 c, err := convertServerConfig(a.config) 373 if err != nil { 374 return nil, err 375 } 376 377 a.finalizeServerConfig(c) 378 return c, nil 379 } 380 381 // finalizeServerConfig sets configuration fields on the server config that are 382 // not staticly convertable and are from the agent. 383 func (a *Agent) finalizeServerConfig(c *nomad.Config) { 384 // Setup the logging 385 c.Logger = a.logger 386 c.LogOutput = a.logOutput 387 388 // Setup the plugin loaders 389 c.PluginLoader = a.pluginLoader 390 c.PluginSingletonLoader = a.pluginSingletonLoader 391 } 392 393 // clientConfig is used to generate a new client configuration struct for 394 // initializing a Nomad client. 395 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 396 c, err := convertClientConfig(a.config) 397 if err != nil { 398 return nil, err 399 } 400 401 if err := a.finalizeClientConfig(c); err != nil { 402 return nil, err 403 } 404 405 return c, nil 406 } 407 408 // finalizeClientConfig sets configuration fields on the client config that are 409 // not staticly convertable and are from the agent. 410 func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error { 411 // Setup the logging 412 c.Logger = a.logger 413 c.LogOutput = a.logOutput 414 415 // If we are running a server, append both its bind and advertise address so 416 // we are able to at least talk to the local server even if that isn't 417 // configured explicitly. This handles both running server and client on one 418 // host and -dev mode. 
419 if a.server != nil { 420 if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" { 421 return fmt.Errorf("AdvertiseAddrs is nil or empty") 422 } else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" { 423 return fmt.Errorf("normalizedAddrs is nil or empty") 424 } 425 426 c.Servers = append(c.Servers, 427 a.config.normalizedAddrs.RPC, 428 a.config.AdvertiseAddrs.RPC) 429 } 430 431 // Setup the plugin loaders 432 c.PluginLoader = a.pluginLoader 433 c.PluginSingletonLoader = a.pluginSingletonLoader 434 435 // Log deprecation messages about Consul related configuration in client 436 // options 437 var invalidConsulKeys []string 438 for key := range c.Options { 439 if strings.HasPrefix(key, "consul") { 440 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 441 } 442 } 443 if len(invalidConsulKeys) > 0 { 444 a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ",")) 445 a.logger.Warn(`Nomad client ignores consul related configuration in client options. 446 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 447 to configure Nomad to work with Consul.`) 448 } 449 450 return nil 451 } 452 453 // convertClientConfig takes an agent config and log output and returns a client 454 // Config. There may be missing fields that must be set by the agent. 
// To do this
// call finalizeClientConfig.
//
// The conversion starts from agentConfig.ClientConfig when it is non-nil (that
// struct is modified in place) and from clientconfig.DefaultConfig()
// otherwise. An error is returned when max_kill_timeout cannot be parsed or
// when auto_advertise is enabled without a client service name.
func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
	// Setup the configuration
	conf := agentConfig.ClientConfig
	if conf == nil {
		conf = clientconfig.DefaultConfig()
	}

	conf.Servers = agentConfig.Client.Servers
	conf.LogLevel = agentConfig.LogLevel
	conf.DevMode = agentConfig.DevMode
	conf.EnableDebug = agentConfig.EnableDebug

	if agentConfig.Region != "" {
		conf.Region = agentConfig.Region
	}
	// The client-specific state/alloc dirs, when set, win over the
	// directories derived from the shared data dir.
	if agentConfig.DataDir != "" {
		conf.StateDir = filepath.Join(agentConfig.DataDir, "client")
		conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc")
	}
	if agentConfig.Client.StateDir != "" {
		conf.StateDir = agentConfig.Client.StateDir
	}
	if agentConfig.Client.AllocDir != "" {
		conf.AllocDir = agentConfig.Client.AllocDir
	}
	if agentConfig.Client.NetworkInterface != "" {
		conf.NetworkInterface = agentConfig.Client.NetworkInterface
	}
	conf.ChrootEnv = agentConfig.Client.ChrootEnv
	conf.Options = agentConfig.Client.Options
	if agentConfig.Client.NetworkSpeed != 0 {
		conf.NetworkSpeed = agentConfig.Client.NetworkSpeed
	}
	if agentConfig.Client.CpuCompute != 0 {
		conf.CpuCompute = agentConfig.Client.CpuCompute
	}
	if agentConfig.Client.MemoryMB != 0 {
		conf.MemoryMB = agentConfig.Client.MemoryMB
	}
	if agentConfig.Client.MaxKillTimeout != "" {
		dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout)
		if err != nil {
			return nil, fmt.Errorf("Error parsing max kill timeout: %s", err)
		}
		conf.MaxKillTimeout = dur
	}
	conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort)
	conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort)
	conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec
	conf.TemplateConfig.FunctionBlacklist = agentConfig.Client.TemplateConfig.FunctionBlacklist
	conf.TemplateConfig.DisableSandbox = agentConfig.Client.TemplateConfig.DisableSandbox

	// Index the configured host volumes by name.
	hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes))
	for _, v := range agentConfig.Client.HostVolumes {
		hvMap[v.Name] = v
	}
	conf.HostVolumes = hvMap

	// Setup the node
	conf.Node = new(structs.Node)
	conf.Node.Datacenter = agentConfig.Datacenter
	conf.Node.Name = agentConfig.NodeName
	conf.Node.Meta = agentConfig.Client.Meta
	conf.Node.NodeClass = agentConfig.Client.NodeClass

	// Set up the HTTP advertise address
	conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP

	// Reserve resources on the node.
	// COMPAT(0.10): Remove in 0.10
	r := conf.Node.Reserved
	if r == nil {
		r = new(structs.Resources)
		conf.Node.Reserved = r
	}
	r.CPU = agentConfig.Client.Reserved.CPU
	r.MemoryMB = agentConfig.Client.Reserved.MemoryMB
	r.DiskMB = agentConfig.Client.Reserved.DiskMB

	// The same reservations are mirrored into the ReservedResources
	// representation.
	res := conf.Node.ReservedResources
	if res == nil {
		res = new(structs.NodeReservedResources)
		conf.Node.ReservedResources = res
	}
	res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU)
	res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB)
	res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB)
	res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts

	conf.Version = agentConfig.Version

	// NOTE(review): AutoAdvertise is dereferenced without a nil check; this
	// assumes the config was populated with defaults before conversion.
	if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" {
		return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled")
	}

	conf.ConsulConfig = agentConfig.Consul
	conf.VaultConfig = agentConfig.Vault

	// Set up Telemetry configuration
	conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval
	conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics
	conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics
	conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics
	conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics

	// Set the TLS related configs
	conf.TLSConfig = agentConfig.TLSConfig
	conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP

	// Set the GC related configs
	conf.GCInterval = agentConfig.Client.GCInterval
	conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys
	conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold
	conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold
	conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs
	if agentConfig.Client.NoHostUUID != nil {
		conf.NoHostUUID = *agentConfig.Client.NoHostUUID
	} else {
		// Default no_host_uuid to true
		conf.NoHostUUID = true
	}

	// Setup the ACLs
	conf.ACLEnabled = agentConfig.ACL.Enabled
	conf.ACLTokenTTL = agentConfig.ACL.TokenTTL
	conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL

	// Setup networking configuration
	conf.CNIPath = agentConfig.Client.CNIPath
	conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName
	conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet

	return conf, nil
}

// setupServer is used to setup the server if enabled. It builds the server
// config, resolves the node ID and keyring, starts the server and, when
// auto_advertise is on, registers the server's HTTP, RPC and Serf services
// with Consul. It is a no-op when server mode is disabled.
func (a *Agent) setupServer() error {
	if !a.config.Server.Enabled {
		return nil
	}

	// Setup the configuration
	conf, err := a.serverConfig()
	if err != nil {
		return fmt.Errorf("server config setup failed: %s", err)
	}

	// Generate a node ID and persist it if it is the first instance, otherwise
	// read the persisted node ID.
	if err := a.setupNodeID(conf); err != nil {
		return fmt.Errorf("setting up server node ID failed: %s", err)
	}

	// Sets up the keyring for gossip encryption
	if err := a.setupKeyrings(conf); err != nil {
		return fmt.Errorf("failed to configure keyring: %v", err)
	}

	// Create the server
	server, err := nomad.NewServer(conf, a.consulCatalog, a.consulACLs)
	if err != nil {
		return fmt.Errorf("server setup failed: %v", err)
	}
	a.server = server

	// Consul check addresses default to bind but can be toggled to use advertise
	rpcCheckAddr := a.config.normalizedAddrs.RPC
	serfCheckAddr := a.config.normalizedAddrs.Serf
	if *a.config.Consul.ChecksUseAdvertise {
		rpcCheckAddr = a.config.AdvertiseAddrs.RPC
		serfCheckAddr = a.config.AdvertiseAddrs.Serf
	}

	// Create the Nomad Server services for Consul
	if *a.config.Consul.AutoAdvertise {
		httpServ := &structs.Service{
			Name:      a.config.Consul.ServerServiceName,
			PortLabel: a.config.AdvertiseAddrs.HTTP,
			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
		}
		// The HTTP service only gets a check when agentHTTPCheck can
		// provide one (it returns nil otherwise).
		const isServer = true
		if check := a.agentHTTPCheck(isServer); check != nil {
			httpServ.Checks = []*structs.ServiceCheck{check}
		}
		rpcServ := &structs.Service{
			Name:      a.config.Consul.ServerServiceName,
			PortLabel: a.config.AdvertiseAddrs.RPC,
			Tags:      append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...),
			Checks: []*structs.ServiceCheck{
				{
					Name:      a.config.Consul.ServerRPCCheckName,
					Type:      "tcp",
					Interval:  serverRpcCheckInterval,
					Timeout:   serverRpcCheckTimeout,
					PortLabel: rpcCheckAddr,
				},
			},
		}
		serfServ := &structs.Service{
			Name:      a.config.Consul.ServerServiceName,
			PortLabel: a.config.AdvertiseAddrs.Serf,
			Tags:      append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...),
			Checks: []*structs.ServiceCheck{
				{
					Name:      a.config.Consul.ServerSerfCheckName,
					Type:      "tcp",
					Interval:  serverSerfCheckInterval,
					Timeout:   serverSerfCheckTimeout,
					PortLabel: serfCheckAddr,
				},
			},
		}

		// Register the RPC, Serf and HTTP services together under the
		// server role.
		consulServices := []*structs.Service{
			rpcServ,
			serfServ,
			httpServ,
		}
		if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil {
			return err
		}
	}

	return nil
}

// setupNodeID will pull the persisted node ID, if any, or create a random one
// and persist it. Precedence is: the "node-id" file in the server data dir,
// then a node ID already present in config, then a freshly generated one.
func (a *Agent) setupNodeID(config *nomad.Config) error {
	// For dev mode we have no filesystem access so just make a node ID.
	if a.config.DevMode {
		config.NodeID = uuid.Generate()
		return nil
	}

	// Load saved state, if any. Since a user could edit this, we also
	// validate it. Saved state overwrites any configured node id
	fileID := filepath.Join(config.DataDir, "node-id")
	if _, err := os.Stat(fileID); err == nil {
		rawID, err := ioutil.ReadFile(fileID)
		if err != nil {
			return err
		}

		// Normalize before validating: surrounding whitespace is dropped
		// and the ID is lower-cased.
		nodeID := strings.TrimSpace(string(rawID))
		nodeID = strings.ToLower(nodeID)
		if _, err := uuidparse.ParseUUID(nodeID); err != nil {
			return err
		}
		config.NodeID = nodeID
		return nil
	}

	// If they've configured a node ID manually then just use that, as
	// long as it's valid.
	if config.NodeID != "" {
		config.NodeID = strings.ToLower(config.NodeID)
		if _, err := uuidparse.ParseUUID(config.NodeID); err != nil {
			return err
		}
		// Persist this configured nodeID to our data directory
		if err := lib.EnsurePath(fileID, false); err != nil {
			return err
		}
		if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil {
			return err
		}
		return nil
	}

	// If we still don't have a valid node ID, make one.
	// (The branch above returns whenever NodeID is non-empty, so this
	// condition always holds when execution gets here.)
	if config.NodeID == "" {
		id := uuid.Generate()
		if err := lib.EnsurePath(fileID, false); err != nil {
			return err
		}
		if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil {
			return err
		}

		config.NodeID = id
	}
	return nil
}

// setupKeyrings is used to initialize and load keyrings during agent startup.
// When an encrypt key is configured and no keyring file exists yet, the file
// is created from that key; an existing file is left untouched. The keyring
// file, if present, is then set on the Serf config and loaded.
func (a *Agent) setupKeyrings(config *nomad.Config) error {
	// serfKeyring, initKeyring and loadKeyringFile are defined outside this
	// file.
	file := filepath.Join(a.config.DataDir, serfKeyring)

	// Without a configured key there is nothing to initialize; go straight
	// to loading whatever keyring may already exist.
	if a.config.Server.EncryptKey == "" {
		goto LOAD
	}
	if _, err := os.Stat(file); err != nil {
		if err := initKeyring(file, a.config.Server.EncryptKey); err != nil {
			return err
		}
	}

LOAD:
	if _, err := os.Stat(file); err == nil {
		config.SerfConfig.KeyringFile = file
	}
	if err := loadKeyringFile(config.SerfConfig); err != nil {
		return err
	}
	// Success!
	return nil
}

// setupClient is used to setup the client if enabled. It builds the client
// config, starts the client and, when auto_advertise is on, registers the
// client's HTTP service with Consul. It is a no-op when client mode is
// disabled.
func (a *Agent) setupClient() error {
	if !a.config.Client.Enabled {
		return nil
	}

	// Setup the configuration
	conf, err := a.clientConfig()
	if err != nil {
		return fmt.Errorf("client setup failed: %v", err)
	}

	// Reserve some ports for the plugins if we are on Windows
	if runtime.GOOS == "windows" {
		if err := a.reservePortsForClient(conf); err != nil {
			return err
		}
	}
	// Only pick a state DB factory when the config did not already carry one.
	if conf.StateDBFactory == nil {
		conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode)
	}

	client, err := client.NewClient(conf, a.consulCatalog, a.consulService)
	if err != nil {
		return fmt.Errorf("client setup failed: %v", err)
	}
	a.client = client

	// Create the Nomad Client services for Consul
	if *a.config.Consul.AutoAdvertise {
		httpServ := &structs.Service{
			Name:      a.config.Consul.ClientServiceName,
			PortLabel: a.config.AdvertiseAddrs.HTTP,
			Tags:      append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...),
		}
		const isServer = false
		if check := a.agentHTTPCheck(isServer); check != nil {
			httpServ.Checks = []*structs.ServiceCheck{check}
		}
		if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil {
			return err
		}
	}

	return nil
}

// agentHTTPCheck returns a health check for the agent's HTTP API if possible.
// If no HTTP health check can be supported nil is returned. The server
// argument selects the server check name and health endpoint instead of the
// client ones.
func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck {
	// Resolve the http check address
	httpCheckAddr := a.config.normalizedAddrs.HTTP
	if *a.config.Consul.ChecksUseAdvertise {
		httpCheckAddr = a.config.AdvertiseAddrs.HTTP
	}
	check := structs.ServiceCheck{
		Name:      a.config.Consul.ClientHTTPCheckName,
		Type:      "http",
		Path:      "/v1/agent/health?type=client",
		Protocol:  "http",
		Interval:  agentHttpCheckInterval,
		Timeout:   agentHttpCheckTimeout,
		PortLabel: httpCheckAddr,
	}
	// Switch to endpoint that doesn't require a leader for servers
	if server {
		check.Name = a.config.Consul.ServerHTTPCheckName
		check.Path = "/v1/agent/health?type=server"
	}
	if !a.config.TLSConfig.EnableHTTP {
		// No HTTPS, return a plain http check
		return &check
	}
	// With client certificate verification on, no check is registered at
	// all.
	if a.config.TLSConfig.VerifyHTTPSClient {
		a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled")
		return nil
	}

	// HTTPS enabled; skip verification
	check.Protocol = "https"
	check.TLSSkipVerify = true
	return &check
}

// reservePortsForClient reserves a range of ports for the client to use when
// it creates various plugins for log collection, executors, drivers, etc.
// The ClientMinPort-ClientMaxPort range is appended to any host ports that
// are already reserved. It always returns nil.
func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error {
	if conf.Node.ReservedResources == nil {
		conf.Node.ReservedResources = &structs.NodeReservedResources{}
	}

	res := conf.Node.ReservedResources.Networks.ReservedHostPorts
	if res == "" {
		res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
	} else {
		res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort)
	}
	conf.Node.ReservedResources.Networks.ReservedHostPorts = res
	return nil
}

// Leave is used to gracefully exit. Clients will inform servers
// of their departure so that allocations can be rescheduled.
// Failures are logged rather than returned; Leave always returns nil.
func (a *Agent) Leave() error {
	if a.client != nil {
		if err := a.client.Leave(); err != nil {
			a.logger.Error("client leave failed", "error", err)
		}
	}
	if a.server != nil {
		if err := a.server.Leave(); err != nil {
			a.logger.Error("server leave failed", "error", err)
		}
	}
	return nil
}

// Shutdown is used to terminate the agent. It shuts down the client, the
// server and the Consul service client in that order, logging (not
// returning) any failure, and then closes shutdownCh. Calls made after a
// completed shutdown return nil immediately.
func (a *Agent) Shutdown() error {
	a.shutdownLock.Lock()
	defer a.shutdownLock.Unlock()

	if a.shutdown {
		return nil
	}

	a.logger.Info("requesting shutdown")
	if a.client != nil {
		if err := a.client.Shutdown(); err != nil {
			a.logger.Error("client shutdown failed", "error", err)
		}
	}
	if a.server != nil {
		if err := a.server.Shutdown(); err != nil {
			a.logger.Error("server shutdown failed", "error", err)
		}
	}

	if err := a.consulService.Shutdown(); err != nil {
		a.logger.Error("shutting down Consul client failed", "error", err)
	}

	a.logger.Info("shutdown complete")
	a.shutdown = true
	close(a.shutdownCh)
	return nil
}

// RPC is used to make an RPC call to the Nomad servers. The local server is
// used when there is one; otherwise the call goes through the client.
func (a *Agent) RPC(method string, args interface{}, reply interface{}) error {
	if a.server != nil {
		return a.server.RPC(method, args, reply)
	}
	// NOTE(review): assumes a.client is non-nil whenever a.server is nil,
	// which NewAgent enforces for agents it returns.
	return a.client.RPC(method, args, reply)
}

// Client returns the configured client or nil
func (a *Agent) Client() *client.Client {
	return a.client
}

// Server returns the
configured server or nil 928 func (a *Agent) Server() *nomad.Server { 929 return a.server 930 } 931 932 // Stats is used to return statistics for debugging and insight 933 // for various sub-systems 934 func (a *Agent) Stats() map[string]map[string]string { 935 stats := make(map[string]map[string]string) 936 if a.server != nil { 937 subStat := a.server.Stats() 938 for k, v := range subStat { 939 stats[k] = v 940 } 941 } 942 if a.client != nil { 943 subStat := a.client.Stats() 944 for k, v := range subStat { 945 stats[k] = v 946 } 947 } 948 return stats 949 } 950 951 // ShouldReload determines if we should reload the configuration and agent 952 // connections. If the TLS Configuration has not changed, we shouldn't reload. 953 func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) { 954 a.configLock.Lock() 955 defer a.configLock.Unlock() 956 957 if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel { 958 agent = true 959 } 960 961 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 962 if err != nil { 963 a.logger.Error("parsing TLS certificate", "error", err) 964 return agent, false 965 } else if !isEqual { 966 return true, true 967 } 968 969 // Allow the ability to only reload HTTP connections 970 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 971 http = true 972 agent = true 973 } 974 975 // Allow the ability to only reload HTTP connections 976 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 977 agent = true 978 } 979 980 return agent, http 981 } 982 983 // Reload handles configuration changes for the agent. Provides a method that 984 // is easier to unit test, as this action is invoked via SIGHUP. 
985 func (a *Agent) Reload(newConfig *Config) error { 986 a.configLock.Lock() 987 defer a.configLock.Unlock() 988 989 updatedLogging := newConfig != nil && (newConfig.LogLevel != a.config.LogLevel) 990 991 if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging { 992 return fmt.Errorf("cannot reload agent with nil configuration") 993 } 994 995 if updatedLogging { 996 a.config.LogLevel = newConfig.LogLevel 997 a.logger.SetLevel(log.LevelFromString(newConfig.LogLevel)) 998 } 999 1000 fullUpdateTLSConfig := func() { 1001 // Completely reload the agent's TLS configuration (moving from non-TLS to 1002 // TLS, or vice versa) 1003 // This does not handle errors in loading the new TLS configuration 1004 a.config.TLSConfig = newConfig.TLSConfig.Copy() 1005 } 1006 1007 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 1008 // This is just a TLS configuration reload, we don't need to refresh 1009 // existing network connections 1010 1011 // Reload the certificates on the keyloader and on success store the 1012 // updated TLS config. It is important to reuse the same keyloader 1013 // as this allows us to dynamically reload configurations not only 1014 // on the Agent but on the Server and Client too (they are 1015 // referencing the same keyloader). 
1016 keyloader := a.config.TLSConfig.GetKeyLoader() 1017 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 1018 if err != nil { 1019 return err 1020 } 1021 a.config.TLSConfig = newConfig.TLSConfig 1022 a.config.TLSConfig.KeyLoader = keyloader 1023 return nil 1024 } else if newConfig.TLSConfig.IsEmpty() && !a.config.TLSConfig.IsEmpty() { 1025 a.logger.Warn("downgrading agent's existing TLS configuration to plaintext") 1026 fullUpdateTLSConfig() 1027 } else if !newConfig.TLSConfig.IsEmpty() && a.config.TLSConfig.IsEmpty() { 1028 a.logger.Info("upgrading from plaintext configuration to TLS") 1029 fullUpdateTLSConfig() 1030 } 1031 1032 return nil 1033 } 1034 1035 // GetConfig creates a locked reference to the agent's config 1036 func (a *Agent) GetConfig() *Config { 1037 a.configLock.Lock() 1038 defer a.configLock.Unlock() 1039 1040 return a.config 1041 } 1042 1043 // setupConsul creates the Consul client and starts its main Run loop. 1044 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 1045 apiConf, err := consulConfig.ApiConfig() 1046 if err != nil { 1047 return err 1048 } 1049 client, err := api.NewClient(apiConf) 1050 if err != nil { 1051 return err 1052 } 1053 1054 // Create Consul Catalog client for service discovery. 1055 a.consulCatalog = client.Catalog() 1056 1057 // Create Consul ACL client for managing tokens. 1058 a.consulACLs = client.ACL() 1059 1060 // Create Consul Service client for service advertisement and checks. 1061 isClient := false 1062 if a.config.Client != nil && a.config.Client.Enabled { 1063 isClient = true 1064 } 1065 a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) 1066 1067 // Run the Consul service client's sync'ing main loop 1068 go a.consulService.Run() 1069 return nil 1070 }