github.com/karlem/nomad@v0.10.2-rc1/command/agent/agent.go (about) 1 package agent 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 golog "log" 8 "net" 9 "os" 10 "path/filepath" 11 "runtime" 12 "strings" 13 "sync" 14 "sync/atomic" 15 "time" 16 17 metrics "github.com/armon/go-metrics" 18 "github.com/hashicorp/consul/api" 19 "github.com/hashicorp/consul/lib" 20 log "github.com/hashicorp/go-hclog" 21 uuidparse "github.com/hashicorp/go-uuid" 22 "github.com/hashicorp/nomad/client" 23 clientconfig "github.com/hashicorp/nomad/client/config" 24 "github.com/hashicorp/nomad/client/state" 25 "github.com/hashicorp/nomad/command/agent/consul" 26 "github.com/hashicorp/nomad/helper/pluginutils/loader" 27 "github.com/hashicorp/nomad/helper/uuid" 28 "github.com/hashicorp/nomad/nomad" 29 "github.com/hashicorp/nomad/nomad/structs" 30 "github.com/hashicorp/nomad/nomad/structs/config" 31 "github.com/hashicorp/raft" 32 ) 33 34 const ( 35 agentHttpCheckInterval = 10 * time.Second 36 agentHttpCheckTimeout = 5 * time.Second 37 serverRpcCheckInterval = 10 * time.Second 38 serverRpcCheckTimeout = 3 * time.Second 39 serverSerfCheckInterval = 10 * time.Second 40 serverSerfCheckTimeout = 3 * time.Second 41 42 // roles used in identifying Consul entries for Nomad agents 43 consulRoleServer = "server" 44 consulRoleClient = "client" 45 ) 46 47 // Agent is a long running daemon that is used to run both 48 // clients and servers. Servers are responsible for managing 49 // state and making scheduling decisions. Clients can be 50 // scheduled to, and are responsible for interfacing with 51 // servers to run allocations. 52 type Agent struct { 53 config *Config 54 configLock sync.Mutex 55 56 logger log.InterceptLogger 57 httpLogger log.Logger 58 logOutput io.Writer 59 60 // consulService is Nomad's custom Consul client for managing services 61 // and checks. 62 consulService *consul.ServiceClient 63 64 // consulCatalog is the subset of Consul's Catalog API Nomad uses. 65 consulCatalog consul.CatalogAPI 66 67 // client is the launched Nomad Client. Can be nil if the agent isn't 68 // configured to run a client. 69 client *client.Client 70 71 // server is the launched Nomad Server. Can be nil if the agent isn't 72 // configured to run a server. 73 server *nomad.Server 74 75 // pluginLoader is used to load plugins 76 pluginLoader loader.PluginCatalog 77 78 // pluginSingletonLoader is a plugin loader that will returns singleton 79 // instances of the plugins. 80 pluginSingletonLoader loader.PluginCatalog 81 82 shutdown bool 83 shutdownCh chan struct{} 84 shutdownLock sync.Mutex 85 86 InmemSink *metrics.InmemSink 87 } 88 89 // NewAgent is used to create a new agent with the given configuration 90 func NewAgent(config *Config, logger log.InterceptLogger, logOutput io.Writer, inmem *metrics.InmemSink) (*Agent, error) { 91 a := &Agent{ 92 config: config, 93 logOutput: logOutput, 94 shutdownCh: make(chan struct{}), 95 InmemSink: inmem, 96 } 97 98 // Create the loggers 99 a.logger = logger 100 a.httpLogger = a.logger.ResetNamed("http") 101 102 // Global logger should match internal logger as much as possible 103 golog.SetFlags(golog.LstdFlags | golog.Lmicroseconds) 104 105 if err := a.setupConsul(config.Consul); err != nil { 106 return nil, fmt.Errorf("Failed to initialize Consul client: %v", err) 107 } 108 109 if err := a.setupPlugins(); err != nil { 110 return nil, err 111 } 112 113 if err := a.setupServer(); err != nil { 114 return nil, err 115 } 116 if err := a.setupClient(); err != nil { 117 return nil, err 118 } 119 if a.client == nil && a.server == nil { 120 return nil, fmt.Errorf("must have at least client or server mode enabled") 121 } 122 123 return a, nil 124 } 125 126 // convertServerConfig takes an agent config and log output and returns a Nomad 127 // Config. There may be missing fields that must be set by the agent. To do this 128 // call finalizeServerConfig 129 func convertServerConfig(agentConfig *Config) (*nomad.Config, error) { 130 conf := agentConfig.NomadConfig 131 if conf == nil { 132 conf = nomad.DefaultConfig() 133 } 134 conf.DevMode = agentConfig.DevMode 135 conf.Build = agentConfig.Version.VersionNumber() 136 if agentConfig.Region != "" { 137 conf.Region = agentConfig.Region 138 } 139 140 // Set the Authoritative Region if set, otherwise default to 141 // the same as the local region. 142 if agentConfig.Server.AuthoritativeRegion != "" { 143 conf.AuthoritativeRegion = agentConfig.Server.AuthoritativeRegion 144 } else if agentConfig.Region != "" { 145 conf.AuthoritativeRegion = agentConfig.Region 146 } 147 148 if agentConfig.Datacenter != "" { 149 conf.Datacenter = agentConfig.Datacenter 150 } 151 if agentConfig.NodeName != "" { 152 conf.NodeName = agentConfig.NodeName 153 } 154 if agentConfig.Server.BootstrapExpect > 0 { 155 if agentConfig.Server.BootstrapExpect == 1 { 156 conf.Bootstrap = true 157 } else { 158 atomic.StoreInt32(&conf.BootstrapExpect, int32(agentConfig.Server.BootstrapExpect)) 159 } 160 } 161 if agentConfig.DataDir != "" { 162 conf.DataDir = filepath.Join(agentConfig.DataDir, "server") 163 } 164 if agentConfig.Server.DataDir != "" { 165 conf.DataDir = agentConfig.Server.DataDir 166 } 167 if agentConfig.Server.ProtocolVersion != 0 { 168 conf.ProtocolVersion = uint8(agentConfig.Server.ProtocolVersion) 169 } 170 if agentConfig.Server.RaftProtocol != 0 { 171 conf.RaftConfig.ProtocolVersion = raft.ProtocolVersion(agentConfig.Server.RaftProtocol) 172 } 173 if agentConfig.Server.NumSchedulers != nil { 174 conf.NumSchedulers = *agentConfig.Server.NumSchedulers 175 } 176 if len(agentConfig.Server.EnabledSchedulers) != 0 { 177 // Convert to a set and require the core scheduler 178 set := make(map[string]struct{}, 4) 179 set[structs.JobTypeCore] = struct{}{} 180 for _, sched := range agentConfig.Server.EnabledSchedulers { 181 set[sched] = struct{}{} 182 } 183 184 schedulers := make([]string, 0, len(set)) 185 for k := range set { 186 schedulers = append(schedulers, k) 187 } 188 189 conf.EnabledSchedulers = schedulers 190 191 } 192 if agentConfig.ACL.Enabled { 193 conf.ACLEnabled = true 194 } 195 if agentConfig.ACL.ReplicationToken != "" { 196 conf.ReplicationToken = agentConfig.ACL.ReplicationToken 197 } 198 if agentConfig.Sentinel != nil { 199 conf.SentinelConfig = agentConfig.Sentinel 200 } 201 if agentConfig.Server.NonVotingServer { 202 conf.NonVoter = true 203 } 204 if agentConfig.Server.RedundancyZone != "" { 205 conf.RedundancyZone = agentConfig.Server.RedundancyZone 206 } 207 if agentConfig.Server.UpgradeVersion != "" { 208 conf.UpgradeVersion = agentConfig.Server.UpgradeVersion 209 } 210 if agentConfig.Autopilot != nil { 211 if agentConfig.Autopilot.CleanupDeadServers != nil { 212 conf.AutopilotConfig.CleanupDeadServers = *agentConfig.Autopilot.CleanupDeadServers 213 } 214 if agentConfig.Autopilot.ServerStabilizationTime != 0 { 215 conf.AutopilotConfig.ServerStabilizationTime = agentConfig.Autopilot.ServerStabilizationTime 216 } 217 if agentConfig.Autopilot.LastContactThreshold != 0 { 218 conf.AutopilotConfig.LastContactThreshold = agentConfig.Autopilot.LastContactThreshold 219 } 220 if agentConfig.Autopilot.MaxTrailingLogs != 0 { 221 conf.AutopilotConfig.MaxTrailingLogs = uint64(agentConfig.Autopilot.MaxTrailingLogs) 222 } 223 if agentConfig.Autopilot.EnableRedundancyZones != nil { 224 conf.AutopilotConfig.EnableRedundancyZones = *agentConfig.Autopilot.EnableRedundancyZones 225 } 226 if agentConfig.Autopilot.DisableUpgradeMigration != nil { 227 conf.AutopilotConfig.DisableUpgradeMigration = *agentConfig.Autopilot.DisableUpgradeMigration 228 } 229 if agentConfig.Autopilot.EnableCustomUpgrades != nil { 230 conf.AutopilotConfig.EnableCustomUpgrades = *agentConfig.Autopilot.EnableCustomUpgrades 231 } 232 } 233 234 // Set up the bind addresses 235 rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC) 236 if err != nil { 237 return nil, fmt.Errorf("Failed to parse RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err) 238 } 239 serfAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.Serf) 240 if err != nil { 241 return nil, fmt.Errorf("Failed to parse Serf address %q: %v", agentConfig.normalizedAddrs.Serf, err) 242 } 243 conf.RPCAddr.Port = rpcAddr.Port 244 conf.RPCAddr.IP = rpcAddr.IP 245 conf.SerfConfig.MemberlistConfig.BindPort = serfAddr.Port 246 conf.SerfConfig.MemberlistConfig.BindAddr = serfAddr.IP.String() 247 248 // Set up the advertise addresses 249 rpcAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.RPC) 250 if err != nil { 251 return nil, fmt.Errorf("Failed to parse RPC advertise address %q: %v", agentConfig.AdvertiseAddrs.RPC, err) 252 } 253 serfAddr, err = net.ResolveTCPAddr("tcp", agentConfig.AdvertiseAddrs.Serf) 254 if err != nil { 255 return nil, fmt.Errorf("Failed to parse Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 256 } 257 258 // Server address is the serf advertise address and rpc port. This is the 259 // address that all servers should be able to communicate over RPC with. 260 serverAddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(serfAddr.IP.String(), fmt.Sprintf("%d", rpcAddr.Port))) 261 if err != nil { 262 return nil, fmt.Errorf("Failed to resolve Serf advertise address %q: %v", agentConfig.AdvertiseAddrs.Serf, err) 263 } 264 265 conf.SerfConfig.MemberlistConfig.AdvertiseAddr = serfAddr.IP.String() 266 conf.SerfConfig.MemberlistConfig.AdvertisePort = serfAddr.Port 267 conf.ClientRPCAdvertise = rpcAddr 268 conf.ServerRPCAdvertise = serverAddr 269 270 // Set up gc threshold and heartbeat grace period 271 if gcThreshold := agentConfig.Server.NodeGCThreshold; gcThreshold != "" { 272 dur, err := time.ParseDuration(gcThreshold) 273 if err != nil { 274 return nil, err 275 } 276 conf.NodeGCThreshold = dur 277 } 278 if gcInterval := agentConfig.Server.JobGCInterval; gcInterval != "" { 279 dur, err := time.ParseDuration(gcInterval) 280 if err != nil { 281 return nil, fmt.Errorf("failed to parse job_gc_interval: %v", err) 282 } else if dur <= time.Duration(0) { 283 return nil, fmt.Errorf("job_gc_interval should be greater than 0s") 284 } 285 conf.JobGCInterval = dur 286 } 287 if gcThreshold := agentConfig.Server.JobGCThreshold; gcThreshold != "" { 288 dur, err := time.ParseDuration(gcThreshold) 289 if err != nil { 290 return nil, err 291 } 292 conf.JobGCThreshold = dur 293 } 294 if gcThreshold := agentConfig.Server.EvalGCThreshold; gcThreshold != "" { 295 dur, err := time.ParseDuration(gcThreshold) 296 if err != nil { 297 return nil, err 298 } 299 conf.EvalGCThreshold = dur 300 } 301 if gcThreshold := agentConfig.Server.DeploymentGCThreshold; gcThreshold != "" { 302 dur, err := time.ParseDuration(gcThreshold) 303 if err != nil { 304 return nil, err 305 } 306 conf.DeploymentGCThreshold = dur 307 } 308 309 if heartbeatGrace := agentConfig.Server.HeartbeatGrace; heartbeatGrace != 0 { 310 conf.HeartbeatGrace = heartbeatGrace 311 } 312 if min := agentConfig.Server.MinHeartbeatTTL; min != 0 { 313 conf.MinHeartbeatTTL = min 314 } 315 if maxHPS := agentConfig.Server.MaxHeartbeatsPerSecond; maxHPS != 0 { 316 conf.MaxHeartbeatsPerSecond = maxHPS 317 } 318 319 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ServerServiceName == "" { 320 return nil, fmt.Errorf("server_service_name must be set when auto_advertise is enabled") 321 } 322 323 // Add the Consul and Vault configs 324 conf.ConsulConfig = agentConfig.Consul 325 conf.VaultConfig = agentConfig.Vault 326 327 // Set the TLS config 328 conf.TLSConfig = agentConfig.TLSConfig 329 330 // Setup telemetry related config 331 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 332 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 333 conf.DisableDispatchedJobSummaryMetrics = agentConfig.Telemetry.DisableDispatchedJobSummaryMetrics 334 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 335 336 return conf, nil 337 } 338 339 // serverConfig is used to generate a new server configuration struct 340 // for initializing a nomad server. 341 func (a *Agent) serverConfig() (*nomad.Config, error) { 342 c, err := convertServerConfig(a.config) 343 if err != nil { 344 return nil, err 345 } 346 347 a.finalizeServerConfig(c) 348 return c, nil 349 } 350 351 // finalizeServerConfig sets configuration fields on the server config that are 352 // not staticly convertable and are from the agent. 353 func (a *Agent) finalizeServerConfig(c *nomad.Config) { 354 // Setup the logging 355 c.Logger = a.logger 356 c.LogOutput = a.logOutput 357 358 // Setup the plugin loaders 359 c.PluginLoader = a.pluginLoader 360 c.PluginSingletonLoader = a.pluginSingletonLoader 361 } 362 363 // clientConfig is used to generate a new client configuration struct for 364 // initializing a Nomad client. 365 func (a *Agent) clientConfig() (*clientconfig.Config, error) { 366 c, err := convertClientConfig(a.config) 367 if err != nil { 368 return nil, err 369 } 370 371 if err := a.finalizeClientConfig(c); err != nil { 372 return nil, err 373 } 374 375 return c, nil 376 } 377 378 // finalizeClientConfig sets configuration fields on the client config that are 379 // not staticly convertable and are from the agent. 380 func (a *Agent) finalizeClientConfig(c *clientconfig.Config) error { 381 // Setup the logging 382 c.Logger = a.logger 383 c.LogOutput = a.logOutput 384 385 // If we are running a server, append both its bind and advertise address so 386 // we are able to at least talk to the local server even if that isn't 387 // configured explicitly. This handles both running server and client on one 388 // host and -dev mode. 389 if a.server != nil { 390 if a.config.AdvertiseAddrs == nil || a.config.AdvertiseAddrs.RPC == "" { 391 return fmt.Errorf("AdvertiseAddrs is nil or empty") 392 } else if a.config.normalizedAddrs == nil || a.config.normalizedAddrs.RPC == "" { 393 return fmt.Errorf("normalizedAddrs is nil or empty") 394 } 395 396 c.Servers = append(c.Servers, 397 a.config.normalizedAddrs.RPC, 398 a.config.AdvertiseAddrs.RPC) 399 } 400 401 // Setup the plugin loaders 402 c.PluginLoader = a.pluginLoader 403 c.PluginSingletonLoader = a.pluginSingletonLoader 404 405 // Log deprecation messages about Consul related configuration in client 406 // options 407 var invalidConsulKeys []string 408 for key := range c.Options { 409 if strings.HasPrefix(key, "consul") { 410 invalidConsulKeys = append(invalidConsulKeys, fmt.Sprintf("options.%s", key)) 411 } 412 } 413 if len(invalidConsulKeys) > 0 { 414 a.logger.Warn("invalid consul keys", "keys", strings.Join(invalidConsulKeys, ",")) 415 a.logger.Warn(`Nomad client ignores consul related configuration in client options. 416 Please refer to the guide https://www.nomadproject.io/docs/agent/configuration/consul.html 417 to configure Nomad to work with Consul.`) 418 } 419 420 return nil 421 } 422 423 // convertClientConfig takes an agent config and log output and returns a client 424 // Config. There may be missing fields that must be set by the agent. To do this 425 // call finalizeServerConfig 426 func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { 427 // Setup the configuration 428 conf := agentConfig.ClientConfig 429 if conf == nil { 430 conf = clientconfig.DefaultConfig() 431 } 432 433 conf.Servers = agentConfig.Client.Servers 434 conf.LogLevel = agentConfig.LogLevel 435 conf.DevMode = agentConfig.DevMode 436 if agentConfig.Region != "" { 437 conf.Region = agentConfig.Region 438 } 439 if agentConfig.DataDir != "" { 440 conf.StateDir = filepath.Join(agentConfig.DataDir, "client") 441 conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc") 442 } 443 if agentConfig.Client.StateDir != "" { 444 conf.StateDir = agentConfig.Client.StateDir 445 } 446 if agentConfig.Client.AllocDir != "" { 447 conf.AllocDir = agentConfig.Client.AllocDir 448 } 449 if agentConfig.Client.NetworkInterface != "" { 450 conf.NetworkInterface = agentConfig.Client.NetworkInterface 451 } 452 conf.ChrootEnv = agentConfig.Client.ChrootEnv 453 conf.Options = agentConfig.Client.Options 454 if agentConfig.Client.NetworkSpeed != 0 { 455 conf.NetworkSpeed = agentConfig.Client.NetworkSpeed 456 } 457 if agentConfig.Client.CpuCompute != 0 { 458 conf.CpuCompute = agentConfig.Client.CpuCompute 459 } 460 if agentConfig.Client.MemoryMB != 0 { 461 conf.MemoryMB = agentConfig.Client.MemoryMB 462 } 463 if agentConfig.Client.MaxKillTimeout != "" { 464 dur, err := time.ParseDuration(agentConfig.Client.MaxKillTimeout) 465 if err != nil { 466 return nil, fmt.Errorf("Error parsing max kill timeout: %s", err) 467 } 468 conf.MaxKillTimeout = dur 469 } 470 conf.ClientMaxPort = uint(agentConfig.Client.ClientMaxPort) 471 conf.ClientMinPort = uint(agentConfig.Client.ClientMinPort) 472 conf.DisableRemoteExec = agentConfig.Client.DisableRemoteExec 473 conf.TemplateConfig.FunctionBlacklist = agentConfig.Client.TemplateConfig.FunctionBlacklist 474 conf.TemplateConfig.DisableSandbox = agentConfig.Client.TemplateConfig.DisableSandbox 475 476 hvMap := make(map[string]*structs.ClientHostVolumeConfig, len(agentConfig.Client.HostVolumes)) 477 for _, v := range agentConfig.Client.HostVolumes { 478 hvMap[v.Name] = v 479 } 480 conf.HostVolumes = hvMap 481 482 // Setup the node 483 conf.Node = new(structs.Node) 484 conf.Node.Datacenter = agentConfig.Datacenter 485 conf.Node.Name = agentConfig.NodeName 486 conf.Node.Meta = agentConfig.Client.Meta 487 conf.Node.NodeClass = agentConfig.Client.NodeClass 488 489 // Set up the HTTP advertise address 490 conf.Node.HTTPAddr = agentConfig.AdvertiseAddrs.HTTP 491 492 // Reserve resources on the node. 493 // COMPAT(0.10): Remove in 0.10 494 r := conf.Node.Reserved 495 if r == nil { 496 r = new(structs.Resources) 497 conf.Node.Reserved = r 498 } 499 r.CPU = agentConfig.Client.Reserved.CPU 500 r.MemoryMB = agentConfig.Client.Reserved.MemoryMB 501 r.DiskMB = agentConfig.Client.Reserved.DiskMB 502 503 res := conf.Node.ReservedResources 504 if res == nil { 505 res = new(structs.NodeReservedResources) 506 conf.Node.ReservedResources = res 507 } 508 res.Cpu.CpuShares = int64(agentConfig.Client.Reserved.CPU) 509 res.Memory.MemoryMB = int64(agentConfig.Client.Reserved.MemoryMB) 510 res.Disk.DiskMB = int64(agentConfig.Client.Reserved.DiskMB) 511 res.Networks.ReservedHostPorts = agentConfig.Client.Reserved.ReservedPorts 512 513 conf.Version = agentConfig.Version 514 515 if *agentConfig.Consul.AutoAdvertise && agentConfig.Consul.ClientServiceName == "" { 516 return nil, fmt.Errorf("client_service_name must be set when auto_advertise is enabled") 517 } 518 519 conf.ConsulConfig = agentConfig.Consul 520 conf.VaultConfig = agentConfig.Vault 521 522 // Set up Telemetry configuration 523 conf.StatsCollectionInterval = agentConfig.Telemetry.collectionInterval 524 conf.PublishNodeMetrics = agentConfig.Telemetry.PublishNodeMetrics 525 conf.PublishAllocationMetrics = agentConfig.Telemetry.PublishAllocationMetrics 526 conf.DisableTaggedMetrics = agentConfig.Telemetry.DisableTaggedMetrics 527 conf.BackwardsCompatibleMetrics = agentConfig.Telemetry.BackwardsCompatibleMetrics 528 529 // Set the TLS related configs 530 conf.TLSConfig = agentConfig.TLSConfig 531 conf.Node.TLSEnabled = conf.TLSConfig.EnableHTTP 532 533 // Set the GC related configs 534 conf.GCInterval = agentConfig.Client.GCInterval 535 conf.GCParallelDestroys = agentConfig.Client.GCParallelDestroys 536 conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold 537 conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold 538 conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs 539 if agentConfig.Client.NoHostUUID != nil { 540 conf.NoHostUUID = *agentConfig.Client.NoHostUUID 541 } else { 542 // Default no_host_uuid to true 543 conf.NoHostUUID = true 544 } 545 546 // Setup the ACLs 547 conf.ACLEnabled = agentConfig.ACL.Enabled 548 conf.ACLTokenTTL = agentConfig.ACL.TokenTTL 549 conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL 550 551 // Setup networking configration 552 conf.CNIPath = agentConfig.Client.CNIPath 553 conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName 554 conf.BridgeNetworkAllocSubnet = agentConfig.Client.BridgeNetworkSubnet 555 556 return conf, nil 557 } 558 559 // setupServer is used to setup the server if enabled 560 func (a *Agent) setupServer() error { 561 if !a.config.Server.Enabled { 562 return nil 563 } 564 565 // Setup the configuration 566 conf, err := a.serverConfig() 567 if err != nil { 568 return fmt.Errorf("server config setup failed: %s", err) 569 } 570 571 // Generate a node ID and persist it if it is the first instance, otherwise 572 // read the persisted node ID. 573 if err := a.setupNodeID(conf); err != nil { 574 return fmt.Errorf("setting up server node ID failed: %s", err) 575 } 576 577 // Sets up the keyring for gossip encryption 578 if err := a.setupKeyrings(conf); err != nil { 579 return fmt.Errorf("failed to configure keyring: %v", err) 580 } 581 582 // Create the server 583 server, err := nomad.NewServer(conf, a.consulCatalog) 584 if err != nil { 585 return fmt.Errorf("server setup failed: %v", err) 586 } 587 a.server = server 588 589 // Consul check addresses default to bind but can be toggled to use advertise 590 rpcCheckAddr := a.config.normalizedAddrs.RPC 591 serfCheckAddr := a.config.normalizedAddrs.Serf 592 if *a.config.Consul.ChecksUseAdvertise { 593 rpcCheckAddr = a.config.AdvertiseAddrs.RPC 594 serfCheckAddr = a.config.AdvertiseAddrs.Serf 595 } 596 597 // Create the Nomad Server services for Consul 598 if *a.config.Consul.AutoAdvertise { 599 httpServ := &structs.Service{ 600 Name: a.config.Consul.ServerServiceName, 601 PortLabel: a.config.AdvertiseAddrs.HTTP, 602 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 603 } 604 const isServer = true 605 if check := a.agentHTTPCheck(isServer); check != nil { 606 httpServ.Checks = []*structs.ServiceCheck{check} 607 } 608 rpcServ := &structs.Service{ 609 Name: a.config.Consul.ServerServiceName, 610 PortLabel: a.config.AdvertiseAddrs.RPC, 611 Tags: append([]string{consul.ServiceTagRPC}, a.config.Consul.Tags...), 612 Checks: []*structs.ServiceCheck{ 613 { 614 Name: a.config.Consul.ServerRPCCheckName, 615 Type: "tcp", 616 Interval: serverRpcCheckInterval, 617 Timeout: serverRpcCheckTimeout, 618 PortLabel: rpcCheckAddr, 619 }, 620 }, 621 } 622 serfServ := &structs.Service{ 623 Name: a.config.Consul.ServerServiceName, 624 PortLabel: a.config.AdvertiseAddrs.Serf, 625 Tags: append([]string{consul.ServiceTagSerf}, a.config.Consul.Tags...), 626 Checks: []*structs.ServiceCheck{ 627 { 628 Name: a.config.Consul.ServerSerfCheckName, 629 Type: "tcp", 630 Interval: serverSerfCheckInterval, 631 Timeout: serverSerfCheckTimeout, 632 PortLabel: serfCheckAddr, 633 }, 634 }, 635 } 636 637 // Add the http port check if TLS isn't enabled 638 consulServices := []*structs.Service{ 639 rpcServ, 640 serfServ, 641 httpServ, 642 } 643 if err := a.consulService.RegisterAgent(consulRoleServer, consulServices); err != nil { 644 return err 645 } 646 } 647 648 return nil 649 } 650 651 // setupNodeID will pull the persisted node ID, if any, or create a random one 652 // and persist it. 653 func (a *Agent) setupNodeID(config *nomad.Config) error { 654 // For dev mode we have no filesystem access so just make a node ID. 655 if a.config.DevMode { 656 config.NodeID = uuid.Generate() 657 return nil 658 } 659 660 // Load saved state, if any. Since a user could edit this, we also 661 // validate it. Saved state overwrites any configured node id 662 fileID := filepath.Join(config.DataDir, "node-id") 663 if _, err := os.Stat(fileID); err == nil { 664 rawID, err := ioutil.ReadFile(fileID) 665 if err != nil { 666 return err 667 } 668 669 nodeID := strings.TrimSpace(string(rawID)) 670 nodeID = strings.ToLower(nodeID) 671 if _, err := uuidparse.ParseUUID(nodeID); err != nil { 672 return err 673 } 674 config.NodeID = nodeID 675 return nil 676 } 677 678 // If they've configured a node ID manually then just use that, as 679 // long as it's valid. 680 if config.NodeID != "" { 681 config.NodeID = strings.ToLower(config.NodeID) 682 if _, err := uuidparse.ParseUUID(config.NodeID); err != nil { 683 return err 684 } 685 // Persist this configured nodeID to our data directory 686 if err := lib.EnsurePath(fileID, false); err != nil { 687 return err 688 } 689 if err := ioutil.WriteFile(fileID, []byte(config.NodeID), 0600); err != nil { 690 return err 691 } 692 return nil 693 } 694 695 // If we still don't have a valid node ID, make one. 696 if config.NodeID == "" { 697 id := uuid.Generate() 698 if err := lib.EnsurePath(fileID, false); err != nil { 699 return err 700 } 701 if err := ioutil.WriteFile(fileID, []byte(id), 0600); err != nil { 702 return err 703 } 704 705 config.NodeID = id 706 } 707 return nil 708 } 709 710 // setupKeyrings is used to initialize and load keyrings during agent startup 711 func (a *Agent) setupKeyrings(config *nomad.Config) error { 712 file := filepath.Join(a.config.DataDir, serfKeyring) 713 714 if a.config.Server.EncryptKey == "" { 715 goto LOAD 716 } 717 if _, err := os.Stat(file); err != nil { 718 if err := initKeyring(file, a.config.Server.EncryptKey); err != nil { 719 return err 720 } 721 } 722 723 LOAD: 724 if _, err := os.Stat(file); err == nil { 725 config.SerfConfig.KeyringFile = file 726 } 727 if err := loadKeyringFile(config.SerfConfig); err != nil { 728 return err 729 } 730 // Success! 731 return nil 732 } 733 734 // setupClient is used to setup the client if enabled 735 func (a *Agent) setupClient() error { 736 if !a.config.Client.Enabled { 737 return nil 738 } 739 740 // Setup the configuration 741 conf, err := a.clientConfig() 742 if err != nil { 743 return fmt.Errorf("client setup failed: %v", err) 744 } 745 746 // Reserve some ports for the plugins if we are on Windows 747 if runtime.GOOS == "windows" { 748 if err := a.reservePortsForClient(conf); err != nil { 749 return err 750 } 751 } 752 if conf.StateDBFactory == nil { 753 conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode) 754 } 755 756 client, err := client.NewClient(conf, a.consulCatalog, a.consulService) 757 if err != nil { 758 return fmt.Errorf("client setup failed: %v", err) 759 } 760 a.client = client 761 762 // Create the Nomad Client services for Consul 763 if *a.config.Consul.AutoAdvertise { 764 httpServ := &structs.Service{ 765 Name: a.config.Consul.ClientServiceName, 766 PortLabel: a.config.AdvertiseAddrs.HTTP, 767 Tags: append([]string{consul.ServiceTagHTTP}, a.config.Consul.Tags...), 768 } 769 const isServer = false 770 if check := a.agentHTTPCheck(isServer); check != nil { 771 httpServ.Checks = []*structs.ServiceCheck{check} 772 } 773 if err := a.consulService.RegisterAgent(consulRoleClient, []*structs.Service{httpServ}); err != nil { 774 return err 775 } 776 } 777 778 return nil 779 } 780 781 // agentHTTPCheck returns a health check for the agent's HTTP API if possible. 782 // If no HTTP health check can be supported nil is returned. 783 func (a *Agent) agentHTTPCheck(server bool) *structs.ServiceCheck { 784 // Resolve the http check address 785 httpCheckAddr := a.config.normalizedAddrs.HTTP 786 if *a.config.Consul.ChecksUseAdvertise { 787 httpCheckAddr = a.config.AdvertiseAddrs.HTTP 788 } 789 check := structs.ServiceCheck{ 790 Name: a.config.Consul.ClientHTTPCheckName, 791 Type: "http", 792 Path: "/v1/agent/health?type=client", 793 Protocol: "http", 794 Interval: agentHttpCheckInterval, 795 Timeout: agentHttpCheckTimeout, 796 PortLabel: httpCheckAddr, 797 } 798 // Switch to endpoint that doesn't require a leader for servers 799 if server { 800 check.Name = a.config.Consul.ServerHTTPCheckName 801 check.Path = "/v1/agent/health?type=server" 802 } 803 if !a.config.TLSConfig.EnableHTTP { 804 // No HTTPS, return a plain http check 805 return &check 806 } 807 if a.config.TLSConfig.VerifyHTTPSClient { 808 a.logger.Warn("not registering Nomad HTTPS Health Check because verify_https_client enabled") 809 return nil 810 } 811 812 // HTTPS enabled; skip verification 813 check.Protocol = "https" 814 check.TLSSkipVerify = true 815 return &check 816 } 817 818 // reservePortsForClient reserves a range of ports for the client to use when 819 // it creates various plugins for log collection, executors, drivers, etc 820 func (a *Agent) reservePortsForClient(conf *clientconfig.Config) error { 821 if conf.Node.ReservedResources == nil { 822 conf.Node.ReservedResources = &structs.NodeReservedResources{} 823 } 824 825 res := conf.Node.ReservedResources.Networks.ReservedHostPorts 826 if res == "" { 827 res = fmt.Sprintf("%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 828 } else { 829 res += fmt.Sprintf(",%d-%d", conf.ClientMinPort, conf.ClientMaxPort) 830 } 831 conf.Node.ReservedResources.Networks.ReservedHostPorts = res 832 return nil 833 } 834 835 // Leave is used gracefully exit. Clients will inform servers 836 // of their departure so that allocations can be rescheduled. 837 func (a *Agent) Leave() error { 838 if a.client != nil { 839 if err := a.client.Leave(); err != nil { 840 a.logger.Error("client leave failed", "error", err) 841 } 842 } 843 if a.server != nil { 844 if err := a.server.Leave(); err != nil { 845 a.logger.Error("server leave failed", "error", err) 846 } 847 } 848 return nil 849 } 850 851 // Shutdown is used to terminate the agent. 852 func (a *Agent) Shutdown() error { 853 a.shutdownLock.Lock() 854 defer a.shutdownLock.Unlock() 855 856 if a.shutdown { 857 return nil 858 } 859 860 a.logger.Info("requesting shutdown") 861 if a.client != nil { 862 if err := a.client.Shutdown(); err != nil { 863 a.logger.Error("client shutdown failed", "error", err) 864 } 865 } 866 if a.server != nil { 867 if err := a.server.Shutdown(); err != nil { 868 a.logger.Error("server shutdown failed", "error", err) 869 } 870 } 871 872 if err := a.consulService.Shutdown(); err != nil { 873 a.logger.Error("shutting down Consul client failed", "error", err) 874 } 875 876 a.logger.Info("shutdown complete") 877 a.shutdown = true 878 close(a.shutdownCh) 879 return nil 880 } 881 882 // RPC is used to make an RPC call to the Nomad servers 883 func (a *Agent) RPC(method string, args interface{}, reply interface{}) error { 884 if a.server != nil { 885 return a.server.RPC(method, args, reply) 886 } 887 return a.client.RPC(method, args, reply) 888 } 889 890 // Client returns the configured client or nil 891 func (a *Agent) Client() *client.Client { 892 return a.client 893 } 894 895 // Server returns the configured server or nil 896 func (a *Agent) Server() *nomad.Server { 897 return a.server 898 } 899 900 // Stats is used to return statistics for debugging and insight 901 // for various sub-systems 902 func (a *Agent) Stats() map[string]map[string]string { 903 stats := make(map[string]map[string]string) 904 if a.server != nil { 905 subStat := a.server.Stats() 906 for k, v := range subStat { 907 stats[k] = v 908 } 909 } 910 if a.client != nil { 911 subStat := a.client.Stats() 912 for k, v := range subStat { 913 stats[k] = v 914 } 915 } 916 return stats 917 } 918 919 // ShouldReload determines if we should reload the configuration and agent 920 // connections. If the TLS Configuration has not changed, we shouldn't reload. 921 func (a *Agent) ShouldReload(newConfig *Config) (agent, http bool) { 922 a.configLock.Lock() 923 defer a.configLock.Unlock() 924 925 if newConfig.LogLevel != "" && newConfig.LogLevel != a.config.LogLevel { 926 agent = true 927 } 928 929 isEqual, err := a.config.TLSConfig.CertificateInfoIsEqual(newConfig.TLSConfig) 930 if err != nil { 931 a.logger.Error("parsing TLS certificate", "error", err) 932 return agent, false 933 } else if !isEqual { 934 return true, true 935 } 936 937 // Allow the ability to only reload HTTP connections 938 if a.config.TLSConfig.EnableHTTP != newConfig.TLSConfig.EnableHTTP { 939 http = true 940 agent = true 941 } 942 943 // Allow the ability to only reload HTTP connections 944 if a.config.TLSConfig.EnableRPC != newConfig.TLSConfig.EnableRPC { 945 agent = true 946 } 947 948 return agent, http 949 } 950 951 // Reload handles configuration changes for the agent. Provides a method that 952 // is easier to unit test, as this action is invoked via SIGHUP. 953 func (a *Agent) Reload(newConfig *Config) error { 954 a.configLock.Lock() 955 defer a.configLock.Unlock() 956 957 updatedLogging := newConfig != nil && (newConfig.LogLevel != a.config.LogLevel) 958 959 if newConfig == nil || newConfig.TLSConfig == nil && !updatedLogging { 960 return fmt.Errorf("cannot reload agent with nil configuration") 961 } 962 963 if updatedLogging { 964 a.config.LogLevel = newConfig.LogLevel 965 a.logger.SetLevel(log.LevelFromString(newConfig.LogLevel)) 966 } 967 968 fullUpdateTLSConfig := func() { 969 // Completely reload the agent's TLS configuration (moving from non-TLS to 970 // TLS, or vice versa) 971 // This does not handle errors in loading the new TLS configuration 972 a.config.TLSConfig = newConfig.TLSConfig.Copy() 973 } 974 975 if !a.config.TLSConfig.IsEmpty() && !newConfig.TLSConfig.IsEmpty() { 976 // This is just a TLS configuration reload, we don't need to refresh 977 // existing network connections 978 979 // Reload the certificates on the keyloader and on success store the 980 // updated TLS config. It is important to reuse the same keyloader 981 // as this allows us to dynamically reload configurations not only 982 // on the Agent but on the Server and Client too (they are 983 // referencing the same keyloader). 984 keyloader := a.config.TLSConfig.GetKeyLoader() 985 _, err := keyloader.LoadKeyPair(newConfig.TLSConfig.CertFile, newConfig.TLSConfig.KeyFile) 986 if err != nil { 987 return err 988 } 989 a.config.TLSConfig = newConfig.TLSConfig 990 a.config.TLSConfig.KeyLoader = keyloader 991 return nil 992 } else if newConfig.TLSConfig.IsEmpty() && !a.config.TLSConfig.IsEmpty() { 993 a.logger.Warn("downgrading agent's existing TLS configuration to plaintext") 994 fullUpdateTLSConfig() 995 } else if !newConfig.TLSConfig.IsEmpty() && a.config.TLSConfig.IsEmpty() { 996 a.logger.Info("upgrading from plaintext configuration to TLS") 997 fullUpdateTLSConfig() 998 } 999 1000 return nil 1001 } 1002 1003 // GetConfig creates a locked reference to the agent's config 1004 func (a *Agent) GetConfig() *Config { 1005 a.configLock.Lock() 1006 defer a.configLock.Unlock() 1007 1008 return a.config 1009 } 1010 1011 // setupConsul creates the Consul client and starts its main Run loop. 1012 func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { 1013 apiConf, err := consulConfig.ApiConfig() 1014 if err != nil { 1015 return err 1016 } 1017 client, err := api.NewClient(apiConf) 1018 if err != nil { 1019 return err 1020 } 1021 1022 // Determine version for TLSSkipVerify 1023 1024 // Create Consul Catalog client for service discovery. 1025 a.consulCatalog = client.Catalog() 1026 1027 // Create Consul Service client for service advertisement and checks. 1028 isClient := false 1029 if a.config.Client != nil && a.config.Client.Enabled { 1030 isClient = true 1031 } 1032 a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) 1033 1034 // Run the Consul service client's sync'ing main loop 1035 go a.consulService.Run() 1036 return nil 1037 }